SQL 기능을 다양하게 활용하여 EDA 해보기
데이터 추출 및 탐색
데이터 필터링 및 조건부 검색
데이터 집계 및 그룹화
통계량 계산
데이터 변환
데이터 정렬 및 분류
데이터 변형 및 계산
Property | - | 설명 |
---|---|---|
event_time | 이벤트 시간 | 이벤트가 발생한 시간(UTC) |
event_type | 이벤트 유형 | 이벤트의 유형은 총 4가지 |
product_id | 제품_아이디 | 제품의 ID |
category_id | 카테고리_아이디 | 제품 카테고리 ID |
category_code | 카테고리_코드 | 제품의 카테고리 택소노미(코드명)는 가능하다면 만들 수 있습니다. |
일반적으로 의미 있는 카테고리에 존재하고 다양한 종류의 액세서리에는 건너뜁니다. | ||
brand | 상표 | 브랜드 이름의 소문자 문자열. 놓칠 수 있음. |
price | 가격 | 제품의 부동 가격. 현재. |
user_id | 사용자_아이디 | 영구 사용자 ID. |
user_session | 사용자 세션 | 임시 사용자의 세션 ID. 각 사용자 세션에 대해 동일합니다. |
사용자가 긴 일시 정지에서 온라인 스토어로 돌아올 때마다 변경됩니다. |
SELECT
evt.days
, ROUND(SUM(price), 0) AS sales
FROM (
SELECT
SUBSTR(event_time, 9, 2) AS days
, event_type
, user_id
, price
FROM
ecommerce.feb
WHERE
event_type = 'purchase'
AND price > 0
) evt
GROUP BY
evt.days
;
SELECT
evt.days
, count(*) sales
FROM (
SELECT
SUBSTR(event_time, 9, 2) AS days
FROM
ecommerce.feb
WHERE
event_type = 'purchase'
AND price > 0
) evt
GROUP BY
evt.days
;
SELECT
CASE
WHEN evt.days < 3 THEN 'etc.'
WHEN evt.days < 10 THEN '1st'
WHEN evt.days < 17 THEN '2nd'
WHEN evt.days < 24 THEN '3rd'
ELSE '4th'
END AS weeknum
, ROUND(SUM(price), 0) sales
FROM (
SELECT
SUBSTR(event_time, 9, 2) AS days
, event_type
, user_id
, price
FROM
ecommerce.feb
WHERE
price > 0
) evt
WHERE
evt.event_type = 'purchase'
GROUP BY
`weeknum`
;
SELECT
CASE
WHEN evt.days < 3 THEN 'etc.'
WHEN evt.days < 10 THEN '1st'
WHEN evt.days < 17 THEN '2nd'
WHEN evt.days < 24 THEN '3rd'
ELSE '4th'
END AS week_num
, COUNT(*) sales_num
FROM (
SELECT
SUBSTR(event_time, 9, 2) AS days
, event_type
, user_id
, price
FROM
ecommerce.feb
WHERE
price > 0
) evt
WHERE
evt.event_type = 'purchase'
GROUP BY
`week_num`
;
SELECT
CASE
WHEN evt.days < 3 THEN 'etc.'
WHEN evt.days < 10 THEN '1st'
WHEN evt.days < 17 THEN '2nd'
WHEN evt.days < 24 THEN '3rd'
ELSE '4th'
END AS week_num
, COUNT(*) view_num
FROM (
SELECT
SUBSTR(event_time, 9, 2) AS days
, event_type
, user_id
, price
FROM
ecommerce.feb
WHERE
price > 0
) evt
WHERE
evt.event_type = 'view'
GROUP BY
`week_num`
;
SELECT
CASE
WHEN evt.days < 3 THEN 'etc.'
WHEN evt.days < 10 THEN '1st'
WHEN evt.days < 17 THEN '2nd'
WHEN evt.days < 24 THEN '3rd'
ELSE '4th'
END AS week_num
, COUNT(*) cart_num
FROM (
SELECT
SUBSTR(event_time, 9, 2) AS days
, event_type
, user_id
, price
FROM
ecommerce.feb
WHERE
price > 0
) evt
WHERE
evt.event_type = 'cart'
GROUP BY
`week_num`
;
SELECT
CASE
WHEN evt.days < 3 THEN 'etc.'
WHEN evt.days < 10 THEN '1st'
WHEN evt.days < 17 THEN '2nd'
WHEN evt.days < 24 THEN '3rd'
ELSE '4th'
END AS week_num
, COUNT(*) remove_num
FROM (
SELECT
SUBSTR(event_time, 9, 2) AS days
, event_type
, user_id
, price
FROM
ecommerce.feb
WHERE
price > 0
) evt
WHERE
evt.event_type = 'remove_from_cart'
GROUP BY
`week_num`
;
SELECT
evt.week_num
, MAX(IF(event_type = 'view', cnt, 0)) AS 'view'
, MAX(IF(event_type = 'cart', cnt, 0)) AS 'cart'
, MAX(IF(event_type = 'remove_from_cart', cnt, 0)) AS 'remove_from_cart'
, MAX(IF(event_type = 'purchase', cnt, 0)) AS 'purchase'
FROM (
SELECT
CASE
WHEN SUBSTR(event_time, 9, 2) < 3 THEN 'etc.'
WHEN SUBSTR(event_time, 9, 2) < 10 THEN '1st'
WHEN SUBSTR(event_time, 9, 2) < 17 THEN '2nd'
WHEN SUBSTR(event_time, 9, 2) < 24 THEN '3rd'
ELSE '4th'
END AS week_num
, event_type
, COUNT(*) cnt
FROM
ecommerce.feb
WHERE
price > 0
GROUP BY
1
, 2
) evt
GROUP BY
1
;
WITH RECURSIVE temp AS (
(SELECT
0 AS hours
)
UNION
(SELECT
hours + 1
FROM
temp
WHERE
hours < 23
)
)
SELECT
pt.days
, MAX(IF(pt.hh = 0, cnt, 0)) AS AM00
, MAX(IF(pt.hh = 1, cnt, 0)) AS AM01
, MAX(IF(pt.hh = 2, cnt, 0)) AS AM02
, MAX(IF(pt.hh = 3, cnt, 0)) AS AM03
, MAX(IF(pt.hh = 4, cnt, 0)) AS AM04
, MAX(IF(pt.hh = 5, cnt, 0)) AS AM05
, MAX(IF(pt.hh = 6, cnt, 0)) AS AM06
, MAX(IF(pt.hh = 7, cnt, 0)) AS AM07
, MAX(IF(pt.hh = 8, cnt, 0)) AS AM08
, MAX(IF(pt.hh = 9, cnt, 0)) AS AM09
, MAX(IF(pt.hh = 10, cnt, 0)) AS AM10
, MAX(IF(pt.hh = 11, cnt, 0)) AS AM11
, MAX(IF(pt.hh = 12, cnt, 0)) AS PM12
, MAX(IF(pt.hh = 13, cnt, 0)) AS PM01
, MAX(IF(pt.hh = 14, cnt, 0)) AS PM02
, MAX(IF(pt.hh = 15, cnt, 0)) AS PM03
, MAX(IF(pt.hh = 16, cnt, 0)) AS PM04
, MAX(IF(pt.hh = 17, cnt, 0)) AS PM05
, MAX(IF(pt.hh = 18, cnt, 0)) AS PM06
, MAX(IF(pt.hh = 19, cnt, 0)) AS PM07
, MAX(IF(pt.hh = 20, cnt, 0)) AS PM08
, MAX(IF(pt.hh = 21, cnt, 0)) AS PM09
, MAX(IF(pt.hh = 22, cnt, 0)) AS PM10
, MAX(IF(pt.hh = 23, cnt, 0)) AS PM11
FROM (
SELECT
SUBSTR(event_time, 1, 10) AS days
, SUBSTR(event_time, 12, 2) AS hh
, COUNT(*) AS cnt
FROM
ecommerce.feb
WHERE
event_type = 'purchase'
AND price > 0
GROUP BY
1
, 2
) AS pt
JOIN temp ON pt.hh = temp.hours
GROUP BY
pt.days
;
SELECT
pt.days
, MAX(IF(event_type = 'view', cnt, 0)) AS view
, MAX(IF(event_type = 'cart', cnt, 0)) AS cart
, MAX(IF(event_type= 'remove_from_cart', cnt, 0)) AS remove_from_cart
, MAX(IF(event_type = 'purchase', cnt, 0)) AS purchase
FROM (
SELECT
SUBSTR(event_time, 1, 10) AS days
, event_type
, COUNT(*) AS cnt
FROM
ecommerce.feb
WHERE
price > 0
GROUP BY
1
, 2
) AS pt
GROUP BY
pt.days
;
SELECT
pt.hh
, MAX(IF(event_type = 'view', cnt, 0)) AS view
, MAX(IF(event_type = 'cart', cnt, 0)) AS cart
, MAX(IF(event_type= 'remove_from_cart', cnt, 0)) AS remove_from_cart
, MAX(IF(event_type = 'purchase', cnt, 0)) AS purchase
FROM (
SELECT
SUBSTR(event_time, 12, 2) AS hh
, event_type
, COUNT(*) AS cnt
FROM
ecommerce.feb
WHERE
price > 0
GROUP BY
1
, 2
) AS pt
GROUP BY
pt.hh
;
SELECT
evt.day
, MAX(IF(event_type = 'view', cnt, 0)) AS 'view'
, MAX(IF(event_type = 'cart', cnt, 0)) AS 'cart'
, MAX(IF(event_type = 'remove_from_cart', cnt, 0)) AS 'remove_from_cart'
, MAX(IF(event_type = 'purchase', cnt, 0)) AS 'purchase'
, MAX(IF(event_type = 'purchase', sales, 0)) AS 'sales'
FROM (
SELECT
CASE
WHEN MOD(SUBSTR(event_time, 9, 2), 7) = 3 THEN 2
WHEN MOD(SUBSTR(event_time, 9, 2), 7) = 4 THEN 3
WHEN MOD(SUBSTR(event_time, 9, 2), 7) = 5 THEN 4
WHEN MOD(SUBSTR(event_time, 9, 2), 7) = 6 THEN 5
WHEN MOD(SUBSTR(event_time, 9, 2), 7) = 0 THEN 6
WHEN MOD(SUBSTR(event_time, 9, 2), 7) = 1 THEN 7
WHEN MOD(SUBSTR(event_time, 9, 2), 7) = 2 THEN 1
END AS day
, event_type
, COUNT(*) cnt
, ROUND(SUM(price), 0) AS sales
FROM
ecommerce.feb
WHERE
price > 0
GROUP BY
1
, 2
) evt
GROUP BY
1
;
SELECT
COUNT(DISTINCT user_id) AS total_usercnt
FROM
ecommerce.feb
;
SELECT
COUNT(*) AS purchase_user
FROM (
SELECT
DISTINCT user_id
FROM
ecommerce.feb
WHERE
event_type = 'purchase'
) cnt
;
SELECT
user_id,
COUNT(*) AS in_out
FROM
ecommerce.feb
GROUP BY
user_id
ORDER BY
COUNT(*) DESC
;
SELECT
user_id,
COUNT(*) AS in_out
FROM
ecommerce.feb
WHERE
event_type = 'purchase'
GROUP BY
user_id
ORDER BY
COUNT(*) DESC
;
SELECT
*
FROM
ecommerce.feb
WHERE
user_id = 601469771
ORDER BY
event_time
;
SELECT
COUNT(DISTINCT product_id) AS product_cnt
FROM
ecommerce.feb
;
SELECT
COUNT(DISTINCT product_id) AS product_cnt
FROM
ecommerce.feb
WHERE
event_type = 'purchase'
;
SELECT
category_id
, COUNT(*) AS cnt
FROM
ecommerce.feb
WHERE
event_type = 'purchase'
GROUP BY
category_id
ORDER BY
category_id
;