분석 기획
주제 : SQL을 활용하여 화장품 이커머스 로그데이터 분석 및 전략 도출
풀고자 하는 문제 : cart 상태에서 remove_from_cart로 제품을 삭제하는 고객이 있다. 이들을 제품 구매로 연결되어 매출을 높히고자 한다.
분석 방향 : 화장품 이커머스 몰에서 발생하는 로그데이터으로 고객의 행동을 파악한다.
목표
-리텐션과 전환율을 파악하고, 구매율을 높일 수 있는 방안을 제시한다.
-카트 제거 현상의 원인을 규명하고 적절한 해결방안을 모색한다.
EDA
해당 데이터는 2019년 12월 한 달 데이터를 먼저 가공하여 패턴을 분석하였다.
아래 from 절 from 2019dec
로 동일하여 코드에 표시하지 않았다. (실제 코드에서는 삽입 필수)
특이사항 : category_id는 값이 하나 밖에 없어 분석에서 제외한다.
SELECT COUNT(user_id)
#추출 값 : 3,533,286
SELECT COUNT(DISTINCT user_id, user_session)
#추출 값 : 840,161
SELECT COUNT(DISTINCT user_id, user_session) / COUNT(user_id) AS ratio
#추출 값 : 0.2378
SELECT COUNT(DISTINCT user_id)
#추출 값 : 370,154
SELECT COUNT(DISTINCT user_session)
#추출 값 : 839,813
SELECT COUNT(DISTINCT user_id, user_session)
FROM 2019dec
WHERE event_type = 'view'
#추출 값 : : 794,471
SELECT COUNT(DISTINCT user_id, user_session)
FROM 2019dec
WHERE event_type = 'cart'
#추출 값 : 165,571
SELECT COUNT(DISTINCT user_id, user_session)
FROM 2019dec
WHERE event_type = 'remove_from_cart'
#추출 값 : 84,668
SELECT COUNT(DISTINCT user_id, user_session)
FROM 2019dec
WHERE event_type = 'purchase'
#추출 값 : 28,894
1차로 확인하고 싶은 것
funnel 분석 - 전환율 확인
(한 달 전체 view 중 cart 비율, cart 중 purchase 비율, cart 중 remove_from_cart 비율, view 중 purchase 비율)
view에서 cart 가는데 걸리는 시간 평균
cart에서 remove_from_cart 가는 데이터 패턴 확인
WITH vt AS (
SELECT user_id, user_session, event_time
FROM 2019dec
WHERE event_type = 'view'
), ct AS (
SELECT user_id, user_session, event_time
FROM 2019dec
WHERE event_type = 'cart'
), pt AS (
SELECT user_id, user_session, event_time
FROM 2019dec
WHERE event_type = 'purchase'
), rt AS (
SELECT user_id, user_session, event_time
FROM 2019dec
WHERE event_type = 'remove_from_cart'
)
🔍 view > cart > purchase 비율 확인
SELECT COUNT(DISTINCT vt.user_id, vt.user_session) AS view_count
, COUNT(DISTINCT ct.user_id, ct.user_session) AS cart_count
, COUNT(DISTINCT pt.user_id, pt.user_session) AS phrchase_count
, COUNT(DISTINCT ct.user_id, ct.user_session) / COUNT(DISTINCT vt.user_id, vt.user_session) AS view_cart_ratio
, COUNT(DISTINCT pt.user_id, pt.user_session) / COUNT(DISTINCT ct.user_id, ct.user_session) AS cart_phrchase_ratio
, COUNT(DISTINCT pt.user_id, pt.user_session) / COUNT(DISTINCT vt.user_id, vt.user_session) AS view_phurchase_ratio
FROM vt
LEFT JOIN ct ON vt.user_id = ct.user_id
AND vt.user_session = ct.user_session
AND vt.event_time <= ct.event_time
LEFT JOIN pt ON ct.user_id = pt.user_id
AND ct.user_session = pt.user_session
AND ct.event_time <= pt.event_time
🔍 remove_from_cart로 빠지는 비율도 확인
SELECT COUNT(DISTINCT vt.user_id, vt.user_session) AS view_count
, COUNT(DISTINCT ct.user_id, ct.user_session) AS cart_count
, COUNT(DISTINCT rt.user_id, rt.user_session) AS remove_count
, COUNT(DISTINCT ct.user_id, ct.user_session) / COUNT(DISTINCT vt.user_id, vt.user_session) AS view_cart_ratio
, COUNT(DISTINCT rt.user_id, rt.user_session) / COUNT(DISTINCT ct.user_id, ct.user_session) AS cart_remove_ratio
, COUNT(DISTINCT rt.user_id, rt.user_session) / COUNT(DISTINCT vt.user_id, vt.user_session) AS view_remove_ratio
FROM vt
LEFT JOIN ct ON vt.user_id = ct.user_id
AND vt.user_session = ct.user_session
AND vt.event_time <= ct.event_time
LEFT JOIN rt ON ct.user_id = rt.user_id
AND ct.user_session = rt.user_session
AND ct.event_time <= rt.event_time
시간이 엄청 오래 걸린다..
🔍 view > cart로 넘어가는 시간 파악
SELECT ct.*
, ct.event_time AS cart_event_time
, TIMESTAMPDIFF(SECOND, vt.event_time, ct.event_time) AS time_gap
FROM vt
LEFT JOIN ct ON vt.user_id = ct.user_id
AND vt.user_session = ct.user_session
AND vt.product_id = ct.product_id
AND vt.event_time <= ct.event_time
➡️ 데이터가 1779130에 달해서 product_id별 time_gap을 파악
SELECT product_id, AVG(time_gap) AS gap
FROM (
SELECT ct.*
, ct.event_time AS cart_event_time
, TIMESTAMPDIFF(SECOND, vt.event_time, ct.event_time) AS time_gap
FROM vt
LEFT JOIN ct ON vt.user_id = ct.user_id
AND vt.user_session = ct.user_session
AND vt.product_id = ct.product_id
AND vt.event_time <= ct.event_time) t1
GROUP BY product_id
ORDER BY gap DESC;
값이 너무 많음..
최저는 0이 우르르 & NULL 값도 존재
➡️ 데이터가 1779130에 달해서 user_id별 time_gap을 파악
SELECT user_id, AVG(time_gap) AS gap
FROM (
SELECT ct.*
, ct.event_time AS purchase_event_time
, TIMESTAMPDIFF(SECOND, vt.event_time, ct.event_time) AS time_gap
FROM vt
LEFT JOIN ct ON vt.user_id = ct.user_id
AND vt.user_session = ct.user_session
AND vt.product_id = ct.product_id
AND vt.event_time <= ct.event_time) t2
GROUP BY user_id
값이 너무 다양해고 기준이 없으므로 cart에서 purchase로 넘어가는 고객의 데이터와 비교해보는 것도 하나의 방법
🔍 cart > purchase 넘어가는 시간 파악
SELECT user_id, AVG(time_gap) AS gap
FROM (
SELECT ct.*
, pt.event_time AS remove_event_time
, TIMESTAMPDIFF(SECOND, ct.event_time, pt.event_time) AS time_gap
FROM ct
LEFT JOIN pt ON ct.user_id = pt.user_id
AND ct.user_session = pt.user_session
AND ct.product_id = pt.product_id
AND ct.event_time <= pt.event_time) t3
GROUP BY user_id
ORDER BY gap DESC;
🔍 cart > remove_from_cart 넘어가는 시간 파악
SELECT user_id, AVG(time_gap) AS gap
FROM (
SELECT ct.*
, rt.event_time AS a_event_time
, TIMESTAMPDIFF(SECOND, ct.event_time, rt.event_time) AS time_gap
FROM ct
LEFT JOIN rt ON ct.user_id = rt.user_id
AND ct.user_session = rt.user_session
AND ct.product_id = rt.product_id
AND ct.event_time <= rt.event_time) t4
GROUP BY user_id
ORDER BY gap DESC;
SELECT product_id, AVG(time_gap) AS gap
FROM (
SELECT ct.*
, rt.event_time AS a_event_time
, TIMESTAMPDIFF(SECOND, ct.event_time, rt.event_time) AS time_gap
FROM ct
LEFT JOIN rt ON ct.user_id = rt.user_id
AND ct.user_session = rt.user_session
AND ct.product_id = rt.product_id
AND ct.event_time <= rt.event_time) t4
GROUP BY product_id
ORDER BY gap DESC;
🔍 cart에서 remove_from_cart 가는 데이터 패턴 확인
SELECT product_id, AVG(time_gap) AS gap
FROM (
SELECT ct.*
, rt.event_time AS a_event_time
, TIMESTAMPDIFF(SECOND, ct.event_time, rt.event_time) AS time_gap
FROM ct
LEFT JOIN rt ON ct.user_id = rt.user_id
AND ct.user_session = rt.user_session
AND ct.product_id = rt.product_id
AND ct.event_time <= rt.event_time) t4
GROUP BY product_id
ORDER BY gap DESC;