코호트 분석 (고객 세분화 중 시간의 흐름에 따라 고객을 나누어보는 것)으로
리텐션 값(지속적으로 사용하는지, 핵심 가치를 꾸준히 경험하는지를 측정)을 얻는다.
진행 사항
CREATE TABLE purchase_t AS
SELECT
STR_TO_DATE(SUBSTRING(event_time, 1, 19), '%Y-%m-%d %H:%i:%s') AS event_time,
event_type,
product_id,
category_id,
category_code,
brand,
price,
user_id,
user_session
FROM (
SELECT * FROM oct19 WHERE event_type = 'purchase'
UNION ALL
SELECT * FROM nov19 WHERE event_type = 'purchase'
UNION ALL
SELECT * FROM dec19 WHERE event_type = 'purchase'
UNION ALL
SELECT * FROM jan20 WHERE event_type = 'purchase'
UNION ALL
SELECT * FROM feb20 WHERE event_type = 'purchase'
) AS combined_data;
SELECT COUNT(DISTINCT user_id)
FROM purchase_t;
CREATE TABLE first_month (
SELECT user_id
, MIN(event_time) AS first_event_time
, DATE_FORMAT(MIN(event_time), '%Y-%m-01') AS first_month
FROM purchase_t
GROUP BY user_id
ORDER BY first_event_time);
SELECT *
FROM first_month;
SELECT p.event_time AS order_date
, p.user_id
, f.first_month AS first_order_month
FROM purchase_t p
LEFT JOIN first_month f ON p.user_id = f.user_id;
SELECT first_month, COUNT(DISTINCT user_id) AS user_counts
FROM first_month
GROUP BY first_month;
WITH a AS (
SELECT p.user_id
, f.first_month AS first_order_month
, p.event_time AS order_date
FROM purchase_t p
INNER JOIN first_month f ON p.user_id = f.user_id
)
SELECT
first_order_month,
COUNT(DISTINCT user_id) AS month0,
COUNT(DISTINCT CASE WHEN DATE_FORMAT(DATE_ADD(first_order_month, INTERVAL 1 MONTH), '%Y-%m') = DATE_FORMAT(order_date, '%Y-%m') THEN user_id END) AS month1,
COUNT(DISTINCT CASE WHEN DATE_FORMAT(DATE_ADD(first_order_month, INTERVAL 2 MONTH), '%Y-%m') = DATE_FORMAT(order_date, '%Y-%m') THEN user_id END) AS month2,
COUNT(DISTINCT CASE WHEN DATE_FORMAT(DATE_ADD(first_order_month, INTERVAL 3 MONTH), '%Y-%m') = DATE_FORMAT(order_date, '%Y-%m') THEN user_id END) AS month3,
COUNT(DISTINCT CASE WHEN DATE_FORMAT(DATE_ADD(first_order_month, INTERVAL 4 MONTH), '%Y-%m') = DATE_FORMAT(order_date, '%Y-%m') THEN user_id END) AS month4
FROM a
GROUP BY first_order_month
ORDER BY first_order_month;
월별로 했을 경우 5개월 분량으로 자세히 확인하기 어려워 주차별로 수정
여기서 year을 제외하고 week만 했을 때 해가 달라지는 이슈 때문에 계산이 제대로 되지 않아 년도 + 주차
로 계산하였다.
-- id 별 첫 구매 주차 테이블 생성
CREATE TABLE first_week (
SELECT user_id
, MIN(event_time) AS first_event_time
, YEARWEEK(MIN(event_time), 1) AS first_order_week
FROM purchase_t
GROUP BY user_id
ORDER BY first_event_time);
-- 클래식 리텐션
WITH all_weeks AS (
SELECT
p.user_id,
p.event_time AS order_date,
YEARWEEK(p.event_time, 1) AS order_week,
f.first_order_week,
SUBSTRING(YEARWEEK(p.event_time, 1), 1, 4) AS order_year,
SUBSTRING(YEARWEEK(p.event_time, 1), 5, 2) AS order_week_num,
SUBSTRING(f.first_order_week, 1, 4) AS first_order_year,
SUBSTRING(f.first_order_week, 5, 2) AS first_order_week_num
FROM
purchase_t p
INNER JOIN
first_week f ON p.user_id = f.user_id
)
SELECT
first_order_week,
COUNT(DISTINCT CASE WHEN order_week = first_order_week THEN user_id END) AS week0,
COUNT(DISTINCT CASE WHEN order_year * 52 + order_week_num = first_order_year * 52 + first_order_week_num + 1 THEN user_id END) AS week1,
COUNT(DISTINCT CASE WHEN order_year * 52 + order_week_num = first_order_year * 52 + first_order_week_num + 2 THEN user_id END) AS week2,
COUNT(DISTINCT CASE WHEN order_year * 52 + order_week_num = first_order_year * 52 + first_order_week_num + 3 THEN user_id END) AS week3,
COUNT(DISTINCT CASE WHEN order_year * 52 + order_week_num = first_order_year * 52 + first_order_week_num + 4 THEN user_id END) AS week4,
COUNT(DISTINCT CASE WHEN order_year * 52 + order_week_num = first_order_year * 52 + first_order_week_num + 5 THEN user_id END) AS week5,
COUNT(DISTINCT CASE WHEN order_year * 52 + order_week_num = first_order_year * 52 + first_order_week_num + 6 THEN user_id END) AS week6,
COUNT(DISTINCT CASE WHEN order_year * 52 + order_week_num = first_order_year * 52 + first_order_week_num + 7 THEN user_id END) AS week7,
COUNT(DISTINCT CASE WHEN order_year * 52 + order_week_num = first_order_year * 52 + first_order_week_num + 8 THEN user_id END) AS week8,
COUNT(DISTINCT CASE WHEN order_year * 52 + order_week_num = first_order_year * 52 + first_order_week_num + 9 THEN user_id END) AS week9,
COUNT(DISTINCT CASE WHEN order_year * 52 + order_week_num = first_order_year * 52 + first_order_week_num + 10 THEN user_id END) AS week10,
COUNT(DISTINCT CASE WHEN order_year * 52 + order_week_num = first_order_year * 52 + first_order_week_num + 11 THEN user_id END) AS week11,
COUNT(DISTINCT CASE WHEN order_year * 52 + order_week_num = first_order_year * 52 + first_order_week_num + 12 THEN user_id END) AS week12,
COUNT(DISTINCT CASE WHEN order_year * 52 + order_week_num = first_order_year * 52 + first_order_week_num + 13 THEN user_id END) AS week13,
COUNT(DISTINCT CASE WHEN order_year * 52 + order_week_num = first_order_year * 52 + first_order_week_num + 14 THEN user_id END) AS week14,
COUNT(DISTINCT CASE WHEN order_year * 52 + order_week_num = first_order_year * 52 + first_order_week_num + 15 THEN user_id END) AS week15,
COUNT(DISTINCT CASE WHEN order_year * 52 + order_week_num = first_order_year * 52 + first_order_week_num + 16 THEN user_id END) AS week16,
COUNT(DISTINCT CASE WHEN order_year * 52 + order_week_num = first_order_year * 52 + first_order_week_num + 17 THEN user_id END) AS week17,
COUNT(DISTINCT CASE WHEN order_year * 52 + order_week_num = first_order_year * 52 + first_order_week_num + 18 THEN user_id END) AS week18,
COUNT(DISTINCT CASE WHEN order_year * 52 + order_week_num = first_order_year * 52 + first_order_week_num + 19 THEN user_id END) AS week19,
COUNT(DISTINCT CASE WHEN order_year * 52 + order_week_num = first_order_year * 52 + first_order_week_num + 20 THEN user_id END) AS week20,
COUNT(DISTINCT CASE WHEN order_year * 52 + order_week_num = first_order_year * 52 + first_order_week_num + 21 THEN user_id END) AS week21,
COUNT(DISTINCT CASE WHEN order_year * 52 + order_week_num = first_order_year * 52 + first_order_week_num + 22 THEN user_id END) AS week22
FROM
all_weeks
GROUP BY
first_order_week
ORDER BY
first_order_week;
10월부터 2월까지 총 22주가 존재한다.
<그래프>
-- last_week 값이 필요한 관계로 추가 테이블 생성
CREATE TABLE first_and_last_week (
SELECT user_id
, MIN(event_time) AS first_event_time
, YEARWEEK(MIN(event_time), 1) AS first_order_week
, MAX(event_time) AS last_event_time
, YEARWEEK(MAX(event_time), 1) AS last_order_week
FROM purchase_t
GROUP BY user_id);
WITH all_weeks AS (
SELECT
p.user_id,
p.event_time AS order_date,
YEARWEEK(p.event_time, 1) AS order_week,
f.first_order_week,
SUBSTRING(YEARWEEK(p.event_time, 1), 1, 4) AS order_year,
SUBSTRING(YEARWEEK(p.event_time, 1), 5, 2) AS order_week_num,
SUBSTRING(f.first_order_week, 1, 4) AS first_order_year,
SUBSTRING(f.first_order_week, 5, 2) AS first_order_week_num,
f.last_order_week,
SUBSTRING(f.last_order_week, 1, 4) AS last_order_year,
SUBSTRING(f.last_order_week, 5, 2) AS last_order_week_num
FROM
purchase_t p
INNER JOIN
first_and_last_week f ON p.user_id = f.user_id
)
SELECT first_order_week,
COUNT(DISTINCT CASE WHEN order_week = last_order_week THEN user_id END) AS week0,
COUNT(DISTINCT CASE WHEN order_year * 52 + order_week_num + 1 <= last_order_year * 52 + last_order_week_num THEN user_id END) AS week1,
COUNT(DISTINCT CASE WHEN order_year * 52 + order_week_num + 2 <= last_order_year * 52 + last_order_week_num THEN user_id END) AS week2,
COUNT(DISTINCT CASE WHEN order_year * 52 + order_week_num + 3 <= last_order_year * 52 + last_order_week_num THEN user_id END) AS week3,
COUNT(DISTINCT CASE WHEN order_year * 52 + order_week_num + 4 <= last_order_year * 52 + last_order_week_num THEN user_id END) AS week4,
COUNT(DISTINCT CASE WHEN order_year * 52 + order_week_num + 5 <= last_order_year * 52 + last_order_week_num THEN user_id END) AS week5,
COUNT(DISTINCT CASE WHEN order_year * 52 + order_week_num + 6 <= last_order_year * 52 + last_order_week_num THEN user_id END) AS week6,
COUNT(DISTINCT CASE WHEN order_year * 52 + order_week_num + 7 <= last_order_year * 52 + last_order_week_num THEN user_id END) AS week7,
COUNT(DISTINCT CASE WHEN order_year * 52 + order_week_num + 8 <= last_order_year * 52 + last_order_week_num THEN user_id END) AS week8,
COUNT(DISTINCT CASE WHEN order_year * 52 + order_week_num + 9 <= last_order_year * 52 + last_order_week_num THEN user_id END) AS week9,
COUNT(DISTINCT CASE WHEN order_year * 52 + order_week_num + 10 <= last_order_year * 52 + last_order_week_num THEN user_id END) AS week10,
COUNT(DISTINCT CASE WHEN order_year * 52 + order_week_num + 11 <= last_order_year * 52 + last_order_week_num THEN user_id END) AS week11,
COUNT(DISTINCT CASE WHEN order_year * 52 + order_week_num + 12 <= last_order_year * 52 + last_order_week_num THEN user_id END) AS week12,
COUNT(DISTINCT CASE WHEN order_year * 52 + order_week_num + 13 <= last_order_year * 52 + last_order_week_num THEN user_id END) AS week13,
COUNT(DISTINCT CASE WHEN order_year * 52 + order_week_num + 14 <= last_order_year * 52 + last_order_week_num THEN user_id END) AS week14,
COUNT(DISTINCT CASE WHEN order_year * 52 + order_week_num + 15 <= last_order_year * 52 + last_order_week_num THEN user_id END) AS week15,
COUNT(DISTINCT CASE WHEN order_year * 52 + order_week_num + 16 <= last_order_year * 52 + last_order_week_num THEN user_id END) AS week16,
COUNT(DISTINCT CASE WHEN order_year * 52 + order_week_num + 17 <= last_order_year * 52 + last_order_week_num THEN user_id END) AS week17,
COUNT(DISTINCT CASE WHEN order_year * 52 + order_week_num + 18 <= last_order_year * 52 + last_order_week_num THEN user_id END) AS week18,
COUNT(DISTINCT CASE WHEN order_year * 52 + order_week_num + 19 <= last_order_year * 52 + last_order_week_num THEN user_id END) AS week19,
COUNT(DISTINCT CASE WHEN order_year * 52 + order_week_num + 20 <= last_order_year * 52 + last_order_week_num THEN user_id END) AS week20,
COUNT(DISTINCT CASE WHEN order_year * 52 + order_week_num + 21 <= last_order_year * 52 + last_order_week_num THEN user_id END) AS week21,
COUNT(DISTINCT CASE WHEN order_year * 52 + order_week_num + 22 <= last_order_year * 52 + last_order_week_num THEN user_id END) AS week22
FROM
all_weeks
GROUP BY
first_order_week
ORDER BY
first_order_week;
<그래프>
구간은 겹치지 않고 연달아 설정한다. Day 1-3 다음에 Day4-6 이 나오게, 기간을 묶어서 진행하기 때문에 노이즈에 강하다. 하루 접속하지 않아도 리텐션에 영향을 주지 않는다.
Range 리텐션 = Next Period에 방문한 사람 수 / 최초 Period에 방문한 사람 수
클래식 리텐션: 특정 기준일 이후 특정 시점에 얼마나 많은 사용자가 다시 활동했는지 측정.
범위 리텐션: 특정 주차에 시작한 사용자가 해당 주차를 포함하여 이후 주차까지 누적하여 활동했는지 여부를 측정 (특정 기간 내에 누적된 사용자의 수)
롤링 리텐션 장 단점
-장점 : 이탈에 대한 정보를 얻을 수 있다.
-단점 : 충성고객과 일시고객이 동일하게 고려된다, 시간이 지나 오지 않던 고객이 기간 후 오게되면 리텐션에 카운트 되서 숫자가 바뀔 수 있다.
-그래서 기존리텐션 지표의 보조로 사용하는 경우가 많다.
범위 리텐션 장 단점
-클래식 리텐션과 비슷하지만 기간을 늘려서 살펴본다.
-장점 : 개념이 간단하기 때문에 설명하기쉽다.
-단점 : period 기간만큼 시간이 지나야 지표를 확인할 수 있다.
-그래서 기간은 비즈니스에 맞게 선정하는 것이 중요하다.