SELECT DATE_FORMAT(event_time, '%Y-%m-%d') AS evetn_date
, SUM(price) AS revenue
FROM purchase_t
GROUP BY DATE_FORMAT(event_time, '%Y-%m-%d');
41763.189934
달러이다.여기서 튀는 값을 발견할 수 있었다.
event_type
이 view
와 cart
는 제외하고 remove_from_cart
와 purchase
만을 고려할 것user_id
, product_id
에서 remove_from_cart
와 purchase
가 모두 있다면 가장 마지막에 있는 것을 최종으로 고려WITH ranked_events AS (
SELECT
user_id,
product_id,
event_time,
event_type,
price,
ROW_NUMBER() OVER (PARTITION BY user_id, product_id ORDER BY event_time DESC) as rn
FROM dec19
WHERE event_type IN ('purchase', 'remove_from_cart')
)
SELECT
user_id,
product_id,
event_time AS product_last_event,
event_type,
price
FROM ranked_events
WHERE rn = 1;
-- 임시 테이블로 생성
CREATE TEMPORARY TABLE p_or_r AS
SELECT
user_id, product_id, event_time, event_type, price
FROM (
SELECT user_id, product_id, event_time, event_type, price,
ROW_NUMBER() OVER (PARTITION BY user_id, product_id ORDER BY event_time DESC) as rn
FROM dec19
WHERE event_type IN ('purchase', 'remove_from_cart')
) ranked_events
WHERE rn = 1;
SELECT COUNT(DISTINCT user_id) AS total_users
, COUNT(event_type) AS total_counts
, SUM(price) AS total_sales
FROM p_or_r
WHERE event_type = 'purchase';
SELECT COUNT(DISTINCT user_id) AS total_users
, COUNT(event_type) AS total_counts
, SUM(price) AS total_sales
FROM p_or_r
WHERE event_type = 'remove_from_cart';
-total users 차이 : 0.593
-total counts 차이 : 0.520
-total sales 차이 : 0.4939
remove_from_cart
가 purchase
보다 매출액이 2배 많다.
▶️매출에 차이가 난다고 볼 수 있지 않을까?
WITH first_purchase AS (
SELECT
user_id,
MIN(event_time) AS first_purchase_time
FROM purchase_t
GROUP BY user_id
),
events_grouped AS (
SELECT
p.user_id,
FLOOR(DATEDIFF(p.event_time, f.first_purchase_time) / 14) AS biweek_period,
COUNT(*) AS event_count
FROM purchase_t p
JOIN first_purchase f ON p.user_id = f.user_id
GROUP BY p.user_id, biweek_period
),
biweek_retention AS (
SELECT
biweek_period,
COUNT(DISTINCT user_id) AS active_users
FROM events_grouped
GROUP BY biweek_period
)
SELECT
biweek_period,
active_users,
active_users / (SELECT active_users FROM biweek_retention WHERE biweek_period = 0) AS retention_rate
FROM biweek_retention
ORDER BY biweek_period;