내가 쓴 쿼리
select a.state,
b.order_id,
sum(b.amount) "총 주문금액",
sum(b.profit) "총 이익",
row_number() over (partition by state order by sum(b.amount) desc) "state 매출 순",
case when sum(b.amount) >= avg(c.avg_target)*0.5 then "목표달성"
else "목표 미달성" end "목표 달성 여부"
from list_of_orders a join order_details b
on a.order_id=b.order_id join (
select category, avg(target) avg_target
from sales_target
group by category) c on b.Category=c.Category
group by a.state, a.order_id, b.category
튜터님 쿼리
WITH Average_Targets AS (
SELECT
Category,
AVG(Target) AS Avg_Target
FROM
sales_target
GROUP BY
Category)
SELECT
lo.order_id,
lo.State,
SUM(od.Amount) AS Total_Amount,
SUM(od.Profit) AS Total_Profit,
RANK() OVER (PARTITION BY lo.State ORDER BY SUM(od.Amount) DESC) AS State_Rank,
CASE
WHEN SUM(od.Amount) >= at.Avg_Target / 2 THEN 'Met 50% Target'
ELSE 'Below 50% Target'
END AS Target_Comparison
FROM
list_of_orders AS lo
JOIN
order_details AS od ON lo.order_id = od.order_id
JOIN
Average_Targets AS at ON od.Category = at.Category
GROUP BY
lo.order_id, lo.State, at.Avg_Target
내가 보기엔 두 쿼리 답안이 거의 비슷한 것 같은데..(정확하게 비교는 안해봤지만)
rank 대신 row_number를 써도 되는지 물었을때 row_num은 정렬해서 순서를 나열하는 거이기 때문에 rank랑은 다르다고 답변을 받았다.🥲
그렇담 이문제는 틀린 걸까...
어제 있었던 특강에서 쿼리내에 조인이 너무 많이 걸려 있는 경우, 가독성도 안좋아 질 뿐만 아니라 방대한 양의 데이터였더라면 처리속도에서 문제가 생길 수 있다라고 한다.
앞으로 with문을 자주 활용해서 문제푸는 연습을 해봐야겠다.
내가 쓴 쿼리
SELECT
a.CustomerName,
sum(b.amount) as "총 구매액",
count(distinct a.order_id) as "총 주문 횟수",
ifnull(round(avg(datediff(str_to_date(a.order_date, '%d-%m-%Y'),
(select max(str_to_date(order_date, '%d-%m-%Y'))
from list_of_orders loo2
where loo2.CustomerName=a.CustomerName
and str_to_date(loo2.order_date, '%d-%m-%Y') < str_to_date(a.order_date, '%d-%m-%Y'))))),1) as "주문간 평균 일수"
from list_of_orders a
join order_details b on a.order_id = b.order_id
group by a.CustomerName
order by a.CustomerName
튜터님 쿼리
WITH Customer_Order_Details AS (
SELECT
lo.CustomerName,
STR_TO_DATE(lo.order_date, '%d-%m-%Y') AS order_date,
od.Amount,
ROW_NUMBER() OVER (PARTITION BY lo.CustomerName ORDER BY lo.order_date) AS rn
FROM
list_of_orders AS lo
JOIN
order_details AS od ON lo.order_id = od.order_id
),
Order_Intervals AS (
SELECT
CustomerName,
DATEDIFF(order_date, LAG(order_date) OVER (PARTITION BY CustomerName ORDER BY order_date)) AS days_between_orders
FROM
Customer_Order_Details
WHERE
rn > 1
),
Customer_Stats AS (
SELECT
cod.CustomerName,
SUM(cod.Amount) AS Total_Amount,
COUNT(DISTINCT cod.order_date) AS Total_Orders,
AVG(oi.days_between_orders) AS Avg_Days_Between_Orders
FROM
Customer_Order_Details AS cod
LEFT JOIN
Order_Intervals AS oi ON cod.CustomerName = oi.CustomerName
GROUP BY
cod.CustomerName
)
SELECT
cs.CustomerName,
cs.Total_Amount,
cs.Total_Orders,
cs.Avg_Days_Between_Orders,
cs.Total_Amount / cs.Total_Orders AS Avg_Order_Value,
cs.Total_Orders / NULLIF(cs.Avg_Days_Between_Orders, 0) AS Purchase_Frequency,
(cs.Total_Amount / cs.Total_Orders) * (cs.Total_Orders / NULLIF(cs.Avg_Days_Between_Orders, 0)) * 1 AS Estimated_CLV -- 1년 기준 CLV
FROM
Customer_Stats AS cs;
나는 총 구매액, 총 주문 횟수, 주문간 평균일수 까지는 구했는데, CLV에서 막혔다.
질문방에 관련된걸 올렸었는데,, 튜터님이 답변이 기다 아니다 라고 정확하게 해주시지 않아서, 평균 CLV를 구하는 줄알았는데, 튜터님 쿼리를 보니 고객별 CLV를 구하는 거였더라...