WITH
CATEGORY_1(category,sales_category) AS (SELECT category,ROUND(SUM(sales),2) FROM records GROUP BY category ),
SUB_CATEGORY_1(category,sub_category,sales_sub_category ) AS (SELECT category,sub_category,ROUND(SUM(sales),2) FROM records GROUP BY category, sub_category ),
TOTAL_1(sales_total) AS (SELECT ROUND(SUM(sales),2) FROM records),
JOIN_FINAL(category,sub_category , sales_sub_category, sales_category,sales_total, pct_in_category,pct_in_total) AS (
SELECT A.category ,B.sub_category , sales_sub_category, sales_category,sales_total,ROUND((B.sales_sub_category/A.sales_category)100,2) AS pct_in_category,ROUND((sales_sub_category/sales_total)100,2) AS pct_in_total
FROM CATEGORY_1 A
RIGHT JOIN SUB_CATEGORY_1 B ON A.category=B.category
LEFT JOIN TOTAL_1 C ON B.sales_sub_category < C.sales_total)
SELECT * FROM JOIN_FINAL;
FROM CATEGORY_1 A
RIGHT JOIN SUB_CATEGORY_1 B ON A.category=B.category
LEFT JOIN TOTAL_1 C ON B.sales_sub_category < C.sales_total
: 비율을 구하는 문항이라는 점에서 나중에 * 100 하는거 잊지 말기!
/
SELECT
FROM olist_orders_dataset
order by 1 limit 10;
*/
-- 성공과 실패를 case when 구문을 활용해서 구하기
-- 단 배송 완료 여야함 + 배송 예정 시각 또는 완료 시각 데이터 존재해야한다.
-- step1. 조건 더하기
/
select
from olist_orders_dataset
where order_status in ('delivered') and order_delivered_customer_date is not null and order_delivered_customer_date is not null
order by 1 limit 10;
*/
-- step2. 조건에 성공 실패 추출하기
/*
select date(order_purchase_timestamp) as purchase_date,order_id,
case when date(order_delivered_customer_date) <= date(order_estimated_delivery_date) then 'success'
else 'fail'
end as 'result'
from olist_orders_dataset
where order_purchase_timestamp like '%2017-01%' and order_status in ('delivered') and order_delivered_customer_date is not null and order_delivered_customer_date is not null
order by 1 limit 10;
*/
-- step 3. step2 데이터 바탕으로 일자별 실패와 성공 건수 출력하기
--- 오답으로 뜬다..왜?
/*
select purchase_date,
count(case when result= 'success' then 1 end) as 'success',
count(case when result= 'fail' then 1 end) as 'fail'
from (
select date(order_purchase_timestamp) as purchase_date,order_id,
case when date(order_delivered_customer_date) <= date(order_estimated_delivery_date) then 'success'
else 'fail'
end as 'result'
from olist_orders_dataset
where order_purchase_timestamp like '%2017-01%' and order_status in ('delivered') and order_delivered_customer_date is not null and order_delivered_customer_date is not null
) t
group by purchase_date;
*/
--- 조건을 많이 부여한 것 같아 빼주기, 사실 조건이 붙어서 오답이 된 것은 아니었다!
--- 오답원인: success 에 등호를 붙임
-- 이때 일자가 같은 경우 배송 예정 시각이 지나 고객에게 주문한 것이기 때문에 "fail"로 처리해야한다.
select purchase_date,
count(case when result= 'success' then 1 end) as 'success',
count(case when result= 'fail' then 1 end) as 'fail'
from (
select date(order_purchase_timestamp) as purchase_date,order_id,
case when date(order_delivered_customer_date) <date(order_estimated_delivery_date) then 'success'
else 'fail'
end as 'result'
from olist_orders_dataset
where order_purchase_timestamp like '%2017-01%' and ((order_delivered_customer_date is not null) and (order_estimated_delivery_date is not null))
) t
group by purchase_date
with cte(day,total_bill,time,sex,ranking) as (select day, total_bill, time, sex , rank() over (partition by day order by total_bill desc) as ranking
from tips)
select day,total_bill,time,sex from cte where ranking in (1,2,3) ;
: LEAD 와 LAG을 이용해서 푸는 문항
: 아직 개념이 없어서 나중에 풀 예정
-- 체류시간을 30분에서 1시간으로 늘림
-- 특정 사용자의 세션을 재정의, 시작 시간과 종료 시간을 출력
-- 연속적인 이벤트를 분석하는 문제 :LEAD, LAG 함수
SELECT event_timestamp_kst,LEAD(event_timestamp_kst)
FROM GA
WHERE user_pseudo_id IN ('S3WDQCqLpK')
GROUP BY DAY_OF
ORDER BY 1 LIMIT 30;