3-1. 복수 국적 메달 수상한 선수 찾기
select name
from athletes a join records r on a.id = r.athlete_id
join games g on r.game_id = g.id
join teams t on t.id = r.team_id
where g.year>=2000 and r.medal is not null
group by athlete_id
having count(distinct(team_id))>1
order by name
??이건 왜 안되는지...? IN 서브쿼리로는 안되는거임? 대체 왜...???
select distinct(name)
from athletes
where id in (select athlete_id
from records
where game_id in (select game_id
from games
where year>=2000)
and medal is not null
group by athlete_id
having count(distinct(team_id))>1)
order by name
3-2. 할부는 몇 개월로 해드릴까요
select payment_installments, count(distinct(order_id)) order_count, min(payment_value) min_value, max(payment_value) max_value, avg(payment_value) avg_value
from olist_order_payments_dataset
where payment_type = 'credit_card'
group by payment_installments
3-3. 지역별 주문의 특징 ❌
select region as 'Region',
count(DISTINCT(CASE WHEN category = 'Furniture' THEN order_id END)) as 'Furniture',
count(DISTINCT(CASE WHEN category = 'Office Supplies' THEN order_id END)) as 'Office Supplies',
count(DISTINCT(CASE WHEN category = 'Technology' THEN order_id END)) as 'Technology'
from records
group by region
3-4. 배송 예정일 예측 성공과 실패
☁️ julianday
를 사용해서 날짜 차이를 구할 수도 있다.
select strftime('%Y-%m-%d', order_purchase_timestamp) as purchase_date,
count(case when julianday(order_estimated_delivery_date)-julianday(order_delivered_customer_date)>=0 then order_id end) as success,
count(case when julianday(order_estimated_delivery_date)-julianday(order_delivered_customer_date)<0 then order_id end) as fail
from olist_orders_dataset
where strftime('%Y-%m',order_purchase_timestamp)='2017-01'
group by strftime('%Y-%m-%d', order_purchase_timestamp)
order by order_purchase_timestamp
3-5. 쇼핑몰의 일일 매출액과 ARPPU
select strftime('%Y-%m-%d',order_purchase_timestamp) dt, count(DISTINCT(customer_id)) as pu, round(sum(payment_value),2) as revenue_daily,
round(sum(payment_value)/count(DISTINCT(customer_id)),2) as arppu
from olist_orders_dataset as o join olist_order_payments_dataset as p on o.order_id=p.order_id
where order_purchase_timestamp>='2018-01-01'
group by strftime('%Y-%m-%d',order_purchase_timestamp)