[수업 목표]
[복습]
SELECT column1, special_column
FROM (/*subquery*/
SELECT column1, column2 special_column
FROM table1
) a
-- LEFT JOIN
SELECT 조회할 컬럼
FROM 테이블1 a LEFT JOIN 테이블2 b ON a.공통컬럼명 = b.공통컬럼명
-- INNER JOIN
SELECT 조회할 컬럼
FROM 테이블1 a INNER JOIN 테이블2 b ON a.공통컬럼명 = b.공통컬럼명
SELECT restaurant_name,
AVG(rating) average_of_rating,
AVG(IF(rating<>'NOT given', rating, NULL)) average_of_rating2
FROM food_orders
GROUP BY 1
실제 연산에 사용되는 데이터는 다음과 같음
SELECT fo.order_id ,
fo.customer_id ,
fo.restaurant_name ,
fo.price ,
c.name ,
c.age ,
c.gender
FROM food_orders fo LEFT JOIN customers c ON fo.customer_id = c.customer_id
WHERE c.customer_id IS NOT NULL
→ NULL 제거를 한 결과는 INNER JOIN 결과와 동일
사용할 수 없는 값 대신 다른 값을 대체해서 사용할 수 있음
데이터 분석 시에는 평균값 혹은 중앙값 등 대표값을 이용하여 대체하기도 함
다른 값으로 변경하고 싶을 때 두 개의 문법을 이용할 수 있음
IF(rating>=1. rating, 대체값)
COALESCE(age, 대체값)
값의 변경
SELECT fo.order_id ,
fo.customer_id ,
fo.restaurant_name ,
fo.price ,
c.name ,
c.age ,
COALESCE(c.age,20) "null 제거",
c.gender
FROM food_orders fo LEFT JOIN customers c ON fo.customer_id = c.customer_id
WHERE c.age IS NULL
SELECT customer_id, name, email, gender, age,
CASE
WHEN age < 15 THEN 15
WHEN age > 80 THEN 80
ELSE age
END "범위를 지정해준 age"
FROM customers
a. 음식점별, 시간별 주문건수 집계
SELECT fo.restaurant_name,
SUBSTRING(p.time,1,2) hh,
COUNT(1) cnt_order
FROM food_orders fo INNER JOIN payments p ON fo.order_id = p.order_id
WHERE SUBSTRING(p.time,1,2) BETWEEN 15 AND 20
GROUP BY 1, 2
b. Pivot view 구조 만들기
SELECT restaurant_name,
MAX(IF(hh='15', cnt_order, 0)) "15",
MAX(IF(hh='16', cnt_order, 0)) "16",
MAX(IF(hh='17', cnt_order, 0)) "17",
MAX(IF(hh='18', cnt_order, 0)) "18",
MAX(IF(hh='19', cnt_order, 0)) "19",
MAX(IF(hh='20', cnt_order, 0)) "20"
FROM (
SELECT fo.restaurant_name,
SUBSTRING(p.time,1,2) hh,
COUNT(1) cnt_order
FROM food_orders fo INNER JOIN payments p ON fo.order_id = p.order_id
WHERE SUBSTRING(p.time,1,2) BETWEEN 15 AND 20
GROUP BY 1, 2
) a
GROUP BY 1
ORDER BY 7 DESC
SELECT c.gender ,
CASE
WHEN age BETWEEN 10 AND 19 THEN 10
WHEN age BETWEEN 20 AND 29 THEN 20
WHEN age BETWEEN 30 AND 39 THEN 30
WHEN age BETWEEN 40 AND 49 THEN 40
WHEN age BETWEEN 50 AND 59 THEN 50
END range_of_age,
COUNT(1)
FROM food_orders fo INNER JOIN customers c ON fo.customer_id = c.customer_id
WHERE c.age BETWEEN 10 AND 59
GROUP BY 1, 2
b. Pivot view 구조 만들기
SELECT range_of_age,
MAX(IF(gender='female',order_count,0)) female,
MAX(IF(gender='male',order_count,0)) male
FROM (
SELECT c.gender ,
CASE
WHEN age BETWEEN 10 AND 19 THEN 10
WHEN age BETWEEN 20 AND 29 THEN 20
WHEN age BETWEEN 30 AND 39 THEN 30
WHEN age BETWEEN 40 AND 49 THEN 40
WHEN age BETWEEN 50 AND 59 THEN 50
END range_of_age,
COUNT(1) order_count
FROM food_orders fo INNER JOIN customers c ON fo.customer_id = c.customer_id
WHERE c.age BETWEEN 10 AND 59
GROUP BY 1, 2
) t
GROUP BY 1
ORDER BY range_of_age
Window Function
활용 예시
기본 SQL 구조로 해결하기 위해서는 복잡하게 Subquery문을 이용하거나 여러 번의 연산을 수행해야 하지만 자체적으로 제공되는 Window function 기능을 이용하면 조금 더 편리함
기본 구조
WINDOW_FUNCTION(argument) OVER (PARTITION BY 그룹 기준 컬럼 ORDER BY 정렬 기준)
SELECT cuisine_type , restaurant_name , COUNT(1) order_count
FROM food_orders fo
GROUP BY 1, 2
b. RANK 함수 적용
SELECT cuisine_type ,
restaurant_name ,
RANK() OVER (PARTITION BY cuisine_type ORDER BY order_count DESC) rn,
order_count
FROM (
SELECT cuisine_type , restaurant_name , COUNT(1) order_count
FROM food_orders fo
GROUP BY 1, 2
) a
c. 3위까지 조회하고 음식 타입별, 순위별 정렬
SELECT cuisine_type,
restaurant_name,
order_count,
rn "순위"
FROM (
SELECT cuisine_type ,
restaurant_name ,
RANK() OVER (PARTITION BY cuisine_type ORDER BY order_count DESC) rn,
order_count
FROM (
SELECT cuisine_type , restaurant_name , COUNT(1) order_count
FROM food_orders fo
GROUP BY 1, 2
) a
) b
WHERE rn <= 3
ORDER BY 1, 4
SELECT cuisine_type , restaurant_name , count(1) order_count
FROM food_orders fo
GROUP BY 1, 2
b. 카테고리별 합, 카테고리별 누적합
SELECT cuisine_type ,
restaurant_name ,
cnt_order ,
SUM(cnt_order) OVER (PARTITION BY cuisine_type) sum_cuisine,
SUM(cnt_order) OVER (PARTITION BY cuisine_type ORDER BY cnt_order) cum_cuisine
FROM (
SELECT cuisine_type , restaurant_name , count(1) cnt_order
FROM food_orders fo
GROUP BY 1, 2
) a
ORDER BY cuisine_type , cnt_order
→ cnt_order가 동일한 값을 일괄로 더해 cum_cuisine으로 출력되는 문제가 있음
(cum_cuisine이 1,2,3,..,,13,16,…이 아닌 9,9,9,…,13,37,…로 나옴)
c. 누적합 수정
WINDOW
함수(SUM
등)를 사용할 때, SUM
으로 동일한 cnt_order
값을 가진 여러 행이 있을 경우 SQL 엔진이 이 값을 한꺼번에 더하는 현상ORDER BY
절에 cnt_order
외에 추가적인 열에 순서를 부여할 수 있는 restaurant_name
을 포함시키기cnt_order
값을 가진 행들이 명확하게 순서가 정해져 누적합이 정상적으로 처리됨ORDER BY
에 cum_cuisine
을 추가SELECT cuisine_type ,
restaurant_name ,
cnt_order ,
SUM(cnt_order) OVER (PARTITION BY cuisine_type) sum_cuisine,
SUM(cnt_order) OVER (PARTITION BY cuisine_type ORDER BY cnt_order, restaurant_name) cum_cuisine
FROM (
SELECT cuisine_type , restaurant_name , count(1) cnt_order
FROM food_orders fo
GROUP BY 1, 2
) a
ORDER BY cuisine_type , cnt_order , cum_cuisine
a. yyyy-mm-dd 형식의 컬럼을 date type으로 변경
SELECT date(date) date_type,
date
FROM payments
b. date type을 date_format을 이용하여 년, 월, 일, 주로 조회해보기
SELECT DATE(date) date_type,
DATE_FORMAT(DATE(date), '%Y') "년",
DATE_FORMAT(DATE(date), '%m') "월",
DATE_FORMAT(DATE(date), '%d') "일",
DATE_FORMAT(DATE(date), '%w') "요일"
FROM payments p
a. 년도, 월을 포함하여 데이터 가공하기
SELECT DATE_FORMAT(DATE(date), '%Y') y,
DATE_FORMAT(DATE(date), '%m') m,
count(1) cnt_order
FROM food_orders fo LEFT JOIN payments p ON fo.order_id = p.order_id
b. 년도, 월별 주문 건수 구하기
SELECT DATE_FORMAT(DATE(date), '%Y') y,
DATE_FORMAT(DATE(date), '%m') m,
count(1) order_count
FROM food_orders fo LEFT JOIN payments p ON fo.order_id = p.order_id
GROUP BY 1, 2
c. 3월 조건으로 지정하고, 년도별로 정렬하기
SELECT DATE_FORMAT(DATE(date), '%Y') y,
DATE_FORMAT(DATE(date), '%m') m,
count(1) order_count
FROM food_orders fo LEFT JOIN payments p ON fo.order_id = p.order_id
WHERE DATE_FORMAT(DATE(date), '%m') = '03'
GROUP BY 1, 2
ORDER BY 1
💡 음식 타입별, 연령별 주문건수 pivot view 만들기
SELECT cuisine_type ,
MAX(IF(age=10, cnt_order, 0)) "10대",
MAX(IF(age=20, cnt_order, 0)) "20대",
MAX(IF(age=30, cnt_order, 0)) "30대",
MAX(IF(age=40, cnt_order, 0)) "40대",
MAX(IF(age=50, cnt_order, 0)) "50대"
FROM (
SELECT fo.cuisine_type ,
CASE
WHEN age BETWEEN 10 AND 19 THEN 10
WHEN age BETWEEN 20 AND 29 THEN 20
WHEN age BETWEEN 30 AND 39 THEN 30
WHEN age BETWEEN 40 AND 49 THEN 40
WHEN age BETWEEN 50 AND 59 THEN 50
END age,
COUNT(1) cnt_order
FROM food_orders fo INNER JOIN customers c ON fo.customer_id = c.customer_id
WHERE age BETWEEN 10 AND 59
GROUP BY 1, 2
) a
GROUP BY 1