중앙값을 계산하는 방법
계절별로 구별한 행과 pm10만 따로 추출하여 CTE 생성
WITH cte AS (SELECT CASE WHEN measured_at >= '2022-03-01' AND measured_at < '2022-06-01' THEN 'spring'
WHEN measured_at >= '2022-06-01' AND measured_at < '2022-09-01' THEN 'summer'
WHEN measured_at >= '2022-09-01' AND measured_at < '2022-12-01' THEN 'autumn'
ELSE 'winter' END as season,
ROW_NUMBER() OVER(ORDER BY pm10 ASC) as pm10_asc,
pm10
FROM measurements)
SELECT season, pm10_asc, pm10
FROM cte
계절 별로 구별해야 하므로
WITH cte AS (SELECT CASE WHEN measured_at >= '2022-03-01' AND measured_at < '2022-06-01' THEN 'spring'
WHEN measured_at >= '2022-06-01' AND measured_at < '2022-09-01' THEN 'summer'
WHEN measured_at >= '2022-09-01' AND measured_at < '2022-12-01' THEN 'autumn'
ELSE 'winter' END as season,
pm10
FROM measurements
),
cte2 AS (SELECT season,
ROW_NUMBER() OVER(PARTITION BY season ORDER BY pm10 DESC) as pm10_desc,
pm10
FROM cte)
WITH cte AS (SELECT CASE WHEN measured_at >= '2022-03-01' AND measured_at < '2022-06-01' THEN 'spring'
WHEN measured_at >= '2022-06-01' AND measured_at < '2022-09-01' THEN 'summer'
WHEN measured_at >= '2022-09-01' AND measured_at < '2022-12-01' THEN 'autumn'
ELSE 'winter' END as season,
pm10
FROM measurements
),
cte2 AS (SELECT season,
ROW_NUMBER() OVER(PARTITION BY season ORDER BY pm10 DESC) as pm10_desc,
pm10
FROM cte
),
-- 짝수면 중간 두 행의 평균값 / 홀수면 중간 행의 pm10
-- 홀수 : CEIL(cnt / 2), 짝수 : cnt/2, cnt/2 + 1
median AS (SELECT season,
AVG(pm10) as pm10_median
-- ROUND(AVG(pm10), 2) as pm10_average
FROM cte2
WHERE pm10_desc >= CEIL(pm10_desc / 2) AND pm10_desc <= FLOOR(pm10_desc/2 + 1)
GROUP BY
season
),
average AS (SELECT season,
ROUND(AVG(pm10), 2) as pm10_average
FROM cte2
GROUP BY
season
)
SELECT m.season, m.pm10_median, a.pm10_average
FROM median m INNER JOIN average a
ON m.season = a.season
틀렸다.
window function을 제대로 사용해보자. CTE 이렇게 계속해서 만들 필요가 없는데 반성합니다.
WITH cte AS (SELECT CASE WHEN measured_at >= '2022-03-01' AND measured_at < '2022-06-01' THEN 'spring'
WHEN measured_at >= '2022-06-01' AND measured_at < '2022-09-01' THEN 'summer'
WHEN measured_at >= '2022-09-01' AND measured_at < '2022-12-01' THEN 'autumn'
ELSE 'winter' END as season,
pm10
FROM measurements
),
cte2 AS (SELECT season, pm10,
ROW_NUMBER() OVER(PARTITION BY season ORDER BY pm10 DESC) as pm10_desc,
ROUND(AVG(pm10) OVER(PARTITION BY season) , 2) as pm10_average,
COUNT(*) OVER(PARTITION BY season) as cnt
FROM cte
)
SELECT season,
AVG(pm10) as pm10_median,
MAX(pm10_average) as pm10_average
FROM cte2
WHERE pm10_desc BETWEEN cnt/2 AND cnt/2+1
GROUP BY
season
되게 단순한 방법인데, COUNT, AVG 같은 집계 함수를 WINDOW FUNCTION으로 활용할 수 있다는 걸 새까맣게 잊고 있었다. 이런 부분에서 문제를 꾸준히 푸는 건 도움이 된다.