02-04) SQL심화-02

slow_starter·2025년 6월 30일
0

모두의연구소-DS4기

목록 보기
18/30
post-thumbnail

01. 윈도우 함수(1) 순위 함수

  • window 함수 구조는 아래와 같음

    SELECT 컬럼 명,
    WINDOW_FUNCTION(ARGUMENTS) OVER ([PARTITION BY 컬럼][ORDER BY 절] [WINDOWING 절])
    FROM 테이블 명;

  • window 함수 요소 설명
구성 요소의미
WINDOW_FUNCTIONROW_NUMBER(), RANK(), SUM() 등 함수
OVER()윈도우 함수의 범위를 지정
PARTITION BY그룹을 나누는 기준 (GROUP BY랑 비슷하지만 결과를 안 묶음)
ORDER BY그룹 내 정렬 기준
ROWS BETWEEN ...현재 행 기준 앞뒤 범위 설정 (슬라이딩 윈도우)
  • window 함수 예시(부서별 급여 순위 구하기)
SELECT employee_id,
       department_id,
       salary,
       RANK() OVER (PARTITION BY department_id ORDER BY salary DESC) AS salary_rank
FROM employees;
  • 순위 관련 함수 설명
    • RANK() : 동일한 값에 동일한 순위
    • DENSE_RANK() : 동일한 값에 동일한 순위 부여하나 한 건으로
    • ROW_NUMBER() : 동일한 값이어도 고유한 순위 부여(일반적으로 PRIMARY-KEY 오름차순으로 순서 부여)

02. 윈도우 함수(2) 집계 함수, 그룹 함수

  • 집계 함수 종류
    • SUM(컬럼) : 컬럼을 기준으로 합계 계산
    • AVG(평균) : 컬럼을 기준으로 평균 계산
    • MAX(컬럼) : 컬럼을 기준으로 최대값 계산
    • MIN(컬럼) : 컬럼을 기준으로 최소값 계산
  • 행 순서 집계 함수
    • FIRST_VALUE(컬럼) : 조건 만족하는 첫 번째 값
    • LAST_VALUE(컬럼) : 조건 만족하는 마지막 값
    • LAG(컬럼, n) : 이전 n번째 행
    • LAG(컬럼, n) : 이후 n번째 행
  • WINDOWING 절 : 행의 범위 기준 의미
    • CURRENT ROW : 현재 행
    • UNBOUNDED PRECEDING : 윈도우 시작 위치가 첫 번째 행
    • UNBOUNDED FOLLOWING : 윈도우의 마지막 위치가 마지막 행
  • 그룹 함수
    • ROLLUP : 소그룹 간 계산
    • CUBE : 다차원으로 소계 계산
    • GROUPING SETS : 특정 항목 소계

03. window 함수 연습 예시

  • 순위 함수 연습
SELECT item_id, category, item_name, price,
RANK() OVER(ORDER BY price DESC) AS RANK,
DENSE_RANK() OVER(ORDER BY price DESC) AS DENSE_RANK,
ROW_NUMBER() OVER(ORDER BY price DESC) AS ROW_NUMBER
FROM oceanic-antler-456102-i6.modulabs.item
ORDER BY price DESC;

-- partition by 적용해보기 
SELECT item_id, category, item_name, price,
RANK() OVER(PARTITION BY category ORDER BY price DESC) AS RANK,
DENSE_RANK() OVER(PARTITION BY category ORDER BY price DESC) AS DENSE_RANK,
ROW_NUMBER() OVER(PARTITION BY category ORDER BY price DESC) AS ROW_NUMBER
FROM oceanic-antler-456102-i6.modulabs.item
ORDER BY category, price DESC;
  • 집계 함수(SUM,AVG,MAX,MIN) 연습
-- 임시 테이블 생성
CREATE OR REPLACE TABLE oceanic-antler-456102-i6.modulabs.web (
 YYMM STRING,
 YYMMDD DATE,
 Visits INT64
);

-- 임시 테이블에 데이터 삽입
INSERT INTO oceanic-antler-456102-i6.modulabs.web (YYMM, YYMMDD, Visits)
VALUES
 ('2024-01', '2024-01-01', 100),
 ('2024-01', '2024-01-02', 120),
 ('2024-01', '2024-01-03', 140),
 ('2024-01', '2024-01-04', 120),
 ('2024-01', '2024-01-05', 130),
 ('2024-01', '2024-01-06', 140);

 -- 집계함수 연습

 SELECT *,
 SUM(visits) OVER (PARTITION BY YYMM) as SUM,
 AVG(visits) OVER (PARTITION BY YYMM) as AVG,
 MAX(visits) OVER (PARTITION BY YYMM) as MAX,
 MIN(visits) OVER (PARTITION BY YYMM) as MIN
 FROM oceanic-antler-456102-i6.modulabs.web;
  • 행 순서 집계 함수 실습
-- FIRST_VALUE, LAST_VALUE with ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
SELECT *,
 FIRST_VALUE(price) OVER (PARTITION BY category ORDER BY item_name DESC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS FIRST_VALUE,
 LAST_VALUE(price) OVER (PARTITION BY category ORDER BY item_name DESC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS LAST_VALUE
FROM oceanic-antler-456102-i6.modulabs.item;

-- FIRST_VALUE, LAST_VALUE with ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
SELECT *,
 FIRST_VALUE(price) OVER (PARTITION BY category ORDER BY item_name DESC ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS FIRST_VALUE,
 LAST_VALUE(price) OVER (PARTITION BY category ORDER BY item_name DESC ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS LAST_VALUE
FROM oceanic-antler-456102-i6.modulabs.item
ORDER BY category, item_name DESC;

-- LAG(col, 1), LEAD(col, 1)
SELECT *,
 LAG(price, 1) OVER (ORDER BY price DESC) AS LAG,
 LEAD(price, 1) OVER (ORDER BY price DESC) AS LEAD
FROM oceanic-antler-456102-i6.modulabs.item
ORDER BY price DESC;

-- LAG(col, 2), LEAD(col, 2)
SELECT *,
 LAG(price, 2) OVER (ORDER BY price DESC) AS LAG,
 LEAD(price, 2) OVER (ORDER BY price DESC) AS LEAD
FROM oceanic-antler-456102-i6.modulabs.item
ORDER BY price DESC;
  • 그룹함수 실습
-- department와 job으로 group by 하는 것!

SELECT department, job, SUM(salary) as SUM_salary
FROM oceanic-antler-456102-i6.modulabs.employees
GROUP BY department, job;

-- ROLLUP을 쓰면?
-- 부서(department), 직무(job)별 salary 소계: ROLLUP
SELECT department, job, SUM(salary) as SUM_salary
FROM oceanic-antler-456102-i6.modulabs.employees
GROUP BY ROLLUP(department, job);

-- 부서(department), 직무(job)별 salary 소계: CUBE
-- 명시된 컬럼의 모든 소계?
SELECT department, job, SUM(salary) as SUM_salary
FROM oceanic-antler-456102-i6.modulabs.employees
GROUP BY CUBE(department, job);

-- 부서(department), 직무(job)별 다양한 소계: GROUPING SETS

SELECT department, job, SUM(salary)
FROM oceanic-antler-456102-i6.modulabs.employees
GROUP BY GROUPING SETS((department, job), (department), ());
  • 로그데이터 파싱(JSON 형식) 실습
-- JSON_EXTRACT로 dictionary형 데이터 쪼개기
SELECT *,
 JSON_EXTRACT(log, '$.user') AS user,
 JSON_EXTRACT(log, '$.action') AS action,
 JSON_EXTRACT(log, '$.timestamp') AS timestamp
FROM oceanic-antler-456102-i6.modulabs.user_logs;
profile
2025화이팅!

0개의 댓글