HR 데이터 분석을 통한 채용 기획 - Window Function

dpwl·2024년 5월 25일
0

Data Analysis with SQL

목록 보기
92/120
post-thumbnail

1. Window Function (윈도우 함수)

  • 집계된 결과값을 기존 데이터에 추가하여 보여줌
  • 합계, 평균, 순위 매기기, 순서 조작 등
  • 결과를 보여주되, 결과 건수가 줄어들지 않음
SELECT WINDOW_FUNCTION(계산 컬럼)
	   OVER (PARTITION BY <Column>	-- 집계 기준 Column
       		 ORDER BY <Column>)		-- 정렬 기준 Column
FROM Table

1.1 Window Function Group By

SUM, AVG, MIN, MAX, COUNT 집계 함수를 활용하여 특정 컬럼을 기준으로 집계된 결과 산출

1.2 Window Function vs Group By

1) Sample Data

SELECT hn.EmployeeNumber, JobLevel, PerformanceRating
FROM hr.hr_number hn
LEFT JOIN hr_cate hc
ON hn.EmployeeNumber = hc.EmployeeNumber 
WHERE hc.Attrition = 'No'
;

# 총 1,233 rows

1.1.1 Group By

# GROUP BY - JobLevel, PerformanceRating 별 인원 수 집계
SELECT JobLevel, PerformanceRating
	 , count(hn.EmployeeNumber) AS hc
FROM hr.hr_number hn
LEFT JOIN hr_cate hc
ON hn.EmployeeNumber = hc.EmployeeNumber 
WHERE hc.Attrition = 'No'
GROUP BY 1, 2
ORDER BY 1, 2
;

# 총 10 rows

1) Group By - JobLevel 별 인원 수 합계

SELECT JobLevel, count(hn.EmployeeNumber) AS hc
FROM hr.hr_number hn 
LEFT JOIN hr_cate hc
ON hn.EmployeeNumber = hc.EmployeeNumber 
WHERE hc.Attrition = 'No'
GROUP BY 1
;

2) Group By - PerformanceRating 별 인원 수 합계

SELECT PerformanceRating, count(hn.EmployeeNumber) AS hc
FROM hr.hr_number hn
LEFT JOIN hr_cate hc
ON hn.EmployeeNumber = hc.EmployeeNumber 
WHERE hc.Attrition = 'No'
GROUP BY 1
;

1.1.2 Window Function

1) Window Function - JobLevel 별 인원 수 합계

SELECT JobLevel, PerformanceRating
	 , count(hn.EmployeeNumber) AS hc
	 , sum(count(hn.EmployeeNumber))
	   OVER (PARTITION BY JobLevel) AS hc_job_level
FROM hr.hr_number hn
LEFT JOIN hr_cate hc
ON hn.EmployeeNumber = hc.EmployeeNumber 
WHERE hc.Attrition = 'No'
GROUP BY 1, 2
ORDER BY 1, 2
;

2) Window Function - JobLevel 별 인원 수 합계

SELECT JobLevel, PerformanceRating
	 , count(hn.EmployeeNumber) AS hc
	 , sum(count(hn.EmployeeNumber))
	   OVER (PARTITION BY JobLevel) AS hc_job_level
	 , sum(count(hn.EmployeeNumber))
	   OVER (PARTITION BY PerformanceRating) AS hc_perform_level
FROM hr.hr_number hn
LEFT JOIN hr_cate hc
ON hn.EmployeeNumber = hc.EmployeeNumber 
WHERE hc.Attrition = 'No'
GROUP BY 1, 2
ORDER BY 1, 2
;

2. LEAD & LAG

LEAD: 앞에 있는 걸 당겨옴
LAG: 뒤에 있는 걸 데려옴

2.1 LEAD & LAG (+ ORDER BY)

SELECT LEAD or LAG (<Column>)		-- 순서 조작 대상 Column
	   OVER (ORDER BY <Column>)		-- 순서 정렬 기준 Column
FROM Table

1) JobLevel 별 오름차순 기준으로 LEAD & LAG (+ ORDER BY)

-- JobLevel 별 인원 수
SELECT JobLevel
	 , count(hn.EmployeeNumber) AS hc
	 , lead(count(hn.EmployeeNumber)) OVER (ORDER BY JobLevel) AS lead_hc
	 , lag(count(hn.EmployeeNumber)) OVER (ORDER BY JobLevel) AS lag_hc
FROM hr.hr_cate hc 
LEFT JOIN hr.hr_number hn 
ON hc.EmployeeNumber = hn.EmployeeNumber 
GROUP BY 1
ORDER BY 1
;

SELECT LEAD or LAG (<Column>, N)	-- 순서 조작 대상 Column
	   OVER (ORDER BY <Column>)		-- 순서 정렬 기준 Column
FROM Table
SELECT JobLevel
	 , count(hn.EmployeeNumber) AS hc
	 , lead(count(hn.EmployeeNumber), 2) OVER (ORDER BY JobLevel) AS lead_hc
	 , lag(count(hn.EmployeeNumber), 2) OVER (ORDER BY JobLevel) AS lag_hc
FROM hr.hr_cate hc 
LEFT JOIN hr.hr_number hn 
ON hc.EmployeeNumber = hn.EmployeeNumber 
GROUP BY 1
ORDER BY 1
;

2.2 LEAD & LAG (+ PARTITION BY, ORDER BY)

SELECT LEAD  or LAG (<Column>)		-- 순서 조작 대상 Column
	   OVER (PARTITION BY <Column>	-- grouping 대상 Column
       		 ORDER BY <Column>)		-- 순서 정렬 기준 Column
FROM Table

1) Department, JobLevel 별 인원 with LEAD & LAG

SELECT Department, JobLevel
	 , count(hn.EmployeeNumber) AS hc
	 , lead(count(hn.EmployeeNumber)) OVER (PARTITION BY Department ORDER BY JobLevel) AS lead_hc
	 , lag(count(hn.EmployeeNumber)) OVER (PARTITION BY Department ORDER BY JobLevel) AS lag_hc
FROM hr.hr_cate hc 
LEFT JOIN hr.hr_number hn 
ON hc.EmployeeNumber = hn.EmployeeNumber 
GROUP BY 1, 2
ORDER BY 1, 2
;

profile
거북선통통통통

0개의 댓글