SQL ranking

가연·2024년 12월 31일

랭킹 ranking

동일한 값에 다른 등수 : row_number()
동일한 값에 같은 등수 :
1) 같은 값 다음에 이어지는 등수 (1,2,2,3..) : dense_rank()
2) 같은 값 다음에 점프하는 등수 (1,2,2,4..) : rank()

정렬 : ranking over(order by ~~)
partion by : 끼리끼리 묶어서 진행

select * from salaries S left join employees E on S.emp_no = E.emp_no;
# table 생성
create table temp (select E.emp_no, E.first_name, E.last_name, E.gender, S.salary
			from salaries S left join employees E on S.emp_no = E.emp_no);
# salary 기준 랭킹 부여
# 1) 고액 연봉 순
select emp_no, first_name, last_name, salary, 
			rank() over(order by salary desc) `rank` from temp;   
# 원하는 부분 별로 랭킹 부여 (부서별 랭킹) : partition by
select emp_no, first_name, last_name, salary, 
			rank() over(
	        partition by emp_no order by salary desc) `rank` from temp;

이름 부여
ranking 맨 뒤에 그냥 name 부여하면 끝

데이터 추출 쿼리 문제풀이

#Q1. Division Name 종류 별로 평점 평균, 내림차순 정렬
select `Division Name`, avg(Rating) from dataset2 group by `Division Name` order by avg(Rating) desc ;

#Q2. Department Name 종류 별로 평점 평균, 내림차순 정렬
select `Department Name`, avg(rating) as `평점 평균` from dataset2 group by 1 order by 2 desc;
#group by(or order by 등) 1,2.. (select 항목 순서 숫자로 대체 가능!)

#Q3. Department Name의 값이 Trend인 항목
select * from dataset2 where `Department Name` = "Trend";

#Q4. 3번의 데이터를 나이대별로 처리
#중분류 : Trend 인 항목에서, 평점을 3점 이하로 준 사람들
select age, `Department Name`, rating ,case when Age between 10 and 19 then '10대'
			when Age between 20 and 29 then '20대'
            when Age between 30 and 39 then '30대'
            when Age between 40 and 49 then '40대'
            when Age between 50 and 59 then '50대'
            when Age between 60 and 69 then '60대'
			else '기타'
            end as `나이대`
            from dataset2 where `Department Name` = "Trend" and Rating <= 3;
            
#cf) 나눗셈 몫을 이용해 쉽게 나이대를 얻을 수 있음
#floor(22/10)*10  => 2
select floor(age/10)*10 as `나이대` 
	from dataset2 where `Department Name` = "Trend" and Rating <= 3;
# where 조건1 and 조건2


            
#Q5 4번 결과값들 몇 건인지
select case when Age between 10 and 19 then '10대'
			when Age between 20 and 29 then '20대'
            when Age between 30 and 39 then '30대'
            when Age between 40 and 49 then '40대'
            when Age between 50 and 59 then '50대'
            when Age between 60 and 69 then '60대'
			else '기타'
            end as `나이대`, count(1) as `건수`
            from dataset2 where `Department Name` = "Trend" group by `나이대`;
            #???
            
#cf)
select floor(Age/10)*10 as `나이대`, 
		count(1) as `CNT` from dataset2 
		where `Department Name` = "Trend" and Rating <= 3 
		group by 1 order by 2 desc;
            
#Q6. Trend 항목 리뷰 중 50대들의 3점 이하의 리뷰들을 출력(10개만)
select * from dataset2 
		where `Department Name` = "Trend" 
				and rating <=3 
				and age between 50 and 59 
		limit 10;

#Q7. (Deparment and ClothID)의 항목을 기준으로 평점 평균 계산
select `Department Name`, `Clothing ID`, avg(rating) 
		from dataset2 
		group by 1,2 order by 3;
        
#Q8. Department별로 랭킹 독립적으로 부여, 랭킹 기준은 7번의 평점 평균(내림차순)
#중첩 테이블 사용. 쿼리 값으로 얻은 데이터(테이블)을 바탕으로 문제 해결하기 -> from 에 쿼리 넣어주기
select *, rank() 
		over(partition by `Department Name` order by `AVGRATE` desc) `Rank` 
        from (select `Department Name`, `Clothing ID`, avg(rating) as `AVGRATE` from dataset2 
		group by 1,2 order by 3)A;
        
        
#Q9. 8번에서 Department 별로 평점 평균 순위 Top 10 출력
#위의 테이블 사용해서 또 넣어주기 (중첩(중첩))        
#select * from (~~~)B 
#				where `Rank` <= 10 order by `Department Name`;

select * from (select *, rank() 
		over(partition by `Department Name` order by `AVGRATE` desc) `Rank` 
        from (select `Department Name`, `Clothing ID`, avg(rating) as `AVGRATE` from dataset2 
		group by 1,2 order by 3)A)B 
				where `Rank` <= 10 order by `Department Name`;

#Q10. Department, 나이대를 기준으로 그룹 만들어 평점 평균 구하기
select `Department Name`, floor(Age/10)*10 as `나이대`, avg(rating) as `AVGRATE` 
			from dataset2 group by 1,2;
#group by 여러개는 , 로 연결 / where 여러개는 and/or로 연결

#Q11. 연령대 별로, 생성한 평점평균을 기준으로 랭킹 부여
select `나이대`, `AVGRATE`,
			rank() over(partition by `나이대` order by `AVGRATE`) `Rank`
			from (select `Department Name`, floor(Age/10)*10 as `나이대`, avg(rating) as `AVGRATE` from dataset2 group by 1,2)A;

#Q12. 리뷰 중 size 언급 리뷰인지 아닌지 체크 
#size 단어가 포함되면 1, 아니면 0으로 출력 -> case when
select (case when `Review Text` like "%size%" then 1 
								else 0 
                                end) `size check`
				from dataset2;

#Q13. 전체 리뷰 수, size 언급 리뷰 수 출력
select count(1) `Total CNT`, sum(case when `Review Text` like "%size%" then 1 
								else 0 
                                end) `Size CNT`
				from dataset2;
                
#Q14. loose 언급 리뷰수, small 언급 리뷰 수, tight 언급 리뷰 수, 전체 리뷰 수 추출
select count(1) `Totalcnt`,
sum(case when `Review Text` like "%size%" then 1 else 0 end) `sizecnt`,
sum(case when `Review Text` like "%large%" then 1 else 0 end) `largecnt`,
sum(case when `Review Text` like "%loose%" then 1 else 0 end) `loosecnt`,
sum(case when `Review Text` like "%small%" then 1 else 0 end) `smallcnt`,
sum(case when `Review Text` like "%tight%" then 1 else 0 end) `tightcnt`
from dataset2;


#Q15. 14번을 department별로 보기
select `Department Name`, count(1) `Totalcnt`,
sum(case when `Review Text` like "%size%" then 1 else 0 end) `sizecnt`,
sum(case when `Review Text` like "%large%" then 1 else 0 end) `largecnt`,
sum(case when `Review Text` like "%loose%" then 1 else 0 end) `loosecnt`,
sum(case when `Review Text` like "%small%" then 1 else 0 end) `smallcnt`,
sum(case when `Review Text` like "%tight%" then 1 else 0 end) `tightcnt`
from dataset2 group by 1;

내가 필요한 정보들을 모으자 => 어디에 있는지 체크!
없으면, 필요한 정보를 기반으로 새롭게 만들자
새롭게 만들면서 추가적인 진행을 할 때 from(~)A ; ** 별칭 필요
내가 처리한 결과를 감싸서 진행을 할 수도 있다
=> 쿼리의 효율성 문제가 있을 수 있지만, 일단 원하는 결과에 대한 접근!
기존의 값을 가지고 집계처리 : 여러 수식 함수 + 구글
변형처리 : case when ~
+ 카운팅 : count(~), sum(case when~1/0~)
필터링을 할 때 :
where~ (and/or)~
having ~ 생성된 정보에 대한 조건 ~에 대해서 구별
select문 순서
select~from (~) where ~ group by ~ having ~ order by ~ limit ~..

가연

이전 포스트

SQL join

다음 포스트

SQL ranking

랭킹 ranking

데이터 추출 쿼리 문제풀이

SQL join

EDA

0개의 댓글