1. Transaction
2. 기타 고급 SQL 문법
3. 4일차 과제 해설
1. 트랜잭션이란
BEGIN;
A의 계좌로부터 인출; # 이 명령어들은 마치 하나의 명령어처럼 처리됨.
B의 계좌로 입금; 다 성공하던지 다 실패하던지 둘중의 하나가 됨
END;
- END와 COMMIT은 동일
- 만일 BEGIN 전의 상태로 돌아가고 싶다면 ROLLBACK 실행
- 이 동작은 commit mode에 따라 달라짐!
2. 트랜잭션 커밋 모드: autocommit
3. 트랜잭션 방식
4. DELETE FROM vs. TRUNCATE
1. UNION, EXCEPT, INTERSECT
2. COALESCE, NULLIF
3. LISTAGG
SELECT
userid,
LISTAGG(channel) WITHIN GROUP (ORDER BY ts) channels
FROM raw_data.user_session_channel usc
JOIN raw_data.session_timestamp st ON usc.sessionid = st.sessionid
GROUP BY 1
LIMIT 10;
68 YoutubeGoogleInstagramYoutubeInstagramInstagramInstagramOrganicInstagramYoutube
SELECT
userid,
LISTAGG(channel, '->') WITHIN GROUP (ORDER BY ts) channels
FROM raw_data.user_session_channel usc
JOIN raw_data.session_timestamp st ON usc.sessionid = st.sessionid
GROUP BY 1
LIMIT 10;
68 Youtube->Google->Instagram->Youtube->Instagram->Instagram->Instagram->...
4. WINDOW
Syntax:
Useful functions:
WINDOW - LAG 함수
-- 이전 채널 찾기
SELECT usc.*, st.ts,
LAG(channel,1) OVER (PARTITION BY userId ORDER BY ts) prev_channel
FROM raw_data.user_session_channel usc
JOIN raw_data.session_timestamp st ON usc.sessionid = st.sessionid
ORDER BY usc.userid, st.ts
5. JSON Parsing Functions
예제) JSON_EXTRACT_PATH_TEXT
- SELECT JSON_EXTRACT_PATH_TEXT('{"f2":{"f3":"1"},"f4":{"f5":"99","f6":"star"}}','f4', 'f6');
{
"f2":{
"f3":"1"
},
"f4":{
"f5":"99",
"f6":"star"
}
}
과제1. 사용자별로 처음 채널과 마지막 채널 알아내기
해설1. CTE를 빌딩블록으로 사용
WITH first AS (
SELECT userid, ts, channel, ROW_NUMBER() OVER(PARTITION BY userid ORDER BY ts) seq
FROM raw_data.user_session_channel usc
JOIN raw_data.session_timestamp st ON usc.sessionid = st.sessionid
)
, last AS (
SELECT userid, ts, channel, ROW_NUMBER() OVER(PARTITION BY userid ORDER BY ts DESC) seq
FROM raw_data.user_session_channel usc
JOIN raw_data.session_timestamp st ON usc.sessionid = st.sessionid
)
SELECT first.userid AS userid, first.channel AS first_channel, last.channel AS last_channel
FROM first
JOIN last ON first.userid = last.userid and last.seq = 1
WHERE first.seq = 1;
해설2. JOIN 방식
SELECT first.userid AS userid, first.channel AS first_channel, last.channel AS last_channel
FROM (
SELECT userid, ts, channel, ROW_NUMBER() OVER(PARTITION BY userid ORDER BY ts) seq
FROM raw_data.user_session_channel usc
JOIN raw_data.session_timestamp st ON usc.sessionid = st.sessionid
) first
JOIN (
SELECT userid, ts, channel, ROW_NUMBER() OVER(PARTITION BY userid ORDER BY ts DESC) seq
FROM raw_data.user_session_channel usc
JOIN raw_data.session_timestamp st ON usc.sessionid = st.sessionid
) last ON first.userid = last.userid and last.seq = 1
WHERE first.seq = 1;
해설3. GROUP BY 방식
SELECT userid,
MAX(CASE WHEN rn1 = 1 THEN channel END) first_touch,
MAX(CASE WHEN rn2 = 1 THEN channel END) last_touch
FROM (
SELECT userid,
channel,
(ROW_NUMBER() OVER (PARTITION BY usc.userid ORDER BY st.ts asc)) AS rn1,
(ROW_NUMBER() OVER (PARTITION BY usc.userid ORDER BY st.ts desc)) AS rn2
FROM raw_data.user_session_channel usc
JOIN raw_data.session_timestamp st ON usc.sessionid = st.sessionid
)
GROUP BY 1;
해설4. FIRST_VALUE/LAST_VALUE
SELECT DISTINCT
A.userid,
FIRST_VALUE(A.channel) over(partition by A.userid order by B.ts
rows between unbounded preceding and unbounded following) AS First_Channel,
LAST_VALUE(A.channel) over(partition by A.userid order by B.ts
rows between unbounded preceding and unbounded following) AS Last_Channel
FROM raw_data.user_session_channel A
LEFT JOIN raw_data.session_timestamp B ON A.sessionid = B.sessionid;
과제2. Gross Revenue가 가장 큰 UserID 10개 찾기
해설1. GROUP BY
SELECT
userID,
SUM(amount)
FROM raw_data.session_transaction st
LEFT JOIN raw_data.user_session_channel usc ON st.sessionid = usc.sessionid
GROUP BY 1
ORDER BY 2 DESC
LIMIT 10;
해설2. SUM OVER
SELECT DISTINCT
usc.userid,
SUM(amount) OVER(PARTITION BY usc.userid)
FROM raw_data.user_session_channel AS usc
JOIN raw_data.session_transaction AS revenue ON revenue.sessionid = usc.sessionid
ORDER BY 2 DESC
LIMIT 10;
과제3. raw_data.nps 테이블을 바탕으로 월별 NPS 계산
해설1.
SELECT month,
ROUND((promoters-detractors)::float/total_count*100, 2) AS overall_nps
FROM (
SELECT LEFT(created, 7) AS month,
COUNT(CASE WHEN score >= 9 THEN 1 END) AS promoters,
COUNT(CASE WHEN score <= 6 THEN 1 END) AS detractors,
COUNT(CASE WHEN score > 6 AND score < 9 THEN 1 END) As passives,
COUNT(1) AS total_count
FROM raw_data.nps
GROUP BY 1
ORDER BY 1
);
해설2.
SELECT LEFT(created, 7) AS month,
ROUND(SUM(CASE
WHEN score >= 9 THEN 1
WHEN score <= 6 THEN -1 END)::float*100/COUNT(1), 2)
FROM raw_data.nps
GROUP BY 1
ORDER BY 1;