%%sql
WITH first AS (
SELECT userid, ts, channel, ROW_NUMBER() OVER(PARTITION BY userid ORDER BY ts) seq
FROM raw_data.user_session_channel usc
JOIN raw_data.session_timestamp st ON usc.sessionid = st.sessionid
), last AS (
SELECT userid, ts, channel, ROW_NUMBER() OVER(PARTITION BY userid ORDER BY ts DESC) seq
FROM raw_data.user_session_channel usc
JOIN raw_data.session_timestamp st ON usc.sessionid = st.sessionid
)
SELECT first.userid AS userid, first.channel AS first_channel, last.channel AS last_channel
FROM first
JOIN last ON first.userid = last.userid and last.seq = 1
WHERE first.seq = 1;
%%sql
SELECT first.userid AS userid, first.channel AS first_channel, last.channel AS last_channel
FROM (
SELECT userid, ts, channel, ROW_NUMBER() OVER(PARTITION BY userid ORDER BY ts) seq
FROM raw_data.user_session_channel usc
JOIN raw_data.session_timestamp st ON usc.sessionid = st.sessionid
) first
JOIN (
SELECT userid, ts, channel, ROW_NUMBER() OVER(PARTITION BY userid ORDER BY ts DESC) seq
FROM raw_data.user_session_channel usc
JOIN raw_data.session_timestamp st ON usc.sessionid = st.sessionid
) last ON first.userid = last.userid and last.seq = 1
WHERE first.seq = 1;
%%sql
SELECT userid,
MAX(CASE WHEN rn1 = 1 THEN channel END) first_touch,
MAX(CASE WHEN rn2 = 1 THEN channel END) last_touch
FROM (
SELECT userid,
channel,
(ROW_NUMBER() OVER (PARTITION BY usc.userid ORDER BY st.ts asc)) AS rn1,
(ROW_NUMBER() OVER (PARTITION BY usc.userid ORDER BY st.ts desc)) AS rn2
FROM raw_data.user_session_channel usc
JOIN raw_data.session_timestamp st ON usc.sessionid = st.sessionid
)
GROUP BY 1;
group by 를 이용하기 때문에 aggregate함수를 무조건 써야함.
그러므로 max를 이용함. min도 사용 가능
%%sql
SELECT DISTINCT
A.userid,
FIRST_VALUE(A.channel) over(partition by A.userid order by B.ts
rows between unbounded preceding and unbounded following) AS First_Channel,
LAST_VALUE(A.channel) over(partition by A.userid order by B.ts
rows between unbounded preceding and unbounded following) AS Last_Channel
FROM raw_data.user_session_channel A
LEFT JOIN raw_data.session_timestamp B ON A.sessionid = B.sessionid
ORDER BY 1;
%%sql
SELECT
userID,
SUM(amount)
FROM raw_data.session_transaction st
LEFT JOIN raw_data.user_session_channel usc ON st.sessionid = usc.sessionid
GROUP BY 1
ORDER BY 2 DESC
LIMIT 10;
%%sql
SELECT DISTINCT
usc.userid,
SUM(amount) OVER(PARTITION BY usc.userid)
FROM raw_data.user_session_channel AS usc
JOIN raw_data.session_transaction AS revenue ON revenue.sessionid = usc.sessionid
ORDER BY 2 DESC
LIMIT 10;
%%sql
SELECT month,
ROUND((promoters-detractors)::float/total_count*100, 2) AS overall_nps
FROM (
SELECT LEFT(created, 7) AS month,
COUNT(CASE WHEN score >= 9 THEN 1 END) AS promoters,
COUNT(CASE WHEN score <= 6 THEN 1 END) AS detractors,
COUNT(CASE WHEN score > 6 AND score < 9 THEN 1 END) As passives,
COUNT(1) AS total_count
FROM raw_data.nps
GROUP BY 1
ORDER BY 1
);
%%sql
SELECT LEFT(created, 7) AS month,
ROUND(SUM(CASE
WHEN score >= 9 THEN 1
WHEN score <= 6 THEN -1 END)::float*100/COUNT(1), 2)
FROM raw_data.nps
GROUP BY 1
ORDER BY 1;
은행 계좌의 예가 좋은 예시가 될 수 있음
auto commit = True
모든 레코드 수정/삭제/추가 작업이 기본적으로 바로 데이터베이스에 쓰여짐
만약 특정 작업을 트랜잭션으로 묶고 싶다면 Begin, end(commit)/ rollback으로 처리
auto commit = False
모든 레코드 작업이 commit 호출 될 때까지 커밋되지 않음
Google Colab의 트랜잭션
기본적으로 모든 SQL statement가 바로 커밋됨
이름을 바꾸고 싶다면 BEGIN; END; 혹은 Begin; COMMIT을 사용 혹은 rollback
psycopg2 트랜잭션
autocommit 이라는 파라미터로 조절 가능
autocommit = True 가 되면 기본적으로 PostgreSQL의 커밋 모드와 동일
autocommit = False가 되면 커넥션 객체의 commit()과 .rollback()함수로 트랜잭션 조절 가능 무엇을 사용할지는 개인 취향
delete from table name
테이블에서 모든 레코드를 삭제 -> 롤백으로 돌릴 수 있음
truncate table name도 테이블에서 모든 레코드 삭제 - 속도가 빠름 but where과 transaction을 지원하지 않음. rollback이 되지 않는다.
LISTAGG - aggreagate 함수, 사용자를 ID별로 채널을 순서대로 리스트
Json parsing functions