SQL์์ JOIN์ด๋ ๋ ๊ฐ ๋๋ ๊ทธ ์ด์์ ํ
์ด๋ธ๋ค ํตํฉํ๋ ๊ฒ์ ๋งํ๋ค.
JOIN์ 6๊ฐ์ง๊ฐ ์กด์ฌํ๋๋ฐ ๊ฐ๊ฐ INNER, FULL, CROSS, LEFT, RIGHT, SELF ๊ฐ ์๋ค.
๐ JOIN
SELECT A.*, B.*
FROM raw_data.table1 A
JOIN raw_data.table2 B ON A.key = B.key;
๐ ์ฃผ์
SELECT *
FROM raw_data.User A
JOIN raw_data.product B ON A.userid = B.userid;
| A.userid | A.productid | A.weight | B.userid | B.type | B.date |
|---|---|---|---|---|---|
| 1001 | 1010 | 90 | 1001 | stick | 2024-04-25 |
SELECT *
FROM raw_data.User A
LEFT JOIN raw_data.product B ON A.userid = B.userid;
| A.userid | A.productid | A.weight | B.userid | B.type | B.date |
|---|---|---|---|---|---|
| 1001 | 1010 | 90 | NULL | NULL | NULL |
| 1002 | 2010 | 70 | NULL | NULL | NULL |
| 1003 | 1020 | 12 | 1003 | case | 2024-04-25 |
| 1004 | 3010 | 40 | NULL | NULL | NULL |
SELECT *
FROM raw_data.User A
FULL JOIN raw_data.product B ON A.userid = B.userid;
| A.userid | A.productid | A.weight | B.userid | B.type | B.date |
|---|---|---|---|---|---|
| 1001 | 1010 | 90 | NULL | NULL | NULL |
| 1002 | 2010 | 70 | NULL | pad | NULL |
| 1003 | 1020 | 12 | 1003 | case | 2024-04-25 |
| NULL | NULL | NULL | NULL | candy | 2024-04-22 |
SELECT *
FROM raw_data.User A
CROSS JOIN raw_data.product B ON A.userid = B.userid;
| A.userid | A.productid | A.weight | B.userid | B.type | B.date |
|---|---|---|---|---|---|
| 1001 | 1010 | 90 | 1001 | case | 2024-04-24 |
| 1001 | 2010 | 70 | 1001 | pad | 2024-05-22 |
| 1002 | 1020 | 12 | 1002 | case | 2024-04-25 |
| 1003 | 1020 | 12 | 1003 | case | 2024-04-22 |
SELECT *
FROM raw_data.User A1
JOIN raw_data.User A2 ON A1.userid = A2.userid;
| A1.userid | A1.productid | A1.weight | A2.userid | A2.productid | A2.weight |
|---|---|---|---|---|---|
| 1001 | 1010 | 90 | 1001 | 1010 | 90 |
SELECT
value,
COALESCE(value, 0) -> value๊ฐ NULL์ด๋ฉด 0์ ๋ฆฌํด
FROM raw_data.count_test;
%%sql
SELECT
A.userid AS userid,
A.channel AS First,
B.channel AS LAST
FROM (
SELECT
userid,
ts,
channel,
ROW_NUMBER() OVER(PARTITION BY userid ORDER BY ts) seq -- ๋ด๋ฆผ์ฐจ์์ผ๋ก ์ ๋ ฌ๋ ์ํ
FROM raw_data.user_session_channel usc
JOIN raw_data.session_timestamp st ON usc.sessionid = st.sessionid
) A
JOIN (
SELECT
userid,
ts,
channel,
ROW_NUMBER() OVER(PARTITION BY userid ORDER BY ts DESC) seq -- ์ค๋ฆ์ฐจ์์ผ๋ก ์ ๋ ฌ๋ ์ํ
FROM raw_data.user_session_channel usc
JOIN raw_data.session_timestamp st ON usc.sessionid = st.sessionid
) B ON A.userid = B.userid and B.seq = 1
WHERE A.seq = 1;
OUTPUT -> 
user_session_channel, session_transaction, session_timestamp 3๊ฐ ์ฌ์ฉํ๋ผ๊ณ ํ์ จ๋๋ฐ ๋ด ์๊ฐ์๋ timestamp๋ ํ์ ์์ด๋ณด์ Userid๋ ์๊ณ amount๋ ์๋ค.
๐ user_session_channel
%%sql
SELECT *
FROM raw_data.user_session_channel
LIMIT 2

๐ session_transaction
%%sql
SELECT *
FROM raw_data.session_transaction
LIMIT 2

๐ session_transaction
%%sql
SELECT *
FROM raw_data.session_timestamp
LIMIT 2

%%sql
SELECT
userid,
SUM(amount) AS Gross_Revenue
FROM raw_data.user_session_channel usc
JOIN raw_data.session_transaction st ON usc.sessionid = st.sessionid
GROUP BY userid
ORDER BY Gross_Revenue DESC -- ์์ 10๊ฐ๋๊น ์ด ์์ต ๊ธฐ์ค์ผ๋ก ๋ด๋ฆผ์ฐจ์
LIMIT 10
OUTPUT ->

๐ nps
%%sql
SELECT *
FROM raw_data.nps
LIMIT 10

score๋ฅผ ์ด์ฉํ๋ฉด ๋ ๊ฒ ๊ฐ๋ค.
%%sql
SELECT
LEFT(created_at, 7) AS Month,
ROUND(SUM(
CASE
WHEN score >= 9 THEN 1
WHEN score <= 6 THEN -1
END)::float * 100 / COUNT(1), 2
) AS NPS
FROM raw_data.nps
GROUP BY Month
ORDER BY Month
OUTPUT ->
