
ํ ์ด๋ธ์ ๋ ์ฝ๋๋ฅผ ๊ทธ๋ฃนํํ์ฌ ๊ทธ๋ฃน๋ณ๋ก ๋ค์ํ ์ ๋ณด๋ฅผ ๊ณ์ฐํ๋ค.
๋ ๋จ๊ณ๋ก ์ด๋ฃจ์ด์ง
1.๋จผ์ ๊ทธ๋ฃนํ์ ํ ํ๋๋ฅผ ์ง์ (ํ๋ ์ด์์ ํ๋๊ฐ ๋ ์ ์์)
2.๋ค์ ๊ทธ๋ฃน๋ณ๋ก ๊ณ์ฐํ ๋ด์ฉ์ ์ง์
EX1) ์๋ณ ์ธ์
์๋ฅผ ๊ณ์ฐํ๋ SQL
-raw_data.session_timestamp๋ฅผ ์ฌ์ฉ(sessionId์ tsํ๋)
SELECT
LEFT(ts, 7) AS mon, -- timestampํ์์ด ๋ฌธ์์ด ํจ์์ ๋ค์ด๊ฐ ๋ฌธ์์ด๋ก ์ถ์ถ๋จ
COUNT(1) AS session_count
FROM raw_data.session_timestamp
GROUP BY 1 -- GROUP BY mon, GROUP BY LEFT(ts, 7)
ORDER BY 1; -- mon ํ๋๋ฅผ ๊ธฐ์ค์ผ๋ก ์ค๋ฆ์ฐจ์ ์ ๋ ฌ
EX2) ๊ฐ์ฅ ๋ง์ด ์ฌ์ฉ๋ ์ฑ๋์ ๋ฌด์์ธ๊ฐ?
SELECT
channel,
COUNT(1) AS session_count, -- ์ฑ๋๋ณ ์ธ์
์
COUNT(DISTINCT userId) AS user_count -- ์ฑ๋๋ณ ์ ์ผํ ์ฌ์ฉ์ ์
FROM raw_data.user_session_channel
GROUP BY 1 -- GROUP BY channel
ORDER BY 2 DESC; -- ORDER BY session_count DESC
EX3) ๊ฐ์ฅ ๋ง์ ์ธ์ ์ ๋ง๋ค์ด๋ธ ์ฌ์ฉ์ ID๋ ๋ฌด์์ธ๊ฐ?
SELECT
userId,
COUNT(1) AS count
FROM raw_data.user_session_channel
GROUP BY 1 -- GROUP BY userId
ORDER BY 2 DESC -- ORDER BY count DESC
LIMIT 1; -- ๊ฐ์ฅ ๋ง์ ์ธ์
์ ๋ง๋ ์ฌ์ฉ์
EX4) ์๋ณ ์ ๋ํฌํ ์ฌ์ฉ์ ์(MAU)
SETECT
TO_CHAR(A.ts, 'YYYY_MM') AS month,
COUNT(DISTINCT B.userid) AS mau
FROM raw_data.session_timestamp A
JOIN raw_data.user_session_channel B ON A.sessionid = B.sessionid -- INNER JOIN(์์ชฝํ
์ด๋ธ ์ค์ ๋ค ์กฐ๊ฑด์ ๋ง๋ ๋ ์ฝ๋๋ง ๋จ์)
GROUP BY 1
GROUP BY 1 DESC;
*TO_CHAR: timestamp ํ์
์ ๋ฌธ์์ด๋ก ๋ณํ
LEFT(A.ts, 7)
DATE_TRUNC('month', A.ts): ์ด๊ฒ๋ง timestamp ํ์
์ผ๋ก ๋ฐํ, ํด๋น ์ 1์ผ๋ก ๋ฐ๊ฟ
SUBSTRING(A.ts, 1, 7)
EX5) ์๋ณ ์ฑ๋๋ณ ์ ๋ํฌํ ์ฌ์ฉ์ ์
SETECT
TO_CHAR(A.ts, 'YYYY_MM') AS month,
channel
COUNT(DISTINCT B.userid) AS mau
FROM raw_data.session_timestamp A
JOIN raw_data.user_session_channel B ON A.sessionid = B.sessionid
GROUP BY 1, 2
GROUP BY 1 DESC, 2; -- ์๋ณ์ ๋ด๋ฆผ์ฐจ์ ์ฑ๋๋ณ์ ์ค๋ฆ์ฐจ์
CTAS : SELECT๋ฅผ ๊ฐ์ง๊ณ ๊ฐ๋จํ๊ฒ ์๋ก์ด ํ ์ด๋ธ์ ๋ง๋๋ ๋ฐฉ๋ฒ.
์์ฃผ ์กฐ์ธํ๋ ํ ์ด๋ธ๋ค์ด ์๋ค๋ฉด ์ด๋ฅผ CTAS๋ฅผ ์ฌ์ฉํด์ ์กฐ์ธํด๋๋ฉด ํธ๋ฆฌํด์ง๋ค.
DROP TABLE IF EXISTS adhoc.junhee_session_summary; -- ํ
์ด๋ธ์ด ์ด๋ฏธ ์กด์ฌํ๋ฉด DROP
CREATE TABLE adhoc.junhee_session_summary AS -- ์๋ก์ด ํ
์ด๋ธ ์์ฑ
SELECT B.*, A.ts FROM raw_data.session_timestamp A
JOIN raw_data.user_session_channel B ON A.sessionid = B.sessionid;
์๋ณ ์ ๋ํฌํ ์ฌ์ฉ์ ์๋ฅผ ๋ค์ ์์ฑ
SETECT
TO_CHAR(A.ts, 'YYYY_MM') AS month,
COUNT(DISTINCT userid) AS mau
FROM adhoc.junhee_session_summary
GROUP BY 1
GROUP BY 1 DESC
SELECT COUNT(1) FROM adhoc.junhee_session_summary; -- ๋ชจ๋ ๋ ์ฝ๋ ์ ์ฒดํฌ
SELECT COUNT(1)
FROM (
SELECT DISTINCT userId, sessionId, ts, channel -- ์ ์ผํ ๋ ์ฝ๋ ์ ์ฒดํฌ
FROM adhoc.junhee_session_summary
);
With ds AS ( -- ์ค๋ณต ์ ๊ฑฐ์ ๊ฒฐ๊ณผ๊ฐ ds๋ผ๋ ์์ ํ
์ด๋ธ๋ก ๋ง๋ค์ด์ง
SELECT DISTINCT userId, sessionId, ts, channel
FROM adhoc.junhee_session_summary
)
SELECT COUNT(1)
FROM ds;
SELECT MIN(ts), MAX(ts)
FROM adhoc.junhee_session_summary;
4.Primary key uniqueness๊ฐ ์ง์ผ์ง๋์ง ์ฒดํฌํ๊ธฐ
์ฌ๊ธฐ์๋ sessionId๊ฐ Primary key
SELECT sessionId, COUNT(1)
FROM adhoc.junhee_session_summary
GROUP BY 1
ORDER BY 2 DESC
LIMIT 1;
SELECT
COUNT(CASE WHEN sessionId is NULL THEN 1 END) sessionid_null_count, -- ๊ฐ์ด NULL์ด๋ฉด ์นด์ดํธ
COUNT(CASE WHEN userId is NULL THEN 1 END) userid_null_count,
COUNT(CASE WHEN ts is NULL THEN 1 END) ts_null_count,
COUNT(CASE WHEN channel is NULL THEN 1 END) channel_null_count
FROM adhoc.junhee_session_summary;
๊ฐ์ฌํฉ๋๋ค. ์ด๋ฐ ์ ๋ณด๋ฅผ ๋๋ ์ฃผ์ ์ ์ข์์.