
데이터 웨어하우스와 SQL 기본에 대해 배우고 이를 바탕으로 데이터 분석에 대해 학습 (3)
SELECT
LEFT(ts, 7) AS mon,
COUNT(1) AS session_count
FROM raw_data.session_timestamp
GROUP BY 1 -- GROUP BY mon, GROUP BY LEFT(ts, 7)
ORDER BY 1;
세션 기반 or 유저 기반 생각
SELECT
channel,
COUNT(1) AS session_count,
COUNT(DISTINCT userId) AS user_count
FROM raw_data.user_session_channel
GROUP BY 1 -- GROUP BY channel
ORDER BY 2 DESC; -- ORDER BY user_count DESC
SELECT
userId,
COUNT(1) AS count
FROM raw_data.user_session_channel
GROUP BY 1 -- GROUP BY userId
ORDER BY 2 DESC -- ORDER BY count DESC
LIMIT 1;
user_session_channel 테이블과 session_timestamp 모두 필요
-> 두 테이블 조인
SELECT
TO_CHAR(A.ts, 'YYYY-MM') AS month,
COUNT(DISTINCT B.userid) AS mau
FROM raw_data.session_timestamp A
JOIN raw_data.user_session_channel B ON A.sessionid = B.sessionid
GROUP BY 1
ORDER BY 1 DESC;
user_session_channel 테이블과 session_timestamp 모두 필요
-> 두 테이블 조인
SELECT
TO_CHAR(A.ts, 'YYYY-MM') AS month,
channel,
COUNT(DISTINCT B.userid) AS mau
FROM raw_data.session_timestamp A
JOIN raw_data.user_session_channel B ON A.sessionid = B.sessionid
GROUP BY 1, 2
ORDER BY 1 DESC, 2;
CREATE TABLE adhoc.trick_session_summary AS
SELECT B.*, A.ts FROM raw_data.session_timestamp A
JOIN raw_data.user_session_channel B ON A.sessionid = B.sessionid;
SELECT
TO_CHAR(ts, 'YYYY-MM') AS month,
COUNT(DISTINCT userid) AS mau
FROM adhoc.trick_session_summary
GROUP BY 1
ORDER BY 1 DESC;
SELECT COUNT(1)
FROM adhoc.trick_session_summary;
SELECT COUNT(1)
FROM (
SELECT DISTINCT userId, sessionId, ts, channel
FROM adhoc.trick_session_summary
);
With ds AS (
SELECT DISTINCT userId, sessionId, ts, channel
FROM adhoc.trick_session_summary
)
SELECT COUNT(1)
FROM ds;
SELECT MIN(ts), MAX(ts)
FROM adhoc.trick_session_summary;
SELECT sessionId, COUNT(1)
FROM adhoc.trick_session_summary
GROUP BY 1
ORDER BY 2 DESC
LIMIT 1;
SELECT
COUNT(CASE WHEN sessionId is NULL THEN 1 END) sessionid_null_count,
COUNT(CASE WHEN userId is NULL THEN 1 END) userid_null_count,
COUNT(CASE WHEN ts is NULL THEN 1 END) ts_null_count,
COUNT(CASE WHEN channel is NULL THEN 1 END) channel_null_count
FROM adhoc.trick_session_summary;
CASE WHEN 문에서 ELSE가 없어 NULL값을 리턴(COUNT는 NULL이 아닌 값만을 센다.)