SQL#CHALLENGE3-1

codataffee·2024년 5월 29일
0

SQL

목록 보기
13/19
post-thumbnail

📌 퍼널 분석 과제

  • 환경 세팅
  1. MODE 접속 - 로그인 - Write SQL 클릭 - tutorial.yammer_events 검색 후 쿼리 조회

MODE 사이트의 SQL은 PostgreSQL을 사용..
날짜나 시간을 가지고 쿼리를 작성해야 하기 때문에 처음 사용하는 함수가 많았다..
날짜, 시간 함수 관련 참고 사이트


📌 과제 1

유저의 계정 생성 후 회원가입 완료 전환율 (location, device별)

목표
사용자가 계정을 생성한 후 24시간 이내
회원가입을 완료하는 전환율을 사용자 위치와 기기별로 계산.

결과
locationdevice를 기준으로 전환율을 백분율로 산출.


과정

1. 테이블 살펴보기

SELECT *
FROM tutorial.yammer_events

2. 사용할 데이터 찾기

SELECT DISTINCT
	   event_name
FROM tutorial.yammer_events

3. 유저 기준 create_user , complete_signup 최초 시간을 조회하는 CTE 생성하기

WITH create_complete AS (
    SELECT user_id
         , MIN(CASE WHEN event_name = 'create_user' THEN occurred_at END) create_user_time
         , MIN(CASE WHEN event_name = 'complete_signup' THEN occurred_at END) complete_signup_time
    FROM tutorial.yammer_events
    WHERE event_name IN ('create_user', 'complete_signup')
    GROUP BY user_id
)

SELECT *
FROM create_complete

4. create_user ~ complete_signup : 24시간 이내인지 조회하는 CTE 생성하기

WITH create_complete AS (
    SELECT user_id
         , MIN(CASE WHEN event_name = 'create_user' THEN occurred_at END) create_user_time
         , MIN(CASE WHEN event_name = 'complete_signup' THEN occurred_at END) complete_signup_time
    FROM tutorial.yammer_events
    WHERE event_name IN ('create_user', 'complete_signup')
    GROUP BY user_id
),
completed_time AS (
    SELECT user_id
         , create_user_time
         , complete_signup_time
         , CASE 
            WHEN complete_signup_time <= create_user_time + INTERVAL '1 day'
            THEN 1
            ELSE 0 
            END completed_within_24h
    FROM create_complete
)

SELECT *
FROM completed_time

5. 기존 이벤트 테이블과 completed_time 테이블을 조인하여
create_user 이벤트를 가지고 있는 유저들의 위치별, 기기별 백분율 구하기

SELECT e.location
     , e.device
     , ROUND(AVG(c.completed_within_24h) * 100, 2) conversion_rate
FROM tutorial.yammer_events e JOIN completed_time c
     ON e.user_id = c.user_id
WHERE e.event_name = 'create_user'
GROUP BY e.location, e.device 
ORDER BY 1, 2

결과

전체 쿼리

WITH create_complete AS (
    SELECT user_id
         , MIN(CASE WHEN event_name = 'create_user' THEN occurred_at END) create_user_time
         , MIN(CASE WHEN event_name = 'complete_signup' THEN occurred_at END) complete_signup_time
    FROM tutorial.yammer_events
    WHERE event_name IN ('create_user', 'complete_signup')
    GROUP BY user_id
),
completed_time AS (
    SELECT user_id
         , create_user_time
         , complete_signup_time
         , CASE 
            WHEN complete_signup_time <= create_user_time + INTERVAL '1 day'
            THEN 1
            ELSE 0 
            END completed_within_24h
    FROM create_complete
)

SELECT e.location
     , e.device
     , ROUND(AVG(c.completed_within_24h) * 100, 2) conversion_rate
FROM tutorial.yammer_events e JOIN completed_time c
     ON e.user_id = c.user_id
WHERE e.event_name = 'create_user'
GROUP BY e.location, e.device 
ORDER BY 1, 2

📌 과제 2

회원가입 전환율 단계별 분석

목표
유저의 계정 생성부터 회원가입 완료까지의
각 단계별 전환율을 24시간 이내에 완료된 경우로 계산.

  • 계정 생성 후 이메일 입력 화면 진입률.
  • 이메일 입력 화면 진입 후 개인 정보 입력 화면 진입률.
  • 개인 정보 입력 화면 진입 후 회원가입 완료 전환율.
  • 계정 생성 후 회원가입 완료 전환율.

결과
각 단계별 전환율을 백분율로 산출.


과정

1. 테이블 살펴보기

SELECT *
FROM tutorial.yammer_events

2. 사용할 데이터 찾기

SELECT DISTINCT
	   event_name
FROM tutorial.yammer_events

3. 각 행동별 최초 시간을 조회하는 CTE 생성하기

WITH user_actions AS (
    SELECT user_id
    	 , MIN(CASE WHEN event_name = 'create_user' THEN occurred_at END) create_user_time
         , MIN(CASE WHEN event_name = 'enter_email' THEN occurred_at END) enter_email_time
         , MIN(CASE WHEN event_name = 'enter_info' THEN occurred_at END) enter_info_time
         , MIN(CASE WHEN event_name = 'complete_signup' THEN occurred_at END) complete_signup_time
    FROM tutorial.yammer_events
    WHERE event_name IN ('create_user', 'enter_email', 'enter_info', 'complete_signup')
    GROUP BY user_id
)

SELECT *
FROM user_actions

4. 각 단계별 시간 간격이 24시간 이내인지 조회하는 CTE 생성하기

WITH user_actions AS (
    SELECT user_id
    	 , MIN(CASE WHEN event_name = 'create_user' THEN occurred_at END) create_user_time
         , MIN(CASE WHEN event_name = 'enter_email' THEN occurred_at END) enter_email_time
         , MIN(CASE WHEN event_name = 'enter_info' THEN occurred_at END) enter_info_time
         , MIN(CASE WHEN event_name = 'complete_signup' THEN occurred_at END) complete_signup_time
    FROM tutorial.yammer_events
    WHERE event_name IN ('create_user', 'enter_email', 'enter_info', 'complete_signup')
    GROUP BY user_id
),
completion_status AS (
    SELECT user_id
    	 , create_user_time
         , enter_email_time
         , enter_info_time
         , complete_signup_time
         , CASE 
            WHEN enter_email_time <= create_user_time + INTERVAL '1 day'
            THEN 1
            ELSE 0
            END enter_email_24h
         , CASE 
            WHEN enter_info_time <= enter_email_time + INTERVAL '1 day'
            THEN 1
            ELSE 0
            END enter_info_24h
         , CASE 
            WHEN complete_signup_time <= enter_info_time + INTERVAL '1 day'
            THEN 1
            ELSE 0
            END complete_signup_24h
         , CASE 
            WHEN complete_signup_time <= create_user_time + INTERVAL '1 day'
            THEN 1
            ELSE 0
            END signup_complete_24h
    FROM user_actions
)

SELECT *
FROM completion_status

5. completion_status 테이블에서 각 단계별 전환 백분율 구하기

SELECT ROUND(AVG(enter_email_24h) * 100, 2) enter_email_rate
	 , ROUND(AVG(enter_info_24h) * 100, 2) enter_info_rate
     , ROUND(AVG(complete_signup_24h) * 100, 2) complete_signup_rate
     , ROUND(AVG(signup_complete_24h) * 100, 2) total_conversion_rate
FROM completion_status

결과

전체 쿼리

WITH user_actions AS (
    SELECT user_id
    	 , MIN(CASE WHEN event_name = 'create_user' THEN occurred_at END) create_user_time
         , MIN(CASE WHEN event_name = 'enter_email' THEN occurred_at END) enter_email_time
         , MIN(CASE WHEN event_name = 'enter_info' THEN occurred_at END) enter_info_time
         , MIN(CASE WHEN event_name = 'complete_signup' THEN occurred_at END) complete_signup_time
    FROM tutorial.yammer_events
    WHERE event_name IN ('create_user', 'enter_email', 'enter_info', 'complete_signup')
    GROUP BY user_id
),
completion_status AS (
    SELECT user_id
    	 , create_user_time
         , enter_email_time
         , enter_info_time
         , complete_signup_time
         , CASE 
            WHEN enter_email_time <= create_user_time + INTERVAL '1 day'
            THEN 1
            ELSE 0
            END enter_email_24h
         , CASE 
            WHEN enter_info_time <= enter_email_time + INTERVAL '1 day'
            THEN 1
            ELSE 0
            END enter_info_24h
         , CASE 
            WHEN complete_signup_time <= enter_info_time + INTERVAL '1 day'
            THEN 1
            ELSE 0
            END complete_signup_24h
         , CASE 
            WHEN complete_signup_time <= create_user_time + INTERVAL '1 day'
            THEN 1
            ELSE 0
            END signup_complete_24h
    FROM user_actions
)

SELECT ROUND(AVG(enter_email_24h) * 100, 2) enter_email_rate
	 , ROUND(AVG(enter_info_24h) * 100, 2) enter_info_rate
     , ROUND(AVG(complete_signup_24h) * 100, 2) complete_signup_rate
     , ROUND(AVG(signup_complete_24h) * 100, 2) total_conversion_rate
FROM completion_status

📌 과제 3

회원가입 전환율 및 이후 행동 전환율 분석

목표
유저가 계정을 생성한 후 30분 이내에 각 단계별 전환율을 계산.

  • 계정 생성 후 회원가입 완료 전환율.
  • 회원가입 완료 후 홈페이지 진입률.
  • 홈페이지 진입 후 메시지 좋아요 클릭률.
  • 회원가입 후 메시지 좋아요 클릭 전환율.

결과
각 단계별 전환율을 백분율로 산출.


과정

1. 테이블 살펴보기

SELECT *
FROM tutorial.yammer_events

2. 사용할 데이터 찾기

SELECT DISTINCT
       event_name
FROM tutorial.yammer_events
WHERE event_name IN ('create_user', 'complete_signup', 'home_page', 'like_message')

3. 각 행동별 최초 시간을 조회하는 CTE 생성하기

WITH user_actions AS (
    SELECT user_id
         , MIN(CASE WHEN event_name = 'create_user' THEN occurred_at END) create_user_time
         , MIN(CASE WHEN event_name = 'complete_signup' THEN occurred_at END) complete_signup_time
         , MIN(CASE WHEN event_name = 'home_page' THEN occurred_at END) home_page_time
         , MIN(CASE WHEN event_name = 'like_message' THEN occurred_at END) like_message_time
    FROM tutorial.yammer_events
    WHERE event_name IN ('create_user', 'complete_signup', 'home_page', 'like_message')
    GROUP BY user_id
)

SELECT *
FROM user_actions
ORDER BY user_actions.create_user_time

4. 각 행동 단계별 전환이 30분 이내인 값 조회하는 CTE 생성하기

WITH user_actions AS (
    SELECT user_id
         , MIN(CASE WHEN event_name = 'create_user' THEN occurred_at END) create_user_time
         , MIN(CASE WHEN event_name = 'complete_signup' THEN occurred_at END) complete_signup_time
         , MIN(CASE WHEN event_name = 'home_page' THEN occurred_at END) home_page_time
         , MIN(CASE WHEN event_name = 'like_message' THEN occurred_at END) like_message_time
    FROM tutorial.yammer_events
    WHERE event_name IN ('create_user', 'complete_signup', 'home_page', 'like_message')
    GROUP BY user_id
),
completion_status AS (
    SELECT user_id
      	 , create_user_time
         , complete_signup_time
         , home_page_time
         , like_message_time
         , CASE 
            WHEN complete_signup_time <= create_user_time + INTERVAL '30 minutes'
            THEN 1
            ELSE 0
            END complete_signup_30m
         , CASE 
            WHEN home_page_time <= complete_signup_time + INTERVAL '30 minutes'
            THEN 1
            ELSE 0
            END home_page_30m
         , CASE 
            WHEN like_message_time <= home_page_time + INTERVAL '30 minutes'
            THEN 1
            ELSE 0
            END like_message_30m
         , CASE 
            WHEN like_message_time <= complete_signup_time + INTERVAL '30 minutes'
            THEN 1
            ELSE 0
            END signup_like_message_30m
    FROM user_actions
)

SELECT *
FROM completion_status
ORDER BY completion_status.create_user_time

5. completion_status 테이블에서 각 단계별 전환 백분율 구하기

SELECT ROUND(AVG(complete_signup_30m) * 100, 2) complete_signup_rate
  	 , ROUND(AVG(home_page_30m) * 100, 2) home_page_rate
     , ROUND(AVG(like_message_30m) * 100, 2) like_message_rate
     , ROUND(AVG(signup_like_message_30m) * 100, 2) signup_like_message_rate
FROM completion_status

결과

전체 쿼리

WITH user_actions AS (
    SELECT user_id
         , MIN(CASE WHEN event_name = 'create_user' THEN occurred_at END) create_user_time
         , MIN(CASE WHEN event_name = 'complete_signup' THEN occurred_at END) complete_signup_time
         , MIN(CASE WHEN event_name = 'home_page' THEN occurred_at END) home_page_time
         , MIN(CASE WHEN event_name = 'like_message' THEN occurred_at END) like_message_time
    FROM tutorial.yammer_events
    WHERE event_name IN ('create_user', 'complete_signup', 'home_page', 'like_message')
    GROUP BY user_id
),
completion_status AS (
    SELECT user_id
      	 , create_user_time
         , complete_signup_time
         , home_page_time
         , like_message_time
         , CASE 
            WHEN complete_signup_time <= create_user_time + INTERVAL '30 minutes'
            THEN 1
            ELSE 0
            END complete_signup_30m
         , CASE 
            WHEN home_page_time <= complete_signup_time + INTERVAL '30 minutes'
            THEN 1
            ELSE 0
            END home_page_30m
         , CASE 
            WHEN like_message_time <= home_page_time + INTERVAL '30 minutes'
            THEN 1
            ELSE 0
            END like_message_30m
         , CASE 
            WHEN like_message_time <= complete_signup_time + INTERVAL '30 minutes'
            THEN 1
            ELSE 0
            END signup_like_message_30m
    FROM user_actions
)

SELECT ROUND(AVG(complete_signup_30m) * 100, 2) complete_signup_rate
  	 , ROUND(AVG(home_page_30m) * 100, 2) home_page_rate
     , ROUND(AVG(like_message_30m) * 100, 2) like_message_rate
     , ROUND(AVG(signup_like_message_30m) * 100, 2) signup_like_message_rate
FROM completion_status

📌 [심화] 과제 4

일별로 회원 가입 프로세스의 각 단계 완료 평균 시간

목표
일별 회원 가입 프로세스의 각 단계 완료 평균 시간을 계산.

  • 계정 생성 후 이메일 입력 화면 진입까지.
  • 이메일 입력 화면 진입 후 개인 정보 입력 화면 진입까지.
  • 개인 정보 입력 화면 진입 후 회원가입 완료까지.
  • 계정 생성 후 회원가입 완료까지.

결과
각 단계별 평균 시간을 분 단위로 산출.


과정

1. 테이블 살펴보기

SELECT *
FROM tutorial.yammer_events

2. 사용할 데이터 찾기

SELECT occurred_at
     , event_name
FROM tutorial.yammer_events
WHERE event_name IN ('create_user', 'enter_email', 'enter_info', 'complete_signup')

3. 각 행동별 최초 시간과 해당 일자를 조회하는 CTE 생성하기

WITH user_actions AS (
    SELECT user_id
         , DATE_TRUNC('day', MIN(CASE WHEN event_name = 'create_user' THEN occurred_at END)) event_day
         , MIN(CASE WHEN event_name = 'create_user' THEN occurred_at END) create_user_time
         , MIN(CASE WHEN event_name = 'enter_email' THEN occurred_at END) enter_email_time
         , MIN(CASE WHEN event_name = 'enter_info' THEN occurred_at END) enter_info_time
         , MIN(CASE WHEN event_name = 'complete_signup' THEN occurred_at END) complete_signup_time
    FROM tutorial.yammer_events
    WHERE event_name IN ('create_user', 'enter_email', 'enter_info', 'complete_signup')
    GROUP BY user_id
)

SELECT *
FROM user_actions

4. 해당 일자와 각 행동 전환에 걸리는 시간(분으로 변환)을 조회하는 CTE 생성하기

WITH user_actions AS (
    SELECT user_id
         , DATE_TRUNC('day', MIN(CASE WHEN event_name = 'create_user' THEN occurred_at END)) event_day
         , MIN(CASE WHEN event_name = 'create_user' THEN occurred_at END) create_user_time
         , MIN(CASE WHEN event_name = 'enter_email' THEN occurred_at END) enter_email_time
         , MIN(CASE WHEN event_name = 'enter_info' THEN occurred_at END) enter_info_time
         , MIN(CASE WHEN event_name = 'complete_signup' THEN occurred_at END) complete_signup_time
    FROM tutorial.yammer_events
    WHERE event_name IN ('create_user', 'enter_email', 'enter_info', 'complete_signup')
    GROUP BY user_id
),
completion_time AS (
    SELECT event_day
         , (DATE_PART('day', enter_email_time - create_user_time) * 24 * 60 +
            DATE_PART('hour', enter_email_time - create_user_time) * 60 +
            DATE_PART('minute', enter_email_time - create_user_time) +
            DATE_PART('second', enter_email_time - create_user_time) / 60) create_enter_minutes
         , (DATE_PART('day', enter_info_time - enter_email_time) * 24 * 60 +
            DATE_PART('hour', enter_info_time - enter_email_time) * 60 +
            DATE_PART('minute', enter_info_time - enter_email_time) +
            DATE_PART('second', enter_info_time - enter_email_time) / 60) email_info_minutes
         , (DATE_PART('day', complete_signup_time - enter_info_time) * 24 * 60 +
            DATE_PART('hour', complete_signup_time - enter_info_time) * 60 +
            DATE_PART('minute', complete_signup_time - enter_info_time) +
            DATE_PART('second', complete_signup_time - enter_info_time) / 60) info_signup_minutes
         , (DATE_PART('day', complete_signup_time - create_user_time) * 24 * 60 +
            DATE_PART('hour', complete_signup_time - create_user_time) * 60 +
            DATE_PART('minute', complete_signup_time - create_user_time) +
            DATE_PART('second', complete_signup_time - create_user_time) / 60) create_signup_minutes
    FROM user_actions
)

SELECT *
FROM completion_time

5. 일자별로 그룹화 후 각 시간들의 평균 조회

::NUMERIC : 각 시간의 차이를 분으로 만들었던 것을

SELECT event_day
     , ROUND(AVG(create_enter_minutes)::NUMERIC, 2) avg_create_enter_minutes
     , ROUND(AVG(email_info_minutes)::NUMERIC, 2) avg_email_info_minutes
     , ROUND(AVG(info_signup_minutes)::NUMERIC, 2) avg_info_signup_minutes
     , ROUND(AVG(create_signup_minutes)::NUMERIC, 2) avg_create_signup_minutes
FROM completion_time
GROUP BY 1
ORDER BY 1

결과 : 어찌어찌 구하긴 한 것 같은데.. 이게 맞나?

전체 쿼리

WITH user_actions AS (
    SELECT user_id
         , DATE_TRUNC('day', MIN(CASE WHEN event_name = 'create_user' THEN occurred_at END)) event_day
         , MIN(CASE WHEN event_name = 'create_user' THEN occurred_at END) create_user_time
         , MIN(CASE WHEN event_name = 'enter_email' THEN occurred_at END) enter_email_time
         , MIN(CASE WHEN event_name = 'enter_info' THEN occurred_at END) enter_info_time
         , MIN(CASE WHEN event_name = 'complete_signup' THEN occurred_at END) complete_signup_time
    FROM tutorial.yammer_events
    WHERE event_name IN ('create_user', 'enter_email', 'enter_info', 'complete_signup')
    GROUP BY user_id
),
completion_time AS (
    SELECT event_day
         , (DATE_PART('day', enter_email_time - create_user_time) * 24 * 60 +
            DATE_PART('hour', enter_email_time - create_user_time) * 60 +
            DATE_PART('minute', enter_email_time - create_user_time) +
            DATE_PART('second', enter_email_time - create_user_time) / 60) create_enter_minutes
         , (DATE_PART('day', enter_info_time - enter_email_time) * 24 * 60 +
            DATE_PART('hour', enter_info_time - enter_email_time) * 60 +
            DATE_PART('minute', enter_info_time - enter_email_time) +
            DATE_PART('second', enter_info_time - enter_email_time) / 60) email_info_minutes
         , (DATE_PART('day', complete_signup_time - enter_info_time) * 24 * 60 +
            DATE_PART('hour', complete_signup_time - enter_info_time) * 60 +
            DATE_PART('minute', complete_signup_time - enter_info_time) +
            DATE_PART('second', complete_signup_time - enter_info_time) / 60) info_signup_minutes
         , (DATE_PART('day', complete_signup_time - create_user_time) * 24 * 60 +
            DATE_PART('hour', complete_signup_time - create_user_time) * 60 +
            DATE_PART('minute', complete_signup_time - create_user_time) +
            DATE_PART('second', complete_signup_time - create_user_time) / 60) create_signup_minutes
    FROM user_actions
)

SELECT event_day
     , ROUND(AVG(create_enter_minutes)::NUMERIC, 2) avg_create_enter_minutes
     , ROUND(AVG(email_info_minutes)::NUMERIC, 2) avg_email_info_minutes
     , ROUND(AVG(info_signup_minutes)::NUMERIC, 2) avg_info_signup_minutes
     , ROUND(AVG(create_signup_minutes)::NUMERIC, 2) avg_create_signup_minutes
FROM completion_time
GROUP BY 1
ORDER BY 1

📌 [심화] 과제 5

퍼널 분석을 통한 인사이트 도출

목표
다양한 테이블을 조인하여 퍼널 분석을 수행하고,
실험 그룹 간의 전환율 차이를 비교 분석.

... 레벨업 좀 더 하고 시도해볼 예정

profile
커피 좋아하는 데이터 꿈나무

0개의 댓글

관련 채용 정보