[TIL]2025-02-06(목)

Arin lee·2025년 2월 6일

오늘 할 일

TIL

MVP모델과 개선 버전 비교하기

  • 버전 태그를 추가해야함.
    이미 자동화된 쿼리에 추가작업 진행.
    🔹 1️⃣ events_flat 테이블에 version_tag 컬럼 추가
ALTER TABLE `travel-project-448705.analytics_processed.events_flat`
ADD COLUMN version_tag STRING;

1️⃣ 기존 events_flat 데이터 업데이트 (UPDATE 사용)

UPDATE `travel-project-448705.analytics_processed.events_flat`
SET version_tag = 
  CASE 
    WHEN CAST(event_date AS INT64) BETWEEN 20250201 AND 20250202 THEN '버전 1'
    WHEN CAST(event_date AS INT64) BETWEEN 20250206 AND 20250208 THEN '버전 2'
    ELSE '기타'
  END
WHERE version_tag IS NULL;  -- 🔹 이미 값이 있는 경우 업데이트 방지

✅ 2️⃣ 자동화된 INSERT에서 version_tag 포함 유지

INSERT INTO `travel-project-448705.analytics_processed.events_flat`
(
  event_date,
  event_datetime_kst,
  event_name,
  user_pseudo_id,
  page_path,
  non_interaction,
  batch_page_id,
  ga_session_number,
  page_location,
  engaged_session_event,
  page_referrer,
  batch_ordering_id,
  event_category,
  ga_session_id,
  event_label,
  session_engaged,
  page_title,
  engagement_time_sec,
  device_category,
  browser,
  browser_version,
  city,
  country<,
  utm_campaign,
  utm_medium,
  utm_source,
  last_click_campaign_name,
  last_click_source,
  last_click_medium,
  version_tag  -- 🔹 추가된 컬럼
)
SELECT
  e.event_date,
  TIMESTAMP_MICROS(e.event_timestamp) + INTERVAL 9 HOUR AS event_datetime_kst,
  e.event_name,
  e.user_pseudo_id,
  ep_page_path.value.string_value AS page_path,
  ep_non_interaction.value.string_value AS non_interaction,
  ep_batch_page_id.value.int_value AS batch_page_id,
  ep_ga_session_number.value.int_value AS ga_session_number,
  ep_page_location.value.string_value AS page_location,
  ep_engaged_session_event.value.int_value AS engaged_session_event,
  ep_page_referrer.value.string_value AS page_referrer,
  ep_batch_ordering_id.value.int_value AS batch_ordering_id,
  ep_event_category.value.string_value AS event_category,
  SAFE_CAST(ep_ga_session_id.value.string_value AS INT64) AS ga_session_id,
  ep_event_label.value.string_value AS event_label,
  ep_session_engaged.value.int_value AS session_engaged,
  ep_page_title.value.string_value AS page_title,
  ep_engagement_time.value.int_value / 1000 AS engagement_time_sec,
  e.device.category AS device_category,
  e.device.web_info.browser AS browser,
  e.device.web_info.browser_version AS browser_version,
  e.geo.city AS city,
  e.geo.country AS country,
  e.traffic_source.name AS utm_campaign,
  e.traffic_source.medium AS utm_medium,
  e.traffic_source.source AS utm_source,
  e.session_traffic_source_last_click.manual_campaign.campaign_name AS last_click_campaign_name,
  e.session_traffic_source_last_click.manual_campaign.source AS last_click_source,
  e.session_traffic_source_last_click.manual_campaign.medium AS last_click_medium,

  -- 🔹 버전 태그 추가
  CASE 
    WHEN CAST(e.event_date AS INT64) BETWEEN 20250201 AND 20250202 THEN '버전 1'
    WHEN CAST(e.event_date AS INT64) BETWEEN 20250206 AND 20250208 THEN '버전 2'
    ELSE '기타'
  END AS version_tag

FROM `travel-project-448705.analytics_474540276.events_*` AS e  

LEFT JOIN UNNEST(e.event_params) AS ep_page_path ON ep_page_path.key = 'page_path'
LEFT JOIN UNNEST(e.event_params) AS ep_non_interaction ON ep_non_interaction.key = 'non_interaction'
LEFT JOIN UNNEST(e.event_params) AS ep_batch_page_id ON ep_batch_page_id.key = 'batch_page_id'
LEFT JOIN UNNEST(e.event_params) AS ep_ga_session_number ON ep_ga_session_number.key = 'ga_session_number'
LEFT JOIN UNNEST(e.event_params) AS ep_page_location ON ep_page_location.key = 'page_location'
LEFT JOIN UNNEST(e.event_params) AS ep_engaged_session_event ON ep_engaged_session_event.key = 'engaged_session_event'
LEFT JOIN UNNEST(e.event_params) AS ep_page_referrer ON ep_page_referrer.key = 'page_referrer'
LEFT JOIN UNNEST(e.event_params) AS ep_batch_ordering_id ON ep_batch_ordering_id.key = 'batch_ordering_id'
LEFT JOIN UNNEST(e.event_params) AS ep_event_category ON ep_event_category.key = 'event_category'
LEFT JOIN UNNEST(e.event_params) AS ep_ga_session_id ON ep_ga_session_id.key = 'ga_session_id'
LEFT JOIN UNNEST(e.event_params) AS ep_event_label ON ep_event_label.key = 'event_label'
LEFT JOIN UNNEST(e.event_params) AS ep_session_engaged ON ep_session_engaged.key = 'session_engaged'
LEFT JOIN UNNEST(e.event_params) AS ep_page_title ON ep_page_title.key = 'page_title'
LEFT JOIN UNNEST(e.event_params) AS ep_engagement_time ON ep_engagement_time.key = 'engagement_time_msec'

WHERE
  CAST(e.event_date AS INT64) >= 20250201  
  AND e.event_date NOT IN (SELECT DISTINCT event_date FROM `travel-project-448705.analytics_processed.events_flat`);

처음에는 에러가 떠서 당황했는데 기존 데이터 셋에 들어갈 컬럼이 존재하지 않아서 발생하는 문제였음.
기전에 29의 열이 있으니 열을 추가한뒤,값을 넣어주고 이미 생성된 데이터는 update사용!

향후 일정

  • 버전 1,2 비교 대시보드 만들기

  • a/b테스트 결과 보고 test하기

profile
Be DBA

0개의 댓글