SELECT
event_date,
event_name,
COUNT(*) AS event_count
FROM
`bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
WHERE
event_name IN ('page_view', 'session_start', 'purchase')
AND _TABLE_SUFFIX BETWEEN '20201201' AND '20201202'
GROUP BY 1, 2;
user_id 수를 계산한다. 하지만 Google 애널리틱스 클라이언트에서 각 조회와 함께 user_id를 전송하지 않거나 확실하지 않은 경우 고유 user_pseudo_id 수를 계산한다.-- COUNT(DISTINCT)를 사용한 총 사용자 수
WITH
UserInfo AS (
SELECT
user_pseudo_id,
MAX(IF(event_name IN ('first_visit', 'first_open'), 1, 0)) AS is_new_user
-- Replace table name.
FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
-- Replace date range.
WHERE _TABLE_SUFFIX BETWEEN '20201101' AND '20201130'
GROUP BY 1
)
SELECT
COUNT(*) AS user_count,
SUM(is_new_user) AS new_user_count
FROM UserInfo;
-- APPROX_COUNT_DISTINCT를 사용한 대략적인 총 사용자 수
WITH
UserInfo AS (
SELECT
user_pseudo_id,
MAX(IF(event_name IN ('first_visit', 'first_open'), 1, 0)) AS is_new_user
-- Replace table name.
FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
-- Replace date range.
WHERE _TABLE_SUFFIX BETWEEN '20201101' AND '20201130'
GROUP BY 1
)
SELECT
COUNT(*) AS user_count,
SUM(is_new_user) AS new_user_count
FROM UserInfo;
-- BigQuery HLL+를 사용한 대략적인 총 사용자 수
SELECT
HLL_COUNT.EXTRACT(HLL_COUNT.INIT(user_pseudo_id, 15)) AS approx_total_user_count,
FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
-- Google 애널리틱스 UI에서 데이터를 복제하려면 다음과 같이 precision = 14를 사용
SELECT
HLL_COUNT.EXTRACT(HLL_COUNT.INIT(user_pseudo_id, 14)) AS total_user_count,
FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
⚠️ 확인 필요
first_visit 이벤트 또는 engagement_time_msec 매개변수first_open 이벤트 또는 engagement_time_msec 매개변수first_open 또는 user_engagement 이벤트WITH prep AS (
SELECT
event_date,
event_name,
user_pseudo_id,
(SELECT value.string_value FROM UNNEST(event_params) WHERE key = 'session_engaged')
AS session_engaged_flag,
(SELECT value.int_value FROM UNNEST(event_params)
WHERE key = 'engagement_time_msec') AS engagement_time_msec
FROM
TABLE_NAME
ORDER BY
event_date
)
SELECT
event_date,
COUNT(
DISTINCT
CASE
WHEN session_engaged_flag = '1' THEN user_pseudo_id
WHEN event_name = 'first_visit' THEN user_pseudo_id
WHEN engagement_time_msec > 0 THEN user_pseudo_id END
) AS active_user_count
FROM
prep
GROUP BY
event_date
ORDER BY
event_date
SELECT
COUNT(DISTINCT user_pseudo_id) as active_users
FROM
TABLE_NAME
WHERE
is_active_user is true
-- BigQuery HLL++ 함수를 사용한 대략적인 세션 수
SELECT
HLL_COUNT.EXTRACT(
HLL_COUNT.INIT(
CONCAT(
user_pseudo_id,
(SELECT `value` FROM UNNEST(event_params) WHERE key = 'ga_session_id' LIMIT 1).int_value),
12)) AS session_count,
FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
SELECT
COUNT(
DISTINCT
CONCAT(
user_pseudo_id,
(SELECT value.int_value FROM UNNEST(event_params) WHERE key = "ga_session_id")
)
) AS engaged_sessions
FROM
TABLE_NAME
WHERE
(SELECT value.int_value
FROM UNNEST(event_params)
WHERE key = "session_engaged") = 1
📑 reference