이커머스 데이터를 통한 사업 현황 파악 - BigQuery에서 날짜 다루기

dpwl·2024년 5월 12일
0

Data Analysis with SQL

목록 보기
50/120

1. BigQuery 날짜

  • DATE(): 2024-03-08
  • DATEIME(): 2024-03-08T11:10:33
  • TIMESTAMP(): 2024-03-08 11:10:33 UTC
  • TIME(): 11:10:33
    UTC는 타임존으로 한국은 UTC +9으로 표시가 된다.
SELECT
  DATE(2024, 5, 5) AS date_ymd,
  DATE(DATETIME '2024-05-05 23:59:59') AS date_dt,
  DATE(TIMESTAMP '2024-05-05 05:30:00+07', 'America/Los_Angeles') AS date_tstz;

SELECT
  DATE(2024, 5, 5) AS date_ymd,
  DATE(DATETIME '2024-05-05 23:59:59') AS date_dt,
  DATE(TIMESTAMP '2024-05-05 05:30:00+07', 'Asia/Seoul') AS date_tstz;
-- 'Asia/Seoul': 한국 시간대

SELECT
  DATETIME(2008, 8, 24, 05, 30, 00) AS datetime_ymdhms,
  DATETIME(TIMESTAMP "2008-08-24 05:30:00+00", "Asia/Seoul") AS datetime_tstz;

-- DATE() / DATETIME() / TIMESTAMP()

SELECT
  order_purchase_timestamp,
  DATE(order_purchase_timestamp) AS ord_date,
  DATETIME(order_purchase_timestamp) AS ord_dt,
  TIMESTAMP(order_purchase_timestamp) AS ord_ts,
  TIMESTAMP(DATE(order_purchase_timestamp)) AS ord_date_ts,
  TIME(order_purchase_timestamp) AS ord_time
FROM `olist.olist_orders`
;

2. BigQuery 날짜 추출

날짜에서 일부분만 추출하기

EXTRACT('추출할 부분' FROM '날짜 컬럼')
추출할 부분의미결과(정수)
YEAR연도2024
MONTH11
DAY25
DAYOFWEEK요일(0: 일요일 ~ 6: 토요일)2
WEEK주(기본 일요일 시작)13
WEEK(MONDAY)주(월요일 시작)12
QUARTER분기2
HOUR11
MINUTE10
SECOND33

SELECT
  order_purchase_timestamp,
  EXTRACT(YEAR FROM order_purchase_timestamp) AS YEAR,
  EXTRACT(QUARTER FROM order_purchase_timestamp) AS QUARTER,
  EXTRACT(MONTH FROM order_purchase_timestamp) AS MONTH,
  EXTRACT(DAY FROM order_purchase_timestamp) AS DAY,
  EXTRACT(DAYOFWEEK FROM order_purchase_timestamp) AS DAYOFWEEK,
  EXTRACT(WEEK FROM order_purchase_timestamp) AS WEEK,
  EXTRACT(WEEK(MONDAY) FROM order_purchase_timestamp) AS WEEK_MONDAY,
  EXTRACT(hour FROM order_purchase_timestamp) AS HOUR,
  EXTRACT(MINUTE FROM order_purchase_timestamp) AS MINUTE,
  EXTRACT(SECOND FROM order_purchase_timestamp) AS SECOND
FROM `olist.olist_orders`
WHERE
  DATE(order_purchase_timestamp) = '2017-11-26' -- 일요일
;

3. BigQuery 원하는 날짜만 추출

날짜에서 원하는 정보만 남기기

DATE_TRUNC('날짜', '남기는 부분')
DATETIME_TRUNC('날짜', '남기는 부분')
TIMESTAMP_TRUNC('날짜', '남기는 부분')
남기는 부분의미결과(날짜)예시(2017-11-26 16:13:13 UTC
YEAR연도해당 연도의 첫 날2017-01-01 00:00:00 UTC
MONTH해당 연도, 월의 첫 날2017-11-01 00:00:00 UTC
DAY해당 날짜의 0시 0분2017-11-26 00:00:00 UTC
WEEK주(기본 일요일 시작)해당 주차의 첫 날(일)2017-11-26 00:00:00 UTC
WEEK(MONDAY)주(월요일 시작)해당 주차의 첫 날(월)2017-11-20 00:00:00 UTC
QUARTER분기해당 분기의 첫 날2017-10-01 00:00:00 UTC
HOUR2017-01-01 00:00:00 UTC
MINUTE2017-01-01 00:00:00 UTC
SECOND2017-01-01 00:00:00 UTC
SELECT
  order_purchase_timestamp,
  TIMESTAMP_TRUNC(order_purchase_timestamp, YEAR) AS YEAR,
  TIMESTAMP_TRUNC(order_purchase_timestamp, QUARTER) AS QUARTER,
  TIMESTAMP_TRUNC(order_purchase_timestamp, MONTH) AS MONTH,
  TIMESTAMP_TRUNC(order_purchase_timestamp, DAY) AS DAY,
  TIMESTAMP_TRUNC(order_purchase_timestamp, WEEK) AS WEEK,
  TIMESTAMP_TRUNC(order_purchase_timestamp, WEEK(MONDAY)) AS WEEK_MONDAY,
  TIMESTAMP_TRUNC(order_purchase_timestamp, HOUR) AS HOUR,
  TIMESTAMP_TRUNC(order_purchase_timestamp, MINUTE) AS MINUTE,
  TIMESTAMP_TRUNC(order_purchase_timestamp, SECOND) AS SECOND
FROM `olist.olist_orders`
WHERE
  DATE(order_purchase_timestamp) = '2017-11-26' -- 일요일
;

EXTRACT()TIMESTAMP_TRUNC()의 차이:

SELECT
  order_purchase_timestamp,
  EXTRACT(YEAR FROM order_purchase_timestamp) AS YEAR,
  TIMESTAMP_TRUNC(order_purchase_timestamp, YEAR) AS YEAR2,
  EXTRACT(month from order_purchase_timestamp) AS MONTH,
  TIMESTAMP_TRUNC(order_purchase_timestamp, month) AS MONTH2
FROM `olist.olist_orders`
WHERE
  DATE(order_purchase_timestamp) = '2017-11-26' -- 일요일
;

4. 날짜별 지표 뽑기

4.1 EXTRACT(YEAR FROM 날짜 컬럼)

/* 첫 번째: 주문 1건당 주문 금액, 판매상품 수 구하기 */
WITH tb AS (
  SELECT
    item.order_id,
    SUM(item.price) AS ord_amt,
    COUNT(item.order_item_id) AS prd_cnt
  FROM `olist.olist_order_items` AS item
  GROUP BY item.order_id
)

/* 두 번째: 주문 정보 테이블에 1번 결과와 고객 unique ID 붙이기 */
, base AS (
  SELECT
    ord.order_approved_at,
    DATE(ord.order_approved_at) AS ord_date,
    DATETIME(ord.order_approved_at) AS ord_dt,
    TIMESTAMP(ord.order_approved_at) AS ord_ts,

    -- EXTRACT
    EXTRACT(YEAR FROM ord.order_approved_at) AS ord_year,
    EXTRACT(MONTH FROM ord.order_approved_at) AS ord_month,

    -- TRUNC
    TIMESTAMP_TRUNC(ord.order_approved_at, YEAR) AS ord_year_t,
    TIMESTAMP_TRUNC(ord.order_approved_at, MONTH) AS ord_month_t,

    ord.order_id,
    ord.customer_id,
    cust.customer_unique_id,
    tb.ord_amt,
    tb.prd_cnt
  FROM `olist.olist_orders` AS ord
  LEFT JOIN `olist.olist_customers` AS cust
    ON ord.customer_id = cust.customer_id
  INNER JOIN tb
    ON ord.order_id = tb.order_id
)

SELECT
  ord_year,
  SUM(ord_amt) AS `총매출`,
  COUNT(DISTINCT order_id) AS `총주문수`,
  SUM(prd_cnt) AS `총판매상품수`,
  SUM(ord_amt) / COUNT(DISTINCT order_id) AS `주문당평균가격`,
  SUM(prd_cnt) / COUNT(DISTINCT order_id) AS `평균판매상품수`,
  SUM(ord_amt) / SUM(prd_cnt) AS `제품개당평균가격`,
  COUNT(DISTINCT customer_unique_id) AS `주문고객수`,
  COUNT(DISTINCT order_id) / COUNT(DISTINCT customer_unique_id) AS `주문빈도`
FROM base
GROUP BY ord_year
ORDER BY ord_year
;

4.2 EXTRACT(MONTH FROM 날짜 컬럼)

/* 첫 번째: 주문 1건당 주문 금액, 판매상품 수 구하기 */
WITH tb AS (
  SELECT
    item.order_id,
    SUM(item.price) AS ord_amt,
    COUNT(item.order_item_id) AS prd_cnt
  FROM `olist.olist_order_items` AS item
  GROUP BY item.order_id
)

/* 두 번째: 주문 정보 테이블에 1번 결과와 고객 unique ID 붙이기 */
, base AS (
  SELECT
    ord.order_approved_at,
    DATE(ord.order_approved_at) AS ord_date,
    DATETIME(ord.order_approved_at) AS ord_dt,
    TIMESTAMP(ord.order_approved_at) AS ord_ts,

    -- EXTRACT
    EXTRACT(YEAR FROM ord.order_approved_at) AS ord_year,
    EXTRACT(MONTH FROM ord.order_approved_at) AS ord_month,

    -- TRUNC
    TIMESTAMP_TRUNC(ord.order_approved_at, YEAR) AS ord_year_t,
    TIMESTAMP_TRUNC(ord.order_approved_at, MONTH) AS ord_month_t,

    ord.order_id,
    ord.customer_id,
    cust.customer_unique_id,
    tb.ord_amt,
    tb.prd_cnt
  FROM `olist.olist_orders` AS ord
  LEFT JOIN `olist.olist_customers` AS cust
    ON ord.customer_id = cust.customer_id
  INNER JOIN tb
    ON ord.order_id = tb.order_id
)

/* 세 번째: 2번 결과를 집계 */
SELECT
  ord_month,
  SUM(ord_amt) AS `총매출`,
  COUNT(DISTINCT order_id) AS `총주문수`,
  SUM(prd_cnt) AS `총판매상품수`,
  SUM(ord_amt) / COUNT(DISTINCT order_id) AS `주문당평균가격`,
  SUM(prd_cnt) / COUNT(DISTINCT order_id) AS `평균판매상품수`,
  SUM(ord_amt) / SUM(prd_cnt) AS `제품개당평균가격`,
  COUNT(DISTINCT customer_unique_id) AS `주문고객수`,
  COUNT(DISTINCT order_id) / COUNT(DISTINCT customer_unique_id) AS `주문빈도`
FROM base
GROUP BY ord_month
ORDER BY ord_month
;

4.3 TIMESTAMP_TRUNC(날짜 컬럼, MONTH)

/* 첫 번째: 주문 1건당 주문 금액, 판매상품 수 구하기 */
WITH tb AS (
  SELECT
    item.order_id,
    SUM(item.price) AS ord_amt,
    COUNT(item.order_item_id) AS prd_cnt
  FROM `olist.olist_order_items` AS item
  GROUP BY item.order_id
)

/* 두 번째: 주문 정보 테이블에 1번 결과와 고객 unique ID 붙이기 */
, base AS (
  SELECT
    ord.order_approved_at,
    DATE(ord.order_approved_at) AS ord_date,
    DATETIME(ord.order_approved_at) AS ord_dt,
    TIMESTAMP(ord.order_approved_at) AS ord_ts,

    -- EXTRACT
    EXTRACT(YEAR FROM ord.order_approved_at) AS ord_year,
    EXTRACT(MONTH FROM ord.order_approved_at) AS ord_month,

    -- TRUNC
    TIMESTAMP_TRUNC(ord.order_approved_at, YEAR) AS ord_year_t,
    TIMESTAMP_TRUNC(ord.order_approved_at, MONTH) AS ord_month_t,

    ord.order_id,
    ord.customer_id,
    cust.customer_unique_id,
    tb.ord_amt,
    tb.prd_cnt
  FROM `olist.olist_orders` AS ord
  LEFT JOIN `olist.olist_customers` AS cust
    ON ord.customer_id = cust.customer_id
  INNER JOIN tb
    ON ord.order_id = tb.order_id
)

/* 세 번째: 2번 결과를 집계 */
SELECT
  ord_month_t,
  SUM(ord_amt) AS `총매출`,
  COUNT(DISTINCT order_id) AS `총주문수`,
  SUM(prd_cnt) AS `총판매상품수`,
  SUM(ord_amt) / COUNT(DISTINCT order_id) AS `주문당평균가격`,
  SUM(prd_cnt) / COUNT(DISTINCT order_id) AS `평균판매상품수`,
  SUM(ord_amt) / SUM(prd_cnt) AS `제품개당평균가격`,
  COUNT(DISTINCT customer_unique_id) AS `주문고객수`,
  COUNT(DISTINCT order_id) / COUNT(DISTINCT customer_unique_id) AS `주문빈도`
FROM base
GROUP BY ord_month_t
ORDER BY ord_month_t
;

profile
거북선통통통통

0개의 댓글