[SQL 분석][Ch2] 빅쿼리에서 날짜 다루기

김지현·2024년 7월 12일

SQL 분석

목록 보기
17/40

DATE / DATETIME / TIMESTAMP / TIME

  • TIMESTAMP()는 시간대 설정 가능 (참고. 한국 시간대 : 'Asia/Seoul')
-- DATE() / DATETIME() / TIMESTAMP()

SELECT
  order_purchase_timestamp,

  DATE(order_purchase_timestamp) as ord_date,

  DATETIME(order_purchase_timestamp) as ord_dt,

  TIMESTAMP(order_purchase_timestamp) as ord_ts,
  TIMESTAMP(DATE(order_purchase_timestamp)) as ord_date_ts,

  TIME(order_purchase_timestamp) as ord_time
FROM `olist.olist_orders`
;

EXTRACT

  • 날짜에서 일부분만 추출하고 싶을 때
  • EXTRACT('추출할 부분' FROM '날짜 컬럼')

-- 필요한 값만 추출하는 EXTRACT
SELECT
  order_purchase_timestamp,
  EXTRACT(YEAR FROM order_purchase_timestamp) as YEAR,
  EXTRACT(QUARTER FROM order_purchase_timestamp) as QUARTER,
  EXTRACT(MONTH FROM order_purchase_timestamp) as MONTH,
  EXTRACT(DAY FROM order_purchase_timestamp) as DAY,
  EXTRACT(DAYOFWEEK FROM order_purchase_timestamp) as DAYOFWEEK,
  EXTRACT(WEEK FROM order_purchase_timestamp) as WEEK,
  EXTRACT(WEEK(MONDAY) FROM order_purchase_timestamp) as WEEK_MONDAY,
  EXTRACT(HOUR FROM order_purchase_timestamp) as HOUR,
  EXTRACT(MINUTE FROM order_purchase_timestamp) as MINUTE,
  EXTRACT(SECOND FROM order_purchase_timestamp) as SECOND
FROM `olist.olist_orders`
WHERE DATE(order_purchase_timestamp) = '2017-11-26'  -- 일요일
;

TRUNC

  • 날짜에서 원하는 정보만 남기고 싶을 때
  • DATE_TRUNC('날짜','남기는 부분')
  • DATETIME_TRUNC('날짜','남기는 부분')
  • TIMESTAMP_TRUNC('날짜','남기는 부분')

-- 원하는 정보까지만 남기는 TRUNC
SELECT
  order_purchase_timestamp,
  TIMESTAMP_TRUNC(order_purchase_timestamp, YEAR) as YEAR,
  TIMESTAMP_TRUNC(order_purchase_timestamp, QUARTER) as QUARTER,
  TIMESTAMP_TRUNC(order_purchase_timestamp, MONTH) as MONTH,
  TIMESTAMP_TRUNC(order_purchase_timestamp, DAY) as DAY,
  TIMESTAMP_TRUNC(order_purchase_timestamp, WEEK) as WEEK,
  TIMESTAMP_TRUNC(order_purchase_timestamp, WEEK(MONDAY)) as WEEK_MONDAY,
  TIMESTAMP_TRUNC(order_purchase_timestamp, HOUR) as HOUR,
  TIMESTAMP_TRUNC(order_purchase_timestamp, MINUTE) as MINUTE,
  TIMESTAMP_TRUNC(order_purchase_timestamp, SECOND) as SECOND
FROM `olist.olist_orders`
WHERE DATE(order_purchase_timestamp) = '2017-11-26'  -- 일요일
;

EXTRACT vs TRUNC

  • EXTRACT의 결과값은 숫자
-- EXTRACT vs TRUNC
SELECT
  order_purchase_timestamp,
  EXTRACT(YEAR FROM order_purchase_timestamp) as YEAR,
  TIMESTAMP_TRUNC(order_purchase_timestamp, YEAR) as YEAR2,

  EXTRACT(MONTH FROM order_purchase_timestamp) as MONTH,
  TIMESTAMP_TRUNC(order_purchase_timestamp, MONTH) as MONTH2,
FROM `olist.olist_orders`
WHERE DATE(order_purchase_timestamp) = '2017-11-26'  -- 일요일
;


종합쿼리에 날짜 추가

/* 1) 주문 1건당 주문금액, 판매 상품의 수 구하기 */
WITH tb AS (
  SELECT
    item.order_id,
    SUM(item.price) AS ord_amt,
    COUNT(item.order_item_id) AS prd_cnt
  FROM `olist.olist_order_items` AS item
  GROUP BY item.order_id
)

/* 2) 주문 정보 테이블에 1번 결과와 고객 unique ID 붙이기 (=> 테이블 조인 2번)*/
, base AS (   -- 임시 테이블 여러 개 이어서 사용 가능
  SELECT
    ord.order_approved_at,
    DATE(ord.order_approved_at) as ord_date,
    DATETIME(ord.order_approved_at) as ord_dt,
    TIMESTAMP(ord.order_approved_at) as ord_ts,

    -- Extract / Trunc
    -- EXTRACT
    EXTRACT(YEAR FROM ord.order_approved_at) as ord_year,
    EXTRACT(MONTH FROM ord.order_approved_at) as ord_month,
    
    -- TRUNC
    TIMESTAMP_TRUNC(ord.order_approved_at, YEAR) as ord_year_t,
    TIMESTAMP_TRUNC(ord.order_approved_at, MONTH) as ord_month_t,

    ord.order_id,
    ord.customer_id,
    cust.customer_unique_id,
    tb.ord_amt,
    tb.prd_cnt
  FROM `olist.olist_orders` AS ord
  LEFT JOIN `olist.olist_customers` AS cust
    ON ord.customer_id = cust.customer_id
  INNER JOIN tb
    ON ord.order_id = tb.order_id
)

/* 3) 2번 결과 집계 */
SELECT
  ord_month_t,
  SUM(ord_amt) AS `총매출`,
  COUNT(DISTINCT order_id) AS `총주문수`,
  SUM(prd_cnt) AS `총판매상품수`,
  SUM(ord_amt) / COUNT(DISTINCT order_id) AS `주문당평균가격`,
  SUM(prd_cnt) / COUNT(DISTINCT order_id) AS `평균판매상품수`,
  SUM(ord_amt) / SUM(prd_cnt) AS `제품개당평균가격`,
  COUNT(DISTINCT customer_unique_id) AS `주문고객수`,
  COUNT(DISTINCT order_id) / COUNT(DISTINCT customer_unique_id) AS `주문빈도`
FROM base
GROUP BY ord_month_t
ORDER BY ord_month_t

  • 결과 값에 null값이 있는 이유 : olist_orders 원본에서 일부 행의 날짜가 null값이기 때문

0개의 댓글