[Big query] 사용 함수

JONGYOON JEON·2024년 3월 9일
0

SQL

목록 보기
13/13

EXTRACT

DATE
DATETIME
TIMESTAMP

SELECT * EXCEPT(column_name)
FROM olist.olist_order

SELECT REPLACE(column_name 10000 AS column_name)
FROM olist.olist_order

CAST(value AS data_type)
SAFE_CAST(value AS data_type)

에러 있을 경우 에러를 발생시키지 않고 null 로 반환함

SAFE_ADD(x,y) = x + y
SAFE_SUBTRACT(x,y) = x - y
SAFE_MULTIPLY(x,y) = x * y
SAFE_DIVIDE(x,y) = x / y
0으로 나눌 때 NaN 방지

SELECT
 SAFE_DIVIDE(5, 0);
 IFNULL(SAFE_DIVIDE(5, 0) , 0)
> null
> 0

순위 행 매기는 방법


## order_id가 order_items 별로 price가 나눠져 있기 때문에 이를 합치기 위해 sum(order_items.price) 자료 준비

WITH tb as(
  SELECT
    item.order_id,
    sum(item.price) as ord_amt,
  from `olist.olist_order_items` as item
  group by item.order_id
)

## 도시, 주 별로 주문, 고객수, 매출을 agg 진행

, base as(
  SELECT
    cus.customer_city,
    cus.customer_state,
    count(distinct ord.order_id) as cnt_order,
    count(cus.customer_unique_id) as cnt_cus,
    sum(tb.ord_amt) as sum_sales
  
  FROM `olist.olist_orders` as ord
  INNER JOIN `tb`
    ON ord.order_id = tb.order_id
  LEFT JOIN `olist.olist_customers` as cus
    ON ord.customer_id = cus.customer_id
  WHERE 1=1
    AND EXTRACT(YEAR from order_approved_at) = 2017
    AND ord.order_status = 'delivered'
    AND customer_state= 'SP'
  GROUP BY 1,2
  ORDER by cnt_cus desc
)

## 위의 자료를 포함하여 랭킹까지 추가해줌

select *,
  row_number() over (partition by customer_state order by cnt_cus desc) as cust_rownum

from base
order by cust_rownum

월평균 구매금액 part 05. SQL chapter2 40)

/* 우리 고객들은 월 평균 얼마를 사용하고 있는가
월 주문건
월 주문금액
*/

WITH tb AS(

SELECT 
  order_id, 
  sum(price) as sales
FROM `olist.olist_order_items` as item
GROUP BY order_id
)
, base AS(

SELECT 
  DATE_TRUNC(DATE(ord.order_approved_at),MONTH) as part_month,
  count(distinct ord.order_id) as cnt_ord,
  count(distinct cus.customer_unique_id) as cnt_cus,
  round(sum(tb.sales),0) as sales,
  round(safe_divide(round(sum(tb.sales),0), count(distinct cus.customer_unique_id)), 1) as avg_sales
FROM `olist.olist_orders` as ord
INNER JOIN `tb`
  ON ord.order_id = tb.order_id
LEFT JOIN `olist.olist_customers` as cus
  ON ord.customer_id = cus.customer_id
GROUP BY 1
ORDER BY 1
)

select * from base
WHERE 1=1
  and part_month is not null

조건에 따라 값을 나누는 CASE

/* 고객들의 등급 변화가 어떻게 일어나고 있는가?
월 주문금액
고객
*/

WITH tb AS(

SELECT 
  order_id, 
  sum(price) as sales
FROM `olist.olist_order_items` as item
GROUP BY order_id
)
, base AS(

SELECT 
  DATE_TRUNC(DATE(ord.order_approved_at),MONTH) as ord_month,
  cus.customer_unique_id,
  sum(tb.sales) as sales,
  CASE WHEN sum(tb.sales) >= 300 THEN 'A'
    WHEN sum(tb.sales) >= 150 THEN 'B'
    ELSE 'C' END as level
FROM `olist.olist_orders` as ord
INNER JOIN `tb`
  ON ord.order_id = tb.order_id
LEFT JOIN `olist.olist_customers` as cus
  ON ord.customer_id = cus.customer_id
WHERE ord.order_status in ('delivered', 'shipped')
GROUP BY 1,2
ORDER BY level,3 desc
)

select * from base
WHERE 1=1
  and ord_month is not null

과제 2번

/* 상품 카테고리별 매출 데이터
2017년 월별 대카테고리별(영문명) 매출

1. 집계 조건 : 날짜 기준 : 승인일시(order_approved_at), null 제외, delivered 한정
2. 정렬 : 날짜(연도/월) 오름차순, 매출 내림차순
3. join 조건 : 주문 테이블과 주문 상품 정보 테이블에 모두 있는 주문건만 사용
              from 절로 주문상품 정보 테이블 사용(order_items)
              카테고리 정보가 없어도 매출이 집계되도록 함.
*/
SELECT 
  trans.catg_1 as cat,
  DATE_TRUNC(DATE(ord.order_approved_at), MONTH) as month,
  round(sum(item.price),2) as sum_price
FROM `olist.olist_order_items` as item -- 3-2
INNER JOIN `olist.olist_orders` as ord -- 3-1
  ON item.order_id = ord.order_id  -- USING(oder_id) 이렇게 써도됨
LEFT JOIN `olist.olist_products` as pro
  ON item.product_id = pro.product_id
LEFT JOIN `olist.product_category_name_translation` as trans
  ON pro.product_category_name = trans.product_category_name
WHERE 1=1
  AND EXTRACT(YEAR from DATE(ord.order_approved_at)) = 2017
  AND ord.order_approved_at is not null
  AND ord.order_status = 'delivered'
  -- AND pro.product_id = '5eb564652db742ff8f28759cd8d2652a'
GROUP BY 1,2
ORDER BY 2, sum_price desc
profile
효율적인 걸 좋아해요

0개의 댓글