이커머스 데이터를 통한 사업현황 파악

김지율·2024년 3월 7일
0

데이터분석

목록 보기
22/25

##google big query

select
 ord.order_id,
 ord.customer_id,
 cust.customer_unique_id
from `olist.olist_orders` as ord
left join `olist.olist_customers` as cust 
on ord.customer_id = cust.customer_id



/*주문 고객 unique세기*/
select
 count(distinct ord.order_id)   as ord_cnt, --주문 건수 세기
 count(distinct ord.customer_id)  as cust_cnt,
   --참고. 주문건의 고객 iid세기 (주문 건수와 같음) 
 count(distinct cust.customer_unique_id) as unique_cust_cnt
 --고객 고유 id 세기 
from `olist.olist_orders` as ord
left join `olist.olist_customers` as cust 
on ord.customer_id = cust.customer_id



/*주문 빈도까지 조회*/
select
 count(distinct ord.order_id)   as ord_cnt, --주문 건수 세기
 count(distinct ord.customer_id)  as cust_cnt,
   --참고. 주문건의 고객 iid세기 (주문 건수와 같음) 
 count(distinct cust.customer_unique_id) as unique_cust_cnt
 --고객 고유 id 세기 
 count(distinct ord.order_id)/ count(distinct cust.customer_unique_id) as frequency --빈도 
from `olist.olist_orders` as ord
left join `olist.olist_customers` as cust 
on ord.customer_id = cust.customer_id


/*총 매출,주문건수 > 주문 1건당 평균 가 */
select 
 count(distinct order_id) as unique_orders, --전체 주문건수
 sum(price) as ord_amt, --주문금액 총합 총매출 
 sum(price)/count(distinct order_id) as amount_per_price
 from `olist.olist_order_items`;

/*총 판매 상품 수 , 주문건수 집계*/
 select 
 count(order_item_id) as prd_cnt, --총 판매 상품 수 
 count(distinct order_id) as ord_cnt, --주문 건수  
count(order_item_id)/count(distinct order_id) as unit_per_order
 from `olist.olist_order_items`;

 /*총 매출, 총 판매 상품 수 집계*/
  select 
    sum(price) as ord_amt, --총 매출
    count(order_item_id) as prd_cnt, --총 판매 상품 수 
    sum(price)/count(order_item_id) as average_selling_price,
     --판매 제품 당 평균 가격
 from `olist.olist_order_items` as ord; --주문 정보 테이블 

/*합치기*/
  select 
    sum(price) as `총 매출`, 
    count(distinct order_id) as `총 주문수`,
    count(order_item_id) as `총 판매상품수`,

    sum(price)/ count(distinct order_id)as `주문당평균가격`,
    sum(price)/count(order_item_id) as `제품개당평균가격`,
    count(order_item_id)/count(distinct order_id)       as`평균판매상품수`
 from `olist.olist_order_items` as ord; --주문 정보 테이블 


/*지표 간의 관계 CHECK TEST*/
WITH TB AS(
  select 
    sum(price) as `총 매출`, 
    count(distinct order_id) as `총 주문수`,
    count(order_item_id) as `총 판매상품수`,

    sum(price)/ count(distinct order_id)as `주문당평균가격`,
    sum(price)/count(order_item_id) as `제품개당평균가격`,
    count(order_item_id)/count(distinct order_id)       as`평균판매상품수`
 from `olist.olist_order_items` as ord --주문 정보 테이블 
)
SELECT TB.`주문당평균가격`,
       TB.`평균판매상품수` * TB.`제품개당평균가격` AS CHECK_1,
       TB.`총 판매상품수`,
        TB.`평균판매상품수` * TB.`총 주문수` AS CHECK_2,
         TB.`총 매출`,
        TB.`제품개당평균가격` * TB.`총 판매상품수` AS CHECK_2,
      FROM TB;

/*첫번째: 주문 1건당 주문 금액, 판매 상품의 수를 구합니다. */
SELECT
 item.order_id,
 sum(item.price) as ord_amt, 
 count(item.order_item_id) as prd_cnt
 FROM `olist.olist_order_items` as item
 group by item.order_id;

 /*두번째: 주문 정보 테이블의 1번 결과와, 고객 unique id를 붙임*/
 with tb as (
  SELECT
 item.order_id,
 sum(item.price) as ord_amt, 
 count(item.order_item_id) as prd_cnt
 FROM `olist.olist_order_items` as item
 group by item.order_id
 )
 select 
  ord.order_id,
  ord.customer_id,
  cust.customer_unique_id,
  tb.ord_amt,
  tb.prd_cnt
 FROM `olist.olist_orders` as ord
 left join `olist.olist_customers` as cust
  on ord.customer_id =cust.customer_id
inner join tb
 on ord.order_id = tb.order_id;

/*세번째: 2번 결과를 집계합니다*/

 with tb as (
  SELECT
 item.order_id,
 sum(item.price) as ord_amt, 
 count(item.order_item_id) as prd_cnt
 FROM `olist.olist_order_items` as item
 group by item.order_id
 )
 , base as (
  select 
      ord.order_id,
      ord.customer_id,
      cust.customer_unique_id,
      tb.ord_amt,
      tb.prd_cnt
    FROM `olist.olist_orders` as ord
    left join `olist.olist_customers` as cust
      on ord.customer_id =cust.customer_id
    inner join tb
    on ord.order_id = tb.order_id
  )
  select
   sum(ord_amt) as `총매출`,
   count(distinct order_id) as `총주문수`,
   sum(prd_cnt) as `총판매상품수`,
   sum(ord_amt)/ count(distinct order_id) `주문당평균가격`,
   sum(prd_cnt)/ count(distinct order_id) `평균판매상품수`,
   count(distinct customer_unique_id) as `주문고객수`,
   count(distinct order_id)/count(distinct customer_unique_id) as `주문빈`
   from base


###날짜 실습


-- 필요한 값만 추출하는 EXTRACT

SELECT 
  order_purchase_timestamp, 
  EXTRACT(YEAR FROM order_purchase_timestamp) AS YEAR, 
  EXTRACT(QUARTER FROM order_purchase_timestamp) AS QUARTER,
  EXTRACT(MONTH FROM order_purchase_timestamp) AS MONTH,
  EXTRACT(DAY FROM order_purchase_timestamp) AS DAY,
  EXTRACT(DAYOFWEEK FROM order_purchase_timestamp) AS DAYOFWEEK,
  EXTRACT(WEEK FROM order_purchase_timestamp) AS WEEK,
  EXTRACT(WEEK(MONDAY) FROM order_purchase_timestamp) AS WEEK_MONDAY,
  EXTRACT(HOUR FROM order_purchase_timestamp) AS HOUR,
  EXTRACT(MINUTE FROM order_purchase_timestamp) AS MINUTE,
  EXTRACT(SECOND FROM order_purchase_timestamp) AS SECOND,
FROM `olist.olist_orders` 
WHERE 
  DATE(order_purchase_timestamp) = '2017-11-26' -- 일요일
;









-- 원하는 정보까지만 남기는 DATE/DATETIME/TIMESTAMP_TRUNC

SELECT 
  order_purchase_timestamp, 
  TIMESTAMP_TRUNC(order_purchase_timestamp, YEAR) AS YEAR, 
  TIMESTAMP_TRUNC(order_purchase_timestamp, QUARTER) AS QUARTER,
  TIMESTAMP_TRUNC(order_purchase_timestamp, MONTH) AS MONTH,
  TIMESTAMP_TRUNC(order_purchase_timestamp, DAY) AS DAY,
  TIMESTAMP_TRUNC(order_purchase_timestamp, WEEK) AS WEEK,
  TIMESTAMP_TRUNC(order_purchase_timestamp, WEEK(MONDAY)) AS WEEK_MONDAY,
  TIMESTAMP_TRUNC(order_purchase_timestamp, HOUR) AS HOUR,
  TIMESTAMP_TRUNC(order_purchase_timestamp, MINUTE) AS MINUTE,
  TIMESTAMP_TRUNC(order_purchase_timestamp, SECOND) AS SECOND,
FROM `olist.olist_orders` 
WHERE 
  DATE(order_purchase_timestamp) = '2017-11-26' -- 일요일  
;

-- EXTRACT vs DATE_TRUNC 
SELECT 
  order_purchase_timestamp, 
  EXTRACT(YEAR FROM order_purchase_timestamp) AS YEAR, 
  TIMESTAMP_TRUNC(order_purchase_timestamp, YEAR) AS YEAR2, 

  EXTRACT(MONTH FROM order_purchase_timestamp) AS MONTH, 
  TIMESTAMP_TRUNC(order_purchase_timestamp, MONTH) AS MONTH2, 

FROM `olist.olist_orders` 
WHERE 
  DATE(order_purchase_timestamp) = '2017-11-26' -- 일요일

  

###날짜별 지표뽑기


/* 첫 번째 : 주문 1건당 주문금액, 판매상품의 수를 구합니다 */

WITH tb AS (
  SELECT 
    item.order_id, 
    SUM(item.price) AS ord_amt, 
    COUNT(item.order_item_id) AS prd_cnt
  FROM `olist.olist_order_items` AS item
  GROUP BY item.order_id
)
/* 두 번째 : 주문 정보 테이블에 1번 결과와, 고객 unique ID를 붙입니다 */
 , base AS (
  SELECT 
    ord.order_approved_at, 
    -- 날짜 유형 
    DATE(ord.order_approved_at) as ord_date, 
    DATETIME(ord.order_approved_at) as ord_dt, 
    TIMESTAMP(ord.order_approved_at) as ord_ts, 

    -- Extract / Trunc 
    -- EXTRACT
    EXTRACT(YEAR FROM ord.order_approved_at) as ord_year, 
    EXTRACT(MONTH FROM ord.order_approved_at) as ord_month, 
    -- TRUNC
    TIMESTAMP_TRUNC(ord.order_approved_at, YEAR) as ord_year_t, 
    TIMESTAMP_TRUNC(ord.order_approved_at, MONTH) as ord_month_t, 

    ord.order_id,
    ord.customer_id,
    cust.customer_unique_id,
    tb.ord_amt, 
    tb.prd_cnt, 
  FROM `olist.olist_orders` AS ord
  LEFT JOIN `olist.olist_customers` AS cust
    ON ord.customer_id = cust.customer_id
  INNER JOIN tb 
    ON ord.order_id = tb.order_id
)
/* 세 번째 : 2번 결과를 집계합니다 */
SELECT 
  ord_month_t, 
  SUM(ord_amt) AS `총매출`, 
  COUNT(DISTINCT order_id) AS `총주문수`, 
  SUM(prd_cnt) AS `총판매상품수`, -- 총 판매 상품 수
  SUM(ord_amt) / COUNT(DISTINCT order_id) AS `주문당평균가격`, -- 주문 당 평균 가격  
  SUM(prd_cnt) / COUNT(DISTINCT order_id) AS `평균판매상품수`, -- 주문 당 평균 판매상품 수 
  SUM(ord_amt) / SUM(prd_cnt) AS `제품개당평균가격`, -- 판매 제품 당 평균 가격
  COUNT(DISTINCT customer_unique_id) AS `주문고객수` , 
  COUNT(DISTINCT order_id) / COUNT(DISTINCT customer_unique_id) AS `주문빈도`
FROM base
GROUP BY ord_month_t
ORDER BY ord_month_t

###유용한 함수


SELECT *
FROM `olist.olist_order_items`
LIMIT 100 
;

-- EXCEPT 컬럼제외 

SELECT * EXCEPT (shipping_limit_date, freight_value)
FROM `olist.olist_order_items`
LIMIT 100 
;

-- REPLACE
-- price 에 * 10,000 해주고 싶어요!

SELECT 
  * REPLACE (price * 10000 AS price)
FROM `olist.olist_order_items`
LIMIT 100 
;

###데이터 타입바꾸기



-- CAST & SAFE_CAST

SELECT 
  CAST('12345' AS INT64) AS cast_result, 
  SAFE_CAST('12345' AS INT64) AS safe_result
;

SELECT 
  SAFE_CAST('asdf' AS INT64) AS safe_result 
;

-- DIVIDE & SAFE_DIVIDE

SELECT
  SAFE_DIVIDE(5 , 0), 
  IFNULL(SAFE_DIVIDE(5 , 0), 0)
;


/*
1. in 을 사용해 특정 상태에서만 집계
2. 일자별 자료
3. round 를 활용해서 소수 둘째 자리까지만 표시, safe 연산 사용
4. 컬럼명 영문으로 대체 
 */
/* 첫 번째 : 주문 1건당 주문금액, 판매상품의 수를 구합니다 */

WITH tb AS (
  SELECT 
    item.order_id, 
    SUM(item.price) AS ord_amt, 
    COUNT(item.order_item_id) AS prd_cnt
  FROM `olist.olist_order_items` AS item
  GROUP BY item.order_id
)
/* 두 번째 : 주문 정보 테이블에 1번 결과와, 고객 unique ID를 붙입니다 */
 , base AS (
  SELECT 
    ord.order_approved_at, 
    -- 날짜 유형 
    DATE(ord.order_approved_at) as ord_date, 
    ord.order_id,
    ord.customer_id,
    cust.customer_unique_id,
    tb.ord_amt, 
    tb.prd_cnt, 
  FROM `olist.olist_orders` AS ord
  LEFT JOIN `olist.olist_customers` AS cust
    ON ord.customer_id = cust.customer_id
  INNER JOIN tb 
    ON ord.order_id = tb.order_id
  where 1=1
   and order_status in ('delievered', 'shipped')
   and order_approved_at is not null

)
/* 세 번째 : 2번 결과를 집계합니다 */
SELECT 
   ord_date,
  round(SUM(ord_amt),2) AS ord_amt, 
  COUNT(DISTINCT order_id) AS ord_cnt, 
  SUM(prd_cnt) AS prd_cnt, -- 총 판매 상품 수
  round(ifnull(safe_divide(SUM(ord_amt),COUNT(DISTINCT order_id)),0),2) AS avg_ord_amt, -- 주문 당 평균 가격  
   round(ifnull(safe_divide(SUM(prd_cnt), COUNT(DISTINCT order_id)),0),2) AS avg_ord_cnt, -- 주문 당 평균 판매상품 수 
   round(ifnull(safe_divide(SUM(ord_amt),SUM(prd_cnt)),0),2) AS cust_cnt, -- 판매 제품 당 평균 가격
  COUNT(DISTINCT customer_unique_id) AS `주문고객수` , 
   round(ifnull(safe_divide(COUNT(DISTINCT order_id),COUNT(DISTINCT customer_unique_id)),0),2) AS cust_freq
FROM base
GROUP BY  ord_date
ORDER BY ord_date

####대시보드용 결과 쿼리 저장

###빅쿼리 과제
https://console.cloud.google.com/bigquery?sq=664481595609:c4c2ea2a817f4f1ab506b832e2c7a801

https://console.cloud.google.com/bigquery?sq=664481595609:328ad410a4c741a796dbc3bb67d5c4f7

profile
김지율

0개의 댓글

관련 채용 정보