##google big query
select
ord.order_id,
ord.customer_id,
cust.customer_unique_id
from `olist.olist_orders` as ord
left join `olist.olist_customers` as cust
on ord.customer_id = cust.customer_id
/*주문 고객 unique세기*/
select
count(distinct ord.order_id) as ord_cnt, --주문 건수 세기
count(distinct ord.customer_id) as cust_cnt,
--참고. 주문건의 고객 iid세기 (주문 건수와 같음)
count(distinct cust.customer_unique_id) as unique_cust_cnt
--고객 고유 id 세기
from `olist.olist_orders` as ord
left join `olist.olist_customers` as cust
on ord.customer_id = cust.customer_id
/*주문 빈도까지 조회*/
select
count(distinct ord.order_id) as ord_cnt, --주문 건수 세기
count(distinct ord.customer_id) as cust_cnt,
--참고. 주문건의 고객 iid세기 (주문 건수와 같음)
count(distinct cust.customer_unique_id) as unique_cust_cnt
--고객 고유 id 세기
count(distinct ord.order_id)/ count(distinct cust.customer_unique_id) as frequency --빈도
from `olist.olist_orders` as ord
left join `olist.olist_customers` as cust
on ord.customer_id = cust.customer_id
/*총 매출,주문건수 > 주문 1건당 평균 가 */
select
count(distinct order_id) as unique_orders, --전체 주문건수
sum(price) as ord_amt, --주문금액 총합 총매출
sum(price)/count(distinct order_id) as amount_per_price
from `olist.olist_order_items`;
/*총 판매 상품 수 , 주문건수 집계*/
select
count(order_item_id) as prd_cnt, --총 판매 상품 수
count(distinct order_id) as ord_cnt, --주문 건수
count(order_item_id)/count(distinct order_id) as unit_per_order
from `olist.olist_order_items`;
/*총 매출, 총 판매 상품 수 집계*/
select
sum(price) as ord_amt, --총 매출
count(order_item_id) as prd_cnt, --총 판매 상품 수
sum(price)/count(order_item_id) as average_selling_price,
--판매 제품 당 평균 가격
from `olist.olist_order_items` as ord; --주문 정보 테이블
/*합치기*/
select
sum(price) as `총 매출`,
count(distinct order_id) as `총 주문수`,
count(order_item_id) as `총 판매상품수`,
sum(price)/ count(distinct order_id)as `주문당평균가격`,
sum(price)/count(order_item_id) as `제품개당평균가격`,
count(order_item_id)/count(distinct order_id) as`평균판매상품수`
from `olist.olist_order_items` as ord; --주문 정보 테이블
/*지표 간의 관계 CHECK TEST*/
WITH TB AS(
select
sum(price) as `총 매출`,
count(distinct order_id) as `총 주문수`,
count(order_item_id) as `총 판매상품수`,
sum(price)/ count(distinct order_id)as `주문당평균가격`,
sum(price)/count(order_item_id) as `제품개당평균가격`,
count(order_item_id)/count(distinct order_id) as`평균판매상품수`
from `olist.olist_order_items` as ord --주문 정보 테이블
)
SELECT TB.`주문당평균가격`,
TB.`평균판매상품수` * TB.`제품개당평균가격` AS CHECK_1,
TB.`총 판매상품수`,
TB.`평균판매상품수` * TB.`총 주문수` AS CHECK_2,
TB.`총 매출`,
TB.`제품개당평균가격` * TB.`총 판매상품수` AS CHECK_2,
FROM TB;
/*첫번째: 주문 1건당 주문 금액, 판매 상품의 수를 구합니다. */
SELECT
item.order_id,
sum(item.price) as ord_amt,
count(item.order_item_id) as prd_cnt
FROM `olist.olist_order_items` as item
group by item.order_id;
/*두번째: 주문 정보 테이블의 1번 결과와, 고객 unique id를 붙임*/
with tb as (
SELECT
item.order_id,
sum(item.price) as ord_amt,
count(item.order_item_id) as prd_cnt
FROM `olist.olist_order_items` as item
group by item.order_id
)
select
ord.order_id,
ord.customer_id,
cust.customer_unique_id,
tb.ord_amt,
tb.prd_cnt
FROM `olist.olist_orders` as ord
left join `olist.olist_customers` as cust
on ord.customer_id =cust.customer_id
inner join tb
on ord.order_id = tb.order_id;
/*세번째: 2번 결과를 집계합니다*/
with tb as (
SELECT
item.order_id,
sum(item.price) as ord_amt,
count(item.order_item_id) as prd_cnt
FROM `olist.olist_order_items` as item
group by item.order_id
)
, base as (
select
ord.order_id,
ord.customer_id,
cust.customer_unique_id,
tb.ord_amt,
tb.prd_cnt
FROM `olist.olist_orders` as ord
left join `olist.olist_customers` as cust
on ord.customer_id =cust.customer_id
inner join tb
on ord.order_id = tb.order_id
)
select
sum(ord_amt) as `총매출`,
count(distinct order_id) as `총주문수`,
sum(prd_cnt) as `총판매상품수`,
sum(ord_amt)/ count(distinct order_id) `주문당평균가격`,
sum(prd_cnt)/ count(distinct order_id) `평균판매상품수`,
count(distinct customer_unique_id) as `주문고객수`,
count(distinct order_id)/count(distinct customer_unique_id) as `주문빈`
from base
###날짜 실습
-- 필요한 값만 추출하는 EXTRACT
SELECT
order_purchase_timestamp,
EXTRACT(YEAR FROM order_purchase_timestamp) AS YEAR,
EXTRACT(QUARTER FROM order_purchase_timestamp) AS QUARTER,
EXTRACT(MONTH FROM order_purchase_timestamp) AS MONTH,
EXTRACT(DAY FROM order_purchase_timestamp) AS DAY,
EXTRACT(DAYOFWEEK FROM order_purchase_timestamp) AS DAYOFWEEK,
EXTRACT(WEEK FROM order_purchase_timestamp) AS WEEK,
EXTRACT(WEEK(MONDAY) FROM order_purchase_timestamp) AS WEEK_MONDAY,
EXTRACT(HOUR FROM order_purchase_timestamp) AS HOUR,
EXTRACT(MINUTE FROM order_purchase_timestamp) AS MINUTE,
EXTRACT(SECOND FROM order_purchase_timestamp) AS SECOND,
FROM `olist.olist_orders`
WHERE
DATE(order_purchase_timestamp) = '2017-11-26' -- 일요일
;
-- 원하는 정보까지만 남기는 DATE/DATETIME/TIMESTAMP_TRUNC
SELECT
order_purchase_timestamp,
TIMESTAMP_TRUNC(order_purchase_timestamp, YEAR) AS YEAR,
TIMESTAMP_TRUNC(order_purchase_timestamp, QUARTER) AS QUARTER,
TIMESTAMP_TRUNC(order_purchase_timestamp, MONTH) AS MONTH,
TIMESTAMP_TRUNC(order_purchase_timestamp, DAY) AS DAY,
TIMESTAMP_TRUNC(order_purchase_timestamp, WEEK) AS WEEK,
TIMESTAMP_TRUNC(order_purchase_timestamp, WEEK(MONDAY)) AS WEEK_MONDAY,
TIMESTAMP_TRUNC(order_purchase_timestamp, HOUR) AS HOUR,
TIMESTAMP_TRUNC(order_purchase_timestamp, MINUTE) AS MINUTE,
TIMESTAMP_TRUNC(order_purchase_timestamp, SECOND) AS SECOND,
FROM `olist.olist_orders`
WHERE
DATE(order_purchase_timestamp) = '2017-11-26' -- 일요일
;
-- EXTRACT vs DATE_TRUNC
SELECT
order_purchase_timestamp,
EXTRACT(YEAR FROM order_purchase_timestamp) AS YEAR,
TIMESTAMP_TRUNC(order_purchase_timestamp, YEAR) AS YEAR2,
EXTRACT(MONTH FROM order_purchase_timestamp) AS MONTH,
TIMESTAMP_TRUNC(order_purchase_timestamp, MONTH) AS MONTH2,
FROM `olist.olist_orders`
WHERE
DATE(order_purchase_timestamp) = '2017-11-26' -- 일요일
###날짜별 지표뽑기
/* 첫 번째 : 주문 1건당 주문금액, 판매상품의 수를 구합니다 */
WITH tb AS (
SELECT
item.order_id,
SUM(item.price) AS ord_amt,
COUNT(item.order_item_id) AS prd_cnt
FROM `olist.olist_order_items` AS item
GROUP BY item.order_id
)
/* 두 번째 : 주문 정보 테이블에 1번 결과와, 고객 unique ID를 붙입니다 */
, base AS (
SELECT
ord.order_approved_at,
-- 날짜 유형
DATE(ord.order_approved_at) as ord_date,
DATETIME(ord.order_approved_at) as ord_dt,
TIMESTAMP(ord.order_approved_at) as ord_ts,
-- Extract / Trunc
-- EXTRACT
EXTRACT(YEAR FROM ord.order_approved_at) as ord_year,
EXTRACT(MONTH FROM ord.order_approved_at) as ord_month,
-- TRUNC
TIMESTAMP_TRUNC(ord.order_approved_at, YEAR) as ord_year_t,
TIMESTAMP_TRUNC(ord.order_approved_at, MONTH) as ord_month_t,
ord.order_id,
ord.customer_id,
cust.customer_unique_id,
tb.ord_amt,
tb.prd_cnt,
FROM `olist.olist_orders` AS ord
LEFT JOIN `olist.olist_customers` AS cust
ON ord.customer_id = cust.customer_id
INNER JOIN tb
ON ord.order_id = tb.order_id
)
/* 세 번째 : 2번 결과를 집계합니다 */
SELECT
ord_month_t,
SUM(ord_amt) AS `총매출`,
COUNT(DISTINCT order_id) AS `총주문수`,
SUM(prd_cnt) AS `총판매상품수`, -- 총 판매 상품 수
SUM(ord_amt) / COUNT(DISTINCT order_id) AS `주문당평균가격`, -- 주문 당 평균 가격
SUM(prd_cnt) / COUNT(DISTINCT order_id) AS `평균판매상품수`, -- 주문 당 평균 판매상품 수
SUM(ord_amt) / SUM(prd_cnt) AS `제품개당평균가격`, -- 판매 제품 당 평균 가격
COUNT(DISTINCT customer_unique_id) AS `주문고객수` ,
COUNT(DISTINCT order_id) / COUNT(DISTINCT customer_unique_id) AS `주문빈도`
FROM base
GROUP BY ord_month_t
ORDER BY ord_month_t
###유용한 함수
SELECT *
FROM `olist.olist_order_items`
LIMIT 100
;
-- EXCEPT 컬럼제외
SELECT * EXCEPT (shipping_limit_date, freight_value)
FROM `olist.olist_order_items`
LIMIT 100
;
-- REPLACE
-- price 에 * 10,000 해주고 싶어요!
SELECT
* REPLACE (price * 10000 AS price)
FROM `olist.olist_order_items`
LIMIT 100
;
###데이터 타입바꾸기
-- CAST & SAFE_CAST
SELECT
CAST('12345' AS INT64) AS cast_result,
SAFE_CAST('12345' AS INT64) AS safe_result
;
SELECT
SAFE_CAST('asdf' AS INT64) AS safe_result
;
-- DIVIDE & SAFE_DIVIDE
SELECT
SAFE_DIVIDE(5 , 0),
IFNULL(SAFE_DIVIDE(5 , 0), 0)
;
/*
1. in 을 사용해 특정 상태에서만 집계
2. 일자별 자료
3. round 를 활용해서 소수 둘째 자리까지만 표시, safe 연산 사용
4. 컬럼명 영문으로 대체
*/
/* 첫 번째 : 주문 1건당 주문금액, 판매상품의 수를 구합니다 */
WITH tb AS (
SELECT
item.order_id,
SUM(item.price) AS ord_amt,
COUNT(item.order_item_id) AS prd_cnt
FROM `olist.olist_order_items` AS item
GROUP BY item.order_id
)
/* 두 번째 : 주문 정보 테이블에 1번 결과와, 고객 unique ID를 붙입니다 */
, base AS (
SELECT
ord.order_approved_at,
-- 날짜 유형
DATE(ord.order_approved_at) as ord_date,
ord.order_id,
ord.customer_id,
cust.customer_unique_id,
tb.ord_amt,
tb.prd_cnt,
FROM `olist.olist_orders` AS ord
LEFT JOIN `olist.olist_customers` AS cust
ON ord.customer_id = cust.customer_id
INNER JOIN tb
ON ord.order_id = tb.order_id
where 1=1
and order_status in ('delievered', 'shipped')
and order_approved_at is not null
)
/* 세 번째 : 2번 결과를 집계합니다 */
SELECT
ord_date,
round(SUM(ord_amt),2) AS ord_amt,
COUNT(DISTINCT order_id) AS ord_cnt,
SUM(prd_cnt) AS prd_cnt, -- 총 판매 상품 수
round(ifnull(safe_divide(SUM(ord_amt),COUNT(DISTINCT order_id)),0),2) AS avg_ord_amt, -- 주문 당 평균 가격
round(ifnull(safe_divide(SUM(prd_cnt), COUNT(DISTINCT order_id)),0),2) AS avg_ord_cnt, -- 주문 당 평균 판매상품 수
round(ifnull(safe_divide(SUM(ord_amt),SUM(prd_cnt)),0),2) AS cust_cnt, -- 판매 제품 당 평균 가격
COUNT(DISTINCT customer_unique_id) AS `주문고객수` ,
round(ifnull(safe_divide(COUNT(DISTINCT order_id),COUNT(DISTINCT customer_unique_id)),0),2) AS cust_freq
FROM base
GROUP BY ord_date
ORDER BY ord_date
####대시보드용 결과 쿼리 저장
###빅쿼리 과제
https://console.cloud.google.com/bigquery?sq=664481595609:c4c2ea2a817f4f1ab506b832e2c7a801
https://console.cloud.google.com/bigquery?sq=664481595609:328ad410a4c741a796dbc3bb67d5c4f7