SELECT REGION AS "Region"
, (SELECT COUNT(DISTINCT ORDER_ID) FROM RECORDS A WHERE Z.REGION = A.REGION AND CATEGORY = 'Furniture') AS Furniture
, (SELECT COUNT(DISTINCT ORDER_ID) FROM RECORDS A WHERE Z.REGION = A.REGION AND CATEGORY = 'Office Supplies') AS "Office Supplies"
, (SELECT COUNT(DISTINCT ORDER_ID) FROM RECORDS A WHERE Z.REGION = A.REGION AND CATEGORY = 'Technology') AS Technology
FROM RECORDS Z
GROUP BY REGION
ORDER BY REGION;
한 주문에 같은 카테고리를 여러제품 구매했더라도 1개로 가정
https://solvesql.com/problems/characteristics-of-orders/
SELECT DAY
, TIME
, ROUND(AVG(TIP), 2) AS AVG_TIP
, ROUND(AVG(SIZE), 2) AS AVG_SIZE
FROM TIPS
GROUP BY DAY,TIME
ORDER BY DAY,TIME;
https://solvesql.com/problems/tip-analysis/
SELECT DATE(ORDER_DELIVERED_CARRIER_DATE) AS delivered_carrier_date
, COUNT(ORDER_ID) AS orders
FROM OLIST_ORDERS_DATASET
WHERE ORDER_DELIVERED_CUSTOMER_DATE IS NULL
AND ORDER_DELIVERED_CARRIER_DATE LIKE '2017-01%'
GROUP BY DATE(ORDER_DELIVERED_CARRIER_DATE)
ORDER BY ORDER_DELIVERED_CARRIER_DATE;
https://solvesql.com/problems/shipment-in-bermuda/
SELECT
order_date AS "order_date", Furniture, ROUND((furniture*1.0/ALL_ORDERS*100), 2) AS furniture_pct
FROM
(
SELECT ORDER_DATE
, COUNT(DISTINCT ORDER_ID) ALL_ORDERS
, (SELECT COUNT(DISTINCT ORDER_ID) FROM RECORDS A WHERE CATEGORY = 'Furniture' AND A.ORDER_DATE = Z.ORDER_DATE) AS furniture
FROM RECORDS Z
GROUP BY ORDER_DATE
HAVING ALL_ORDERS >= 10
)
WHERE ROUND((furniture*1.0/ALL_ORDERS*100), 2) >= 40
ORDER BY ROUND((furniture*1.0/ALL_ORDERS*100), 2) DESC , ORDER_DATE;
정수를 실수로 만들기 위해 1.0 곱하기
https://solvesql.com/problems/day-of-furniture/
SELECT DATE(A.ORDER_PURCHASE_TIMESTAMP) AS dt
, ROUND(SUM(PAYMENT_VALUE), 2) AS revenue_daily
FROM OLIST_ORDERS_DATASET A, OLIST_ORDER_PAYMENTS_DATASET B
WHERE A.ORDER_ID = B.ORDER_ID
AND A.ORDER_PURCHASE_TIMESTAMP > '2018-01-01'
GROUP BY DATE(A.ORDER_PURCHASE_TIMESTAMP)
ORDER BY ROUND(SUM(PAYMENT_VALUE), 2) ;
https://solvesql.com/problems/olist-daily-revenue/
SELECT DATE(A.order_purchase_timestamp) AS dt
, COUNT(DISTINCT B.ORDER_ID) AS pu, ROUND(SUM(B.payment_value), 2) AS revenue_daily
, ROUND((SUM(B.payment_value) / COUNT(DISTINCT B.ORDER_ID)), 2) AS arppu
FROM olist_orders_dataset A, olist_order_payments_dataset B
WHERE A.order_id = B.order_id
AND A.order_purchase_timestamp > '2018-01-01'
GROUP BY DATE(A.order_purchase_timestamp)
ORDER BY DATE(A.order_purchase_timestamp);
https://solvesql.com/problems/daily-arppu/
SELECT A.ATHLETE_ID
FROM RECORDS A, EVENTS B
WHERE A.EVENT_ID = B.ID
AND B.SPORT = 'Golf'
GROUP BY A.ATHLETE_ID;
https://solvesql.com/problems/join/
SELECT * FROM TIPS WHERE MOD(SIZE, 2) = 1;
https://solvesql.com/problems/size-of-table/
SELECT QUARTET
, ROUND(AVG(X), 2) AS x_mean
, ROUND(VARIANCE(X), 2) AS x_var
, ROUND(AVG(Y), 2) AS y_mean
, ROUND(VARIANCE(Y), 2) AS y_var
FROM POINTS
GROUP BY QUARTET;
https://solvesql.com/problems/group-by/
표본분산 함수 참고
http://bigdata.dongguk.ac.kr/lectures/DB/_book/MySQL1.html 의 3.5.1
SELECT YEAR, UPPER(SUBSTR(CITY, 0, 4)) AS CITY
FROM GAMES
WHERE YEAR >= 2000
GROUP BY YEAR, SUBSTR(CITY, 0, 4)
ORDER BY YEAR DESC ;
https://solvesql.com/problems/olympic-cities/
SELECT SELLER_ID, COUNT(DISTINCT ORDER_ID) AS ORDERS
FROM OLIST_ORDER_ITEMS_DATASET
GROUP BY SELLER_ID
HAVING COUNT(DISTINCT ORDER_ID) >= 100;
https://solvesql.com/problems/settled-sellers-1/
SELECT day, SUM(TIP) AS tip_daily
FROM TIPS
GROUP BY DAY
ORDER BY SUM(TIP) DESC
LIMIT 1
https://solvesql.com/problems/best-working-day/
SELECT A.EMPLOYEE_ID AS MENTEE_ID
, A.NAME AS MENTEE_NAME
, B.EMPLOYEE_ID AS MENTOR_ID
, B.NAME AS MENTOR_NAME
FROM
(SELECT * FROM EMPLOYEES WHERE JOIN_DATE BETWEEN '2021-09-31' AND '2021-12-31') A,
(SELECT * FROM EMPLOYEES WHERE JOIN_DATE <= '2019-12-31') B
WHERE A.DEPARTMENT <> B.DEPARTMENT
GROUP BY MENTEE_ID, MENTOR_ID
ORDER BY MENTEE_ID, MENTOR_ID;
https://solvesql.com/problems/mentor-mentee-list/
SELECT BIKE_ID
FROM RENTAL_HISTORY
WHERE RENT_AT LIKE '2021-01%'
GROUP BY BIKE_ID
HAVING SUM(DISTANCE) >= 50000 ;
https://solvesql.com/problems/inspection-needed-bike/
SELECT DATE(MIN(ORDER_PURCHASE_TIMESTAMP)) AS FIRST_ORDER_DATE
, DATE(MAX(ORDER_PURCHASE_TIMESTAMP)) AS LAST_ORDER_DATE
FROM OLIST_ORDERS_DATASET ;
https://solvesql.com/problems/first-and-last-orders/
SELECT DATE(X.ORDER_PURCHASE_TIMESTAMP) AS PURCHASE_DATE
, IFNULL(SUM(CASE WHEN ORDER_ESTIMATED_DELIVERY_DATE >= ORDER_DELIVERED_CUSTOMER_DATE THEN 1 END), 0) AS SUCCESS
, IFNULL(SUM(CASE WHEN ORDER_ESTIMATED_DELIVERY_DATE < ORDER_DELIVERED_CUSTOMER_DATE THEN 1 END), 0) AS FAIL
FROM OLIST_ORDERS_DATASET X
WHERE X.ORDER_PURCHASE_TIMESTAMP LIKE '2017-01%'
AND X.ORDER_ESTIMATED_DELIVERY_DATE IS NOT NULL
AND X.ORDER_DELIVERED_CUSTOMER_DATE IS NOT NULL
GROUP BY DATE(X.ORDER_PURCHASE_TIMESTAMP)
ORDER BY DATE(X.ORDER_PURCHASE_TIMESTAMP) ;
https://solvesql.com/problems/estimated-delivery-date/
SELECT TOTAL_BILL, TIP, SEX, SMOKER, DAY, TIME, SIZE
FROM
(
SELECT *, AVG(TOTAL_BILL) OVER() AS AVG_BILL
FROM TIPS
) WHERE TOTAL_BILL > AVG_BILL;
https://solvesql.com/problems/find-tables-with-high-bill/
다른방법이 있을듯 보임..
OVER() : 집계함수 결과값을 전체 ROW에서 조회할때 쓰는 함수
SELECT * FROM TIPS
WHERE DAY IN (SELECT DAY FROM TIPS GROUP BY DAY HAVING SUM(TOTAL_BILL) >= 1500);
https://solvesql.com/problems/high-season-of-restaurant/
SELECT * FROM TIPS
WHERE TOTAL_BILL IN (SELECT MAX(TOTAL_BILL) FROM TIPS GROUP BY DAY);
https://solvesql.com/problems/restaurant-vip/
SELECT ROUND(AVG(SUM_TOTAL), 2) FROM
(SELECT SUM(TOTAL_BILL) AS SUM_TOTAL FROM TIPS GROUP BY DAY
);
https://solvesql.com/problems/sales-summary/
뭘 묻는건지..?
SELECT ARTIST_ID, NAME
FROM ARTISTS
WHERE ARTIST_ID NOT IN ( SELECT ARTIST_ID FROM ARTWORKS_ARTISTS )
AND DEATH_YEAR IS NOT NULL ;
https://solvesql.com/problems/artists-without-artworks/
SELECT A.NAME
FROM ATHLETES A
, (
SELECT ATHLETE_ID
FROM
(
SELECT ATHLETE_ID, TEAM_ID
FROM RECORDS
WHERE MEDAL IS NOT NULL
AND GAME_ID IN ( SELECT ID FROM GAMES WHERE YEAR >= 2000 )
GROUP BY ATHLETE_ID, TEAM_ID
)
GROUP BY ATHLETE_ID HAVING COUNT(TEAM_ID) > 1
) B
WHERE A.ID = B.ATHLETE_ID
ORDER BY NAME;
https://solvesql.com/problems/multiple-medalist/
SELECT Y.PAYMENT_INSTALLMENTS
, COUNT(DISTINCT X.ORDER_ID) AS ORDER_COUNT
, MIN(X.PAYMENT_VALUE) AS MIN_VALUE
, MAX(X.PAYMENT_VALUE) AS MAX_VALUE
, AVG(X.PAYMENT_VALUE) AS AVG_VALUE
FROM OLIST_ORDER_PAYMENTS_DATASET X
, (SELECT PAYMENT_INSTALLMENTS FROM OLIST_ORDER_PAYMENTS_DATASET Z GROUP BY PAYMENT_INSTALLMENTS) Y
WHERE X.PAYMENT_INSTALLMENTS = Y.PAYMENT_INSTALLMENTS
AND X.PAYMENT_TYPE = 'credit_card'
GROUP BY Y.PAYMENT_INSTALLMENTS;
https://solvesql.com/problems/installment-month/
SELECT EVENT_DATE_KST AS DT
, COUNT(DISTINCT USER_PSEUDO_ID) AS USERS
FROM GA
WHERE EVENT_DATE_KST BETWEEN '2021-08-02' AND '2021-08-09'
GROUP BY EVENT_DATE_KST;
https://solvesql.com/problems/blog-counter/
-- 거리 구하기 ...