1. JOIN
1. JOIN이란

2. JOIN 문법
SELECT A.*, B.*
FROM raw_data.table1 A
____ JOIN raw_data.table2 B ON A.key1 = B.key1 and A.key2 = B.key2
WHERE A.ts >= '2019-01-01';
____ = INNER, FULL, LEFT, RIGHT, CROSS
3. JOIN 종류
예시1.raw_data.Vital
| UserID | VitalID | Date | Weight |
|---|---|---|---|
| 100 | 1 | 2020-01-01 | 75 |
| 100 | 3 | 2020-01-02 | 78 |
| 101 | 2 | 2020-01-01 | 90 |
| 101 | 4 | 2020-01-02 | 95 |
예시2.raw_data.Alert
| AlertID | VitalID | AlertType | Date | UserID |
|---|---|---|---|---|
| 1 | 4 | WeightIncrease | 2020-01-02 | 101 |
| 2 | NULL | MissingVital | 2020-01-04 | 100 |
| 3 | NULL | MissingVital | 2020-01-04 | 101 |
→ VitalID 기준으로 JOIN
SELECT * FROM raw_data.Vital v
JOIN raw_data.Alert a ON v.vitalID = a.vitalID;
| v.UserID | v.VitalID | v.Date | v.Weight | a.AlertID | a.VitalID | a.AlertType | a.Date | a.UserID |
|---|---|---|---|---|---|---|---|---|
| 101 | 4 | 2020-01-02 | 95 | 1 | 4 | WeightIncrease | 2021-01-02 | 101 |
SELECT * FROM raw_data.Vital v
LEFT JOIN raw_data.Alert a ON v.vitalID = a.vitalID;
| v.UserID | v.VitalID | v.Date | v.Weight | a.AlertID | a.VitalID | a.AlertType | a.Date | a.UserID |
|---|---|---|---|---|---|---|---|---|
| 100 | 1 | 2020-01-01 | 75 | NULL | NULL | NULL | NULL | NULL |
| 100 | 3 | 2020-01-02 | 78 | NULL | NULL | NULL | NULL | NULL |
| 101 | 2 | 2020-01-01 | 90 | NULL | NULL | NULL | NULL | NULL |
| 101 | 4 | 2020-01-02 | 95 | 1 | 4 | WeightIncrease | 2021-01-02 | 101 |
SELECT * FROM raw_data.Vital v
FULL JOIN raw_data.Alert a ON v.vitalID = a.vitalID;
| v.UserID | v.VitalID | v.Date | v.Weight | a.AlertID | a.VitalID | a.AlertType | a.Date | a.UserID |
|---|---|---|---|---|---|---|---|---|
| 100 | 1 | 2020-01-01 | 75 | NULL | NULL | NULL | NULL | NULL |
| 100 | 3 | 2020-01-02 | 78 | NULL | NULL | NULL | NULL | NULL |
| 101 | 2 | 2020-01-01 | 90 | NULL | NULL | NULL | NULL | NULL |
| 101 | 4 | 2020-01-02 | 95 | 1 | 4 | WeightIncrease | 2021-01-02 | 101 |
| NULL | NULL | NULL | NULL | 2 | NULL | MissingVital | 2020-01-04 | 100 |
| NULL | NULL | NULL | NULL | 3 | NULL | MissingVital | 2020-01-04 | 101 |
SELECT * FROM raw_data.Vital v CROSS JOIN raw_data.Alert a;
| v.UserID | v.VitalID | v.Date | v.Weight | a.AlertID | a.VitalID | a.AlertType | a.Date | a.UserID |
|---|---|---|---|---|---|---|---|---|
| 100 | 1 | 2020-01-01 | 75 | 1 | 4WeightIncrease | 2020-01-01 | 101 | |
| 100 | 3 | 2020-01-02 | 78 | 1 | 4WeightIncrease | 2020-01-01 | 101 | |
| 101 | 2 | 2020-01-01 | 90 | 1 | 4WeightIncrease | 2020-01-01 | 101 | |
| 101 | 4 | 2020-01-02 | 95 | 1 | 4WeightIncrease | 2020-01-01 | 101 | |
| 100 | 1 | 2020-01-01 | 75 | 2 | MissingVital | 2020-01-04 | 100 | |
| 100 | 3 | 2020-01-02 | 78 | 2 | MissingVital | 2020-01-04 | 100 | |
| 101 | 2 | 2020-01-01 | 90 | 2 | MissingVital | 2020-01-04 | 100 | |
| 101 | 4 | 2020-01-02 | 95 | 2 | MissingVital | 2020-01-04 | 100 | |
| 100 | 1 | 2020-01-01 | 75 | 3 | MissingVital | 2020-01-04 | 101 | |
| 100 | 3 | 2020-01-02 | 78 | 3 | MissingVital | 2020-01-04 | 101 | |
| 101 | 2 | 2020-01-01 | 90 | 3 | MissingVital | 2020-01-04 | 101 | |
| 101 | 4 | 2020-01-02 | 95 | 3 | MissingVital | 2020-01-04 | 101 |
- BOOLEAN 타입 처리
| flag (raw_data.boolean_test) |
|---|
| True |
| False |
| True |
| NULL |
| False |
SELECT
COUNT(CASE WHEN flag = True THEN 1 END) true_cnt1, # 2
COUNT(CASE WHEN flag is True THEN 1 END) true_cnt2, # 2
COUNT(CASE WHEN flag is not False THEN 1 END) not_false_cnt # 3
FROM raw_data.boolean_test;
- NULL 비교
SELECT COUNT(1)
FROM raw_data.boolean_test
WHERE flag is NULL; # 1
SELECT COUNT(1)
FROM raw_data.boolean_test
WHERE flag = NULL; # NULL(매칭이안됨)
- NULLIF
- 숙제풀이
- 채널별 월 매출액 테이블 만들기 - 이제 conversionRate을 추가해보자
- 첫 번째 시도:
- paidUsers/uniqueUsers AS conversionRate
# 정수 타입이기 때문에 0 또는 1 출력. 따라서 type 변경 필요
- 두 번째 시도:
- paidUsers::float/uniqueUsers AS conversionRate
# ::float float으로 변경
- 세 번째 시도:
- ROUND(paidUsers*100.0/uniqueUsers, 2) AS conversionRate
# 실수 * 정수 = 실수
- 네 번째 시도:
- ROUND(paidUsers*100.0/NULLIF(uniqueUsers, 0), 2) AS conversionRate
# 분모가 0인 경우를 고려
- COALESCE
| value(raw_data.count_test) |
|---|
| NULL |
| 1 |
| 1 |
| 0 |
| 0 |
| 4 |
| 3 |
SELECT value,
COALESCE(value, 0) -- value가 NULL이면 0을 리턴
FROM raw_data.count_test;
- 공백 혹은 예약키워드를 필드 이름으로 사용하려면?
CREATE TABLE test (
group int primary key,
'mailing address' varchar(32)
);