


๐ก ์ฑ๋ํก JD์ BigQuery๊ฐ ์๋ค! MySQL๊ณผ ๋ค๋ฅธ ๋ฌธ๋ฒ์ ์ ๋ฆฌํ๋ค.
Google Cloud์ ๋ฐ์ดํฐ ์จ์ดํ์ฐ์ค.
์ฑ๋ํก JD:
"BigQuery ๋ฐ์ดํฐ ๋งํธ๋ฅผ ์ง์ ์ค๊ณํ๊ณ , Dataform์ ํ์ฉํ์ฌ ๋ฐ์ดํฐ ๋ชจ๋ธ๋ง"
โ BigQuery ๋ฌธ๋ฒ ํ์!
| ๊ธฐ๋ฅ | MySQL | BigQuery |
|---|---|---|
| ๋ฌธ์์ด ์ฐ๊ฒฐ | CONCAT() | CONCAT() ๋๋ ` |
| ๋ ์ง ํฌ๋งท | DATE_FORMAT() | FORMAT_DATE() |
| ๋ ์ง ์ถ์ถ | YEAR(), MONTH() | EXTRACT() |
| ๋ฌธ์์ด ๋ถ๋ฆฌ | SUBSTRING_INDEX() | SPLIT() |
| ๋ฐฐ์ด | ์์ | ARRAY, UNNEST() |
| ๊ตฌ์กฐ์ฒด | ์์ | STRUCT |
| ์ ๊ท์ | REGEXP | REGEXP_CONTAINS() |
-- MySQL
SELECT * FROM database.table;
-- BigQuery
SELECT * FROM `project.dataset.table`;
-- ๋ฐฑํฑ(`) ์ฌ์ฉ!
-- BigQuery์์๋ AS ์๋ต ๊ฐ๋ฅ
SELECT
name AS user_name, -- AS ์์
age user_age -- AS ์์ (๊ฐ๋ฅ)
FROM users;
-- ๋ ๋ค ๋์ผ
SELECT * FROM users LIMIT 10;
-- MySQL
SELECT CURDATE(), NOW();
-- BigQuery
SELECT CURRENT_DATE(), CURRENT_TIMESTAMP();
-- MySQL
SELECT DATE_FORMAT(order_date, '%Y-%m');
-- BigQuery
SELECT FORMAT_DATE('%Y-%m', order_date);
-- ์์๊ฐ ๋ฐ๋! (ํฌ๋งท, ๋ ์ง)
-- MySQL
SELECT YEAR(order_date), MONTH(order_date);
-- BigQuery
SELECT
EXTRACT(YEAR FROM order_date),
EXTRACT(MONTH FROM order_date);
-- MySQL
SELECT DATE_ADD(order_date, INTERVAL 7 DAY);
SELECT DATE_SUB(order_date, INTERVAL 1 MONTH);
-- BigQuery
SELECT DATE_ADD(order_date, INTERVAL 7 DAY);
SELECT DATE_SUB(order_date, INTERVAL 1 MONTH);
-- ๋์ผ!
-- MySQL
SELECT DATEDIFF(date1, date2);
-- BigQuery
SELECT DATE_DIFF(date1, date2, DAY);
-- ๋จ์ ๋ช
์ ํ์ (DAY, MONTH, YEAR ๋ฑ)
-- ๋ ๋ค ๋์ผ
SELECT
LENGTH(name),
UPPER(name),
LOWER(name),
TRIM(name),
CONCAT(first, ' ', last)
FROM users;
-- MySQL
SELECT SUBSTRING(email, 1, 5);
SELECT LEFT(email, 5);
-- BigQuery
SELECT SUBSTR(email, 1, 5); -- SUBSTRING๋ ๊ฐ๋ฅ
SELECT LEFT(email, 5); -- ๋์ผ
-- MySQL
SELECT SUBSTRING_INDEX(email, '@', 1); -- @ ์๋ถ๋ถ
-- BigQuery (๋ฐฐ์ด ๋ฐํ!)
SELECT SPLIT(email, '@')[OFFSET(0)]; -- @ ์๋ถ๋ถ
SELECT SPLIT(email, '@')[OFFSET(1)]; -- @ ๋ท๋ถ๋ถ
-- OFFSET์ 0๋ถํฐ ์์!
-- MySQL
SELECT LOCATE('@', email);
-- BigQuery
SELECT STRPOS(email, '@');
-- ๊ฒฐ๊ณผ ๋์ผ (1๋ถํฐ ์์)
-- MySQL
SELECT * FROM users WHERE email REGEXP '@gmail';
-- BigQuery
SELECT * FROM users WHERE REGEXP_CONTAINS(email, r'@gmail');
-- r'' ์ raw string (์ด์ค์ผ์ดํ ํธํจ)
-- BigQuery ์ ์ฉ
SELECT REGEXP_EXTRACT(email, r'@(.+)\.') AS domain;
-- ์บก์ฒ ๊ทธ๋ฃน ์ถ์ถ
-- BigQuery
SELECT REGEXP_REPLACE(phone, r'[^0-9]', '') AS clean_phone;
-- ์ซ์๋ง ๋จ๊ธฐ๊ธฐ
BigQuery์ ๊ฐ๋ ฅํ ๊ธฐ๋ฅ!
SELECT [1, 2, 3] AS numbers;
SELECT ['a', 'b', 'c'] AS letters;
SELECT
arr[OFFSET(0)] AS first, -- 0๋ถํฐ ์์
arr[ORDINAL(1)] AS first2, -- 1๋ถํฐ ์์
arr[SAFE_OFFSET(10)] AS safe_access -- ๋ฒ์ ๋ฒ์ด๋๋ฉด NULL
FROM (SELECT [1,2,3] AS arr);
-- ๋ฐฐ์ด์ ํ์ผ๋ก ๋ณํ
SELECT num
FROM UNNEST([1, 2, 3]) AS num;
| num |
|---|
| 1 |
| 2 |
| 3 |
-- tags ์ปฌ๋ผ์ด ['์์ธ', '๊ฐ๋ฐ', 'AI'] ํํ์ผ ๋
SELECT
user_id,
tag
FROM users, UNNEST(tags) AS tag;
| user_id | tag |
|---|---|
| 1 | ์์ธ |
| 1 | ๊ฐ๋ฐ |
| 1 | AI |
SELECT STRUCT(1 AS id, 'kim' AS name) AS user_info;
SELECT
user_info.id,
user_info.name
FROM (
SELECT STRUCT(1 AS id, 'kim' AS name) AS user_info
);
-- ๋ ๋ค ๋์ผ
SELECT
*,
ROW_NUMBER() OVER(PARTITION BY user_id ORDER BY amount DESC) AS rn
FROM orders;
-- MySQL: CTE ํ์
WITH ranked AS (
SELECT *, ROW_NUMBER() OVER(PARTITION BY user_id ORDER BY amount DESC) AS rn
FROM orders
)
SELECT * FROM ranked WHERE rn = 1;
-- BigQuery: QUALIFY๋ก ๊ฐ๋จํ๊ฒ!
SELECT *
FROM orders
QUALIFY ROW_NUMBER() OVER(PARTITION BY user_id ORDER BY amount DESC) = 1;
โ ์ฝ๋๊ฐ ํจ์ฌ ๊ฐ๊ฒฐ!
-- ๊ทธ๋ฃน๋ณ๋ก ๊ฐ๋ค์ ๋ฐฐ์ด๋ก
SELECT
user_id,
ARRAY_AGG(product) AS products
FROM orders
GROUP BY user_id;
| user_id | products |
|---|---|
| 1 | ['๋ ธํธ๋ถ', '๋ง์ฐ์ค', 'ํค๋ณด๋'] |
| 2 | ['๋ชจ๋ํฐ'] |
-- MySQL: GROUP_CONCAT
SELECT user_id, GROUP_CONCAT(product) FROM orders GROUP BY user_id;
-- BigQuery: STRING_AGG
SELECT user_id, STRING_AGG(product, ', ') AS products
FROM orders
GROUP BY user_id;
-- MySQL
SELECT IF(amount > 100000, 'VIP', '์ผ๋ฐ') AS tier;
-- BigQuery (๋์ผ)
SELECT IF(amount > 100000, 'VIP', '์ผ๋ฐ') AS tier;
SELECT
CASE
WHEN amount >= 1000000 THEN 'VIP'
WHEN amount >= 100000 THEN '์ฐ์'
ELSE '์ผ๋ฐ'
END AS tier
FROM orders;
-- MySQL
SELECT IFNULL(amount, 0);
-- BigQuery (IFNULL๋ ๋์ง๋ง COALESCE ๊ถ์ฅ)
SELECT COALESCE(amount, 0);
-- ์กฐ์ง๋, ์นดํ
๊ณ ๋ฆฌ ํธ๋ฆฌ ๋ฑ
WITH RECURSIVE org_tree AS (
SELECT id, name, manager_id, 1 AS level
FROM employees WHERE manager_id IS NULL
UNION ALL
SELECT e.id, e.name, e.manager_id, t.level + 1
FROM employees e
JOIN org_tree t ON e.manager_id = t.id
)
SELECT * FROM org_tree;
-- ํ โ ์ด ๋ณํ
SELECT * FROM sales
PIVOT(SUM(amount) FOR month IN ('Jan', 'Feb', 'Mar'));
-- ์ํ๋ง (๋์ฉ๋ ํ
์ด๋ธ์์ ์ผ๋ถ๋ง)
SELECT * FROM `bigquery-public-data.samples.natality`
TABLESAMPLE SYSTEM (1 PERCENT);
์ ๋ต ๋ณด๊ธฐ์๋ณ ์ฃผ๋ฌธ ๊ฑด์๋ฅผ BigQuery ๋ฌธ๋ฒ์ผ๋ก ๊ตฌํ์ธ์.
SELECT
FORMAT_DATE('%Y-%m', order_date) AS month,
COUNT(*) AS order_count
FROM `project.dataset.orders`
GROUP BY FORMAT_DATE('%Y-%m', order_date)
ORDER BY month;
์ ๋ต ๋ณด๊ธฐ์ด๋ฉ์ผ์์ ๋๋ฉ์ธ์ ์ถ์ถํ์ธ์. (BigQuery)
SELECT
email,
SPLIT(email, '@')[OFFSET(1)] AS domain
FROM users;
-- ๋๋ ์ ๊ท์
SELECT
email,
REGEXP_EXTRACT(email, r'@(.+)') AS domain
FROM users;
์ ๋ต ๋ณด๊ธฐ๊ฐ ์ ์ ๋ณ ๊ฐ์ฅ ํฐ ์ฃผ๋ฌธ์ QUALIFY๋ก ๊ตฌํ์ธ์.
SELECT *
FROM orders
QUALIFY ROW_NUMBER() OVER(PARTITION BY user_id ORDER BY amount DESC) = 1;
์ ๋ต ๋ณด๊ธฐ์ ์ ๋ณ ๊ตฌ๋งคํ ์ํ๋ค์ ๋ฐฐ์ด๋ก ์ง๊ณํ์ธ์.
SELECT
user_id,
ARRAY_AGG(DISTINCT product ORDER BY product) AS products,
COUNT(DISTINCT product) AS product_count
FROM orders
GROUP BY user_id;
| MySQL | BigQuery |
|---|---|
DATE_FORMAT(d, '%Y-%m') | FORMAT_DATE('%Y-%m', d) |
YEAR(d) | EXTRACT(YEAR FROM d) |
DATEDIFF(a, b) | DATE_DIFF(a, b, DAY) |
SUBSTRING_INDEX(s, '@', 1) | SPLIT(s, '@')[OFFSET(0)] |
LOCATE('@', s) | STRPOS(s, '@') |
GROUP_CONCAT(x) | STRING_AGG(x, ',') |
IFNULL(x, 0) | COALESCE(x, 0) |
| CTE + WHERE rn=1 | QUALIFY |
REGEXP 'ํจํด' | REGEXP_CONTAINS(col, r'ํจํด') |
-- โ ์ ์ฒด ์ค์บ (๋น์ฉ ํผ)
SELECT * FROM huge_table;
-- โ
ํ์ํ ์ปฌ๋ผ๋ง (๋น์ฉ ์ ์ฝ)
SELECT id, name FROM huge_table;
-- โ
ํํฐ์
ํํฐ (๋น์ฉ ์ ์ฝ)
SELECT * FROM huge_table
WHERE date >= '2024-01-01';
-- ํ๋ก์ ํธ.๋ฐ์ดํฐ์
.ํ
์ด๋ธ ํ์
SELECT * FROM `my-project.my_dataset.my_table`;
-- ์๋ฌ ๋์ NULL ๋ฐํ
SELECT
SAFE_DIVIDE(a, b), -- 0์ผ๋ก ๋๋ ๋ NULL
SAFE_CAST('abc' AS INT64) -- ๋ณํ ์คํจํด๋ NULL
FROM table;
๐ BigQuery๋ MySQL๊ณผ 90% ๋น์ทํ์ง๋ง, 10%์ ์ฐจ์ด๊ฐ ์ค์ํ๋ค.
FORMAT_DATE, SPLIT, QUALIFY๋ง ๊ธฐ์ตํด๋ ๋๋ถ๋ถ ์ปค๋ฒ ๊ฐ๋ฅ!