10. SQL - BigQuery

Jasonยท2026๋…„ 1์›” 12์ผ

SQL

๋ชฉ๋ก ๋ณด๊ธฐ
10/47


SQL BigQuery ๋ฌธ๋ฒ• โ€” ์ฑ„๋„ํ†ก์ด ์“ฐ๋Š” ๊ทธ DB

๐Ÿ’ก ์ฑ„๋„ํ†ก JD์— BigQuery๊ฐ€ ์žˆ๋‹ค! MySQL๊ณผ ๋‹ค๋ฅธ ๋ฌธ๋ฒ•์„ ์ •๋ฆฌํ•œ๋‹ค.


๐Ÿค” BigQuery๊ฐ€ ๋ญ์•ผ?

Google Cloud์˜ ๋ฐ์ดํ„ฐ ์›จ์–ดํ•˜์šฐ์Šค.

  • ํŽ˜ํƒ€๋ฐ”์ดํŠธ๊ธ‰ ๋ฐ์ดํ„ฐ ์ฒ˜๋ฆฌ
  • ์„œ๋ฒ„๋ฆฌ์Šค (๊ด€๋ฆฌ ํ•„์š” ์—†์Œ)
  • SQL ๊ธฐ๋ฐ˜์ด์ง€๋งŒ ๋ฌธ๋ฒ•์ด ์กฐ๊ธˆ ๋‹ค๋ฆ„

์ฑ„๋„ํ†ก JD:

"BigQuery ๋ฐ์ดํ„ฐ ๋งˆํŠธ๋ฅผ ์ง์ ‘ ์„ค๊ณ„ํ•˜๊ณ , Dataform์„ ํ™œ์šฉํ•˜์—ฌ ๋ฐ์ดํ„ฐ ๋ชจ๋ธ๋ง"

โ†’ BigQuery ๋ฌธ๋ฒ• ํ•„์ˆ˜!


๐Ÿ“Š MySQL vs BigQuery ํ•ต์‹ฌ ์ฐจ์ด

๊ธฐ๋ŠฅMySQLBigQuery
๋ฌธ์ž์—ด ์—ฐ๊ฒฐCONCAT()CONCAT() ๋˜๋Š” `
๋‚ ์งœ ํฌ๋งทDATE_FORMAT()FORMAT_DATE()
๋‚ ์งœ ์ถ”์ถœYEAR(), MONTH()EXTRACT()
๋ฌธ์ž์—ด ๋ถ„๋ฆฌSUBSTRING_INDEX()SPLIT()
๋ฐฐ์—ด์—†์ŒARRAY, UNNEST()
๊ตฌ์กฐ์ฒด์—†์ŒSTRUCT
์ •๊ทœ์‹REGEXPREGEXP_CONTAINS()

1๏ธโƒฃ ๊ธฐ๋ณธ ๋ฌธ๋ฒ• ์ฐจ์ด

ํ…Œ์ด๋ธ” ์ฐธ์กฐ

-- MySQL
SELECT * FROM database.table;

-- BigQuery
SELECT * FROM `project.dataset.table`;
-- ๋ฐฑํ‹ฑ(`) ์‚ฌ์šฉ!

๋ณ„์นญ (AS ์„ ํƒ)

-- BigQuery์—์„œ๋Š” AS ์ƒ๋žต ๊ฐ€๋Šฅ
SELECT 
    name AS user_name,  -- AS ์žˆ์Œ
    age user_age        -- AS ์—†์Œ (๊ฐ€๋Šฅ)
FROM users;

LIMIT

-- ๋‘˜ ๋‹ค ๋™์ผ
SELECT * FROM users LIMIT 10;

2๏ธโƒฃ ๋‚ ์งœ/์‹œ๊ฐ„ ํ•จ์ˆ˜

ํ˜„์žฌ ๋‚ ์งœ/์‹œ๊ฐ„

-- MySQL
SELECT CURDATE(), NOW();

-- BigQuery
SELECT CURRENT_DATE(), CURRENT_TIMESTAMP();

๋‚ ์งœ ํฌ๋งท

-- MySQL
SELECT DATE_FORMAT(order_date, '%Y-%m');

-- BigQuery
SELECT FORMAT_DATE('%Y-%m', order_date);
-- ์ˆœ์„œ๊ฐ€ ๋ฐ˜๋Œ€! (ํฌ๋งท, ๋‚ ์งœ)

๋‚ ์งœ ์ถ”์ถœ

-- MySQL
SELECT YEAR(order_date), MONTH(order_date);

-- BigQuery
SELECT 
    EXTRACT(YEAR FROM order_date),
    EXTRACT(MONTH FROM order_date);

๋‚ ์งœ ์—ฐ์‚ฐ

-- MySQL
SELECT DATE_ADD(order_date, INTERVAL 7 DAY);
SELECT DATE_SUB(order_date, INTERVAL 1 MONTH);

-- BigQuery
SELECT DATE_ADD(order_date, INTERVAL 7 DAY);
SELECT DATE_SUB(order_date, INTERVAL 1 MONTH);
-- ๋™์ผ!

๋‚ ์งœ ์ฐจ์ด

-- MySQL
SELECT DATEDIFF(date1, date2);

-- BigQuery
SELECT DATE_DIFF(date1, date2, DAY);
-- ๋‹จ์œ„ ๋ช…์‹œ ํ•„์š” (DAY, MONTH, YEAR ๋“ฑ)

3๏ธโƒฃ ๋ฌธ์ž์—ด ํ•จ์ˆ˜

๊ธฐ๋ณธ ํ•จ์ˆ˜ (๋™์ผ)

-- ๋‘˜ ๋‹ค ๋™์ผ
SELECT 
    LENGTH(name),
    UPPER(name),
    LOWER(name),
    TRIM(name),
    CONCAT(first, ' ', last)
FROM users;

๋ถ€๋ถ„ ๋ฌธ์ž์—ด

-- MySQL
SELECT SUBSTRING(email, 1, 5);
SELECT LEFT(email, 5);

-- BigQuery
SELECT SUBSTR(email, 1, 5);  -- SUBSTRING๋„ ๊ฐ€๋Šฅ
SELECT LEFT(email, 5);       -- ๋™์ผ

๋ฌธ์ž์—ด ๋ถ„๋ฆฌ โญ

-- MySQL
SELECT SUBSTRING_INDEX(email, '@', 1);  -- @ ์•ž๋ถ€๋ถ„

-- BigQuery (๋ฐฐ์—ด ๋ฐ˜ํ™˜!)
SELECT SPLIT(email, '@')[OFFSET(0)];    -- @ ์•ž๋ถ€๋ถ„
SELECT SPLIT(email, '@')[OFFSET(1)];    -- @ ๋’ท๋ถ€๋ถ„
-- OFFSET์€ 0๋ถ€ํ„ฐ ์‹œ์ž‘!

๋ฌธ์ž์—ด ์œ„์น˜

-- MySQL
SELECT LOCATE('@', email);

-- BigQuery
SELECT STRPOS(email, '@');
-- ๊ฒฐ๊ณผ ๋™์ผ (1๋ถ€ํ„ฐ ์‹œ์ž‘)

4๏ธโƒฃ ์ •๊ทœ์‹

ํŒจํ„ด ๋งค์นญ

-- MySQL
SELECT * FROM users WHERE email REGEXP '@gmail';

-- BigQuery
SELECT * FROM users WHERE REGEXP_CONTAINS(email, r'@gmail');
-- r'' ์€ raw string (์ด์Šค์ผ€์ดํ”„ ํŽธํ•จ)

ํŒจํ„ด ์ถ”์ถœ

-- BigQuery ์ „์šฉ
SELECT REGEXP_EXTRACT(email, r'@(.+)\.') AS domain;
-- ์บก์ฒ˜ ๊ทธ๋ฃน ์ถ”์ถœ

ํŒจํ„ด ์น˜ํ™˜

-- BigQuery
SELECT REGEXP_REPLACE(phone, r'[^0-9]', '') AS clean_phone;
-- ์ˆซ์ž๋งŒ ๋‚จ๊ธฐ๊ธฐ

5๏ธโƒฃ ๋ฐฐ์—ด (ARRAY) โญ

BigQuery์˜ ๊ฐ•๋ ฅํ•œ ๊ธฐ๋Šฅ!

๋ฐฐ์—ด ์ƒ์„ฑ

SELECT [1, 2, 3] AS numbers;
SELECT ['a', 'b', 'c'] AS letters;

๋ฐฐ์—ด ์ ‘๊ทผ

SELECT 
    arr[OFFSET(0)] AS first,    -- 0๋ถ€ํ„ฐ ์‹œ์ž‘
    arr[ORDINAL(1)] AS first2,  -- 1๋ถ€ํ„ฐ ์‹œ์ž‘
    arr[SAFE_OFFSET(10)] AS safe_access  -- ๋ฒ”์œ„ ๋ฒ—์–ด๋‚˜๋ฉด NULL
FROM (SELECT [1,2,3] AS arr);

UNNEST โ€” ๋ฐฐ์—ด ํŽผ์น˜๊ธฐ

-- ๋ฐฐ์—ด์„ ํ–‰์œผ๋กœ ๋ณ€ํ™˜
SELECT num
FROM UNNEST([1, 2, 3]) AS num;
num
1
2
3

์‹ค์ „ ํ™œ์šฉ: ํƒœ๊ทธ ๋ถ„์„

-- tags ์ปฌ๋Ÿผ์ด ['์„œ์šธ', '๊ฐœ๋ฐœ', 'AI'] ํ˜•ํƒœ์ผ ๋•Œ
SELECT 
    user_id,
    tag
FROM users, UNNEST(tags) AS tag;
user_idtag
1์„œ์šธ
1๊ฐœ๋ฐœ
1AI

6๏ธโƒฃ ๊ตฌ์กฐ์ฒด (STRUCT)

๊ตฌ์กฐ์ฒด ์ƒ์„ฑ

SELECT STRUCT(1 AS id, 'kim' AS name) AS user_info;

๊ตฌ์กฐ์ฒด ์ ‘๊ทผ

SELECT 
    user_info.id,
    user_info.name
FROM (
    SELECT STRUCT(1 AS id, 'kim' AS name) AS user_info
);

7๏ธโƒฃ ์œˆ๋„์šฐ ํ•จ์ˆ˜

๊ธฐ๋ณธ (๋™์ผ)

-- ๋‘˜ ๋‹ค ๋™์ผ
SELECT 
    *,
    ROW_NUMBER() OVER(PARTITION BY user_id ORDER BY amount DESC) AS rn
FROM orders;

BigQuery ์ „์šฉ: QUALIFY โญ

-- MySQL: CTE ํ•„์š”
WITH ranked AS (
    SELECT *, ROW_NUMBER() OVER(PARTITION BY user_id ORDER BY amount DESC) AS rn
    FROM orders
)
SELECT * FROM ranked WHERE rn = 1;

-- BigQuery: QUALIFY๋กœ ๊ฐ„๋‹จํ•˜๊ฒŒ!
SELECT *
FROM orders
QUALIFY ROW_NUMBER() OVER(PARTITION BY user_id ORDER BY amount DESC) = 1;

โ†’ ์ฝ”๋“œ๊ฐ€ ํ›จ์”ฌ ๊ฐ„๊ฒฐ!


8๏ธโƒฃ ์ง‘๊ณ„ ํ•จ์ˆ˜

๋ฐฐ์—ด ์ง‘๊ณ„

-- ๊ทธ๋ฃน๋ณ„๋กœ ๊ฐ’๋“ค์„ ๋ฐฐ์—ด๋กœ
SELECT 
    user_id,
    ARRAY_AGG(product) AS products
FROM orders
GROUP BY user_id;
user_idproducts
1['๋…ธํŠธ๋ถ', '๋งˆ์šฐ์Šค', 'ํ‚ค๋ณด๋“œ']
2['๋ชจ๋‹ˆํ„ฐ']

๋ฌธ์ž์—ด ์ง‘๊ณ„

-- MySQL: GROUP_CONCAT
SELECT user_id, GROUP_CONCAT(product) FROM orders GROUP BY user_id;

-- BigQuery: STRING_AGG
SELECT user_id, STRING_AGG(product, ', ') AS products
FROM orders
GROUP BY user_id;

9๏ธโƒฃ ์กฐ๊ฑด๋ฌธ

IF

-- MySQL
SELECT IF(amount > 100000, 'VIP', '์ผ๋ฐ˜') AS tier;

-- BigQuery (๋™์ผ)
SELECT IF(amount > 100000, 'VIP', '์ผ๋ฐ˜') AS tier;

CASE WHEN (๋™์ผ)

SELECT 
    CASE 
        WHEN amount >= 1000000 THEN 'VIP'
        WHEN amount >= 100000 THEN '์šฐ์ˆ˜'
        ELSE '์ผ๋ฐ˜'
    END AS tier
FROM orders;

IFNULL / COALESCE

-- MySQL
SELECT IFNULL(amount, 0);

-- BigQuery (IFNULL๋„ ๋˜์ง€๋งŒ COALESCE ๊ถŒ์žฅ)
SELECT COALESCE(amount, 0);

๐Ÿ”Ÿ ์œ ์šฉํ•œ BigQuery ์ „์šฉ ๊ธฐ๋Šฅ

WITH RECURSIVE (๊ณ„์ธต ์ฟผ๋ฆฌ)

-- ์กฐ์ง๋„, ์นดํ…Œ๊ณ ๋ฆฌ ํŠธ๋ฆฌ ๋“ฑ
WITH RECURSIVE org_tree AS (
    SELECT id, name, manager_id, 1 AS level
    FROM employees WHERE manager_id IS NULL
    
    UNION ALL
    
    SELECT e.id, e.name, e.manager_id, t.level + 1
    FROM employees e
    JOIN org_tree t ON e.manager_id = t.id
)
SELECT * FROM org_tree;

PIVOT / UNPIVOT

-- ํ–‰ โ†’ ์—ด ๋ณ€ํ™˜
SELECT * FROM sales
PIVOT(SUM(amount) FOR month IN ('Jan', 'Feb', 'Mar'));

TABLESAMPLE

-- ์ƒ˜ํ”Œ๋ง (๋Œ€์šฉ๋Ÿ‰ ํ…Œ์ด๋ธ”์—์„œ ์ผ๋ถ€๋งŒ)
SELECT * FROM `bigquery-public-data.samples.natality`
TABLESAMPLE SYSTEM (1 PERCENT);

๐ŸŽฏ ์‹ค์ „ ๋ฌธ์ œ

๋ฌธ์ œ 1: ๋‚ ์งœ ํฌ๋งท

์›”๋ณ„ ์ฃผ๋ฌธ ๊ฑด์ˆ˜๋ฅผ BigQuery ๋ฌธ๋ฒ•์œผ๋กœ ๊ตฌํ•˜์„ธ์š”.

์ •๋‹ต ๋ณด๊ธฐ
SELECT 
    FORMAT_DATE('%Y-%m', order_date) AS month,
    COUNT(*) AS order_count
FROM `project.dataset.orders`
GROUP BY FORMAT_DATE('%Y-%m', order_date)
ORDER BY month;

๋ฌธ์ œ 2: ์ด๋ฉ”์ผ ๋„๋ฉ”์ธ ์ถ”์ถœ

์ด๋ฉ”์ผ์—์„œ ๋„๋ฉ”์ธ์„ ์ถ”์ถœํ•˜์„ธ์š”. (BigQuery)

์ •๋‹ต ๋ณด๊ธฐ
SELECT 
    email,
    SPLIT(email, '@')[OFFSET(1)] AS domain
FROM users;

-- ๋˜๋Š” ์ •๊ทœ์‹
SELECT 
    email,
    REGEXP_EXTRACT(email, r'@(.+)') AS domain
FROM users;

๋ฌธ์ œ 3: QUALIFY ํ™œ์šฉ

๊ฐ ์œ ์ €๋ณ„ ๊ฐ€์žฅ ํฐ ์ฃผ๋ฌธ์„ QUALIFY๋กœ ๊ตฌํ•˜์„ธ์š”.

์ •๋‹ต ๋ณด๊ธฐ
SELECT *
FROM orders
QUALIFY ROW_NUMBER() OVER(PARTITION BY user_id ORDER BY amount DESC) = 1;

๋ฌธ์ œ 4: ๋ฐฐ์—ด ํ™œ์šฉ

์œ ์ €๋ณ„ ๊ตฌ๋งคํ•œ ์ƒํ’ˆ๋“ค์„ ๋ฐฐ์—ด๋กœ ์ง‘๊ณ„ํ•˜์„ธ์š”.

์ •๋‹ต ๋ณด๊ธฐ
SELECT 
    user_id,
    ARRAY_AGG(DISTINCT product ORDER BY product) AS products,
    COUNT(DISTINCT product) AS product_count
FROM orders
GROUP BY user_id;

๐Ÿ“ MySQL โ†’ BigQuery ๋ณ€ํ™˜ ์น˜ํŠธ์‹œํŠธ

MySQLBigQuery
DATE_FORMAT(d, '%Y-%m')FORMAT_DATE('%Y-%m', d)
YEAR(d)EXTRACT(YEAR FROM d)
DATEDIFF(a, b)DATE_DIFF(a, b, DAY)
SUBSTRING_INDEX(s, '@', 1)SPLIT(s, '@')[OFFSET(0)]
LOCATE('@', s)STRPOS(s, '@')
GROUP_CONCAT(x)STRING_AGG(x, ',')
IFNULL(x, 0)COALESCE(x, 0)
CTE + WHERE rn=1QUALIFY
REGEXP 'ํŒจํ„ด'REGEXP_CONTAINS(col, r'ํŒจํ„ด')

๐Ÿ’ก BigQuery ํŒ

1. ๋น„์šฉ ์ฃผ์˜

-- โŒ ์ „์ฒด ์Šค์บ” (๋น„์šฉ ํผ)
SELECT * FROM huge_table;

-- โœ… ํ•„์š”ํ•œ ์ปฌ๋Ÿผ๋งŒ (๋น„์šฉ ์ ˆ์•ฝ)
SELECT id, name FROM huge_table;

-- โœ… ํŒŒํ‹ฐ์…˜ ํ•„ํ„ฐ (๋น„์šฉ ์ ˆ์•ฝ)
SELECT * FROM huge_table
WHERE date >= '2024-01-01';

2. ๋ฐฑํ‹ฑ ์‚ฌ์šฉ

-- ํ”„๋กœ์ ํŠธ.๋ฐ์ดํ„ฐ์…‹.ํ…Œ์ด๋ธ” ํ˜•์‹
SELECT * FROM `my-project.my_dataset.my_table`;

3. SAFE ํ•จ์ˆ˜

-- ์—๋Ÿฌ ๋Œ€์‹  NULL ๋ฐ˜ํ™˜
SELECT 
    SAFE_DIVIDE(a, b),           -- 0์œผ๋กœ ๋‚˜๋ˆ ๋„ NULL
    SAFE_CAST('abc' AS INT64)    -- ๋ณ€ํ™˜ ์‹คํŒจํ•ด๋„ NULL
FROM table;

๐Ÿ”— ์ฐธ๊ณ  ์ž๋ฃŒ


๐Ÿ“ BigQuery๋Š” MySQL๊ณผ 90% ๋น„์Šทํ•˜์ง€๋งŒ, 10%์˜ ์ฐจ์ด๊ฐ€ ์ค‘์š”ํ•˜๋‹ค.
FORMAT_DATE, SPLIT, QUALIFY๋งŒ ๊ธฐ์–ตํ•ด๋„ ๋Œ€๋ถ€๋ถ„ ์ปค๋ฒ„ ๊ฐ€๋Šฅ!

profile
Data Analyst | Thoughts Become Things. ํ•  ์ˆ˜ ์žˆ๋‹ค๊ณ  ์ƒ๊ฐํ•˜๋ฉด ํ•  ์ˆ˜ ์žˆ๋‹ค. ํ•  ์ˆ˜ ์—†๋‹ค๊ณ  ์ƒ๊ฐํ•˜๋ฉด ํ•  ์ˆ˜ ์—†๋‹ค. | www.linkedin.com/in/๋ช…์ˆ˜-์ œ-7ab843200

0๊ฐœ์˜ ๋Œ“๊ธ€