이어서 계속
데이터를 배열로 저장하면 스토리지의 오버헤드를 줄일 수 있으며, 카디널리티에 따라 필드를 반복할 필요가 없는 쿼리의 속도가 눈에 띄게 향상한다. 그러므로 배열의 특징을 잘 이해하고 익히는 것이 중요하다.
WITH numtrips AS (
SELECT
bike_id AS id,
COUNT(*) AS num_trips
FROM
`bigquery-public-data`.london_bicycles.cycle_hire
GROUP BY
bike_id
)
SELECT
ARRAY_AGG(STRUCT(id,num_trips) ORDER BY num_trips DESC LIMIT 100) AS bike
FROM
numtrips
2. 반복되는 필드를 저장하기 위해 배열 사용하기
-- 원본 데이터
SELECT *
FROM `bigquery-public-data`.irs_990.irs_990_2015
LIMIT 3
-- 수정 데이터
SELECT
ein
, ARRAY_AGG(STRUCT(elf, tax_pd, subseccd)) AS filing
FROM `bigquery-public-data`.irs_990.irs_990_2015
WHERE ein BETWEEN '390' AND '399'
GROUP BY ein
LIMIT 3
3. 데이터를 생성하기 위해 배열 사용하기
SELECT
GENERATE_DATE_ARRAY('2019-06-23', '2019-08-22', INTERVAL 10 DAY) AS summer
WITH days AS (
SELECT
GENERATE_DATE_ARRAY('2019-06-23', '2019-08-22', INTERVAL 10 DAY) AS summer
)
SELECT summer_day
FROM days, UNNEST(summer) AS summer_day
SELECT ['Lak', 'Jordan', 'Graham'] AS minions
WITH days AS (
SELECT
GENERATE_DATE_ARRAY('2019-06-23', '2019-08-22', INTERVAL 10 DAY) AS summer,
['Lak', 'Jordan', 'Graham'] AS minions
)
SELECT
summer[ORDINAL(dayno)] AS summer_day
, minions[OFFSET(MOD(dayno, ARRAY_LENGTH(minions)))] AS minion
FROM
days, UNNEST(GENERATE_ARRAY(1,ARRAY_LENGTH(summer),1)) dayno
ORDER BY summer_day ASC
SELECT
ARRAY_CONCAT(
GENERATE_DATE_ARRAY('2019-03-23', '2019-06-22', INTERVAL 20 DAY)
, GENERATE_DATE_ARRAY('2019-08-23', '2019-11-22', INTERVAL 20 DAY)
) AS shoulder_season
SELECT
ARRAY_TO_STRING(['A', 'B', NULL, 'D'], '*', 'na') AS arr
디버깅 목적으로 배열을 하나의 문자열로 출력하는 것은 도움이 될 수 있다. ARRAY_TO_STRING 함수는 STRING 타입의 배열만 처리하지만, TO_JSON_STRING 함수는 모든 타입의 배열을 처리 할 수 있다.
SELECT
AVG(duration)
OVER(ORDER BY start_date ASC
ROWS BETWEEN 100 PRECEDING AND 1 PRECEDING) AS average_duration
FROM
`bigquery-public-data`.london_bicycles.cycle_hire
LIMIT 5
ROWS BETWEEN 50 PRECEDING AND 50 FOLLOWING
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
SELECT
start_date
, end_date
, LAST_VALUE(start_date)
OVER(PARTITION BY bike_id
ORDER BY start_date ASC
ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING) AS next_rental_start
FROM
`bigquery-public-data`.london_bicycles.cycle_hire
LIMIT 5
SELECT
start_date
, end_date
, LEAD(start_date, 1)
OVER(PARTITION BY bike_id
ORDER BY start_date ASC) AS next_rental_start
FROM
`bigquery-public-data`.london_bicycles.cycle_hire
LIMIT 5
SELECT
start_station_id
, duration
, RANK()
OVER(PARTITION BY start_station_id ORDER BY duration DESC) AS nth_longest
FROM
`bigquery-public-data`.london_bicycles.cycle_hire
LIMIT 5
WITH longest_trips AS (
SELECT
start_station_id
, duration
, RANK()
OVER(PARTITION BY start_station_id ORDER BY duration DESC) AS nth_longest
FROM
`bigquery-public-data`.london_bicycles.cycle_hire
)
SELECT
start_station_id
, ARRAY_AGG(duration ORDER BY nth_longest LIMIT 3) AS durations
FROM
longest_trips
GROUP BY start_station_id
LIMIT 5
SELECT column_name
FROM `bigquery-public-data`.irs_990.INFORMATION_SCHEMA.COLUMNS
WHERE table_name = 'irs_990_2015'
WITH columns AS (
SELECT column_name
FROM `bigquery-public-data`.irs_990.INFORMATION_SCHEMA.COLUMNS
WHERE table_name = 'irs_990_2015' AND column_name != 'ein'
)
SELECT CONCAT(
'SELECT ein, ARRAY_AGG(STRUCT(',
ARRAY_TO_STRING(ARRAY(SELECT column_name FROM columns), ',\n '),
'\n) FROM `bigquery-public-data`.irs_990.irs_990_2015\n',
'GROUP BY ein')
------ 위의 결과 값 ------
SELECT ein, ARRAY_AGG(STRUCT(elf,
tax_pd,
subseccd,
...
) FROM `bigquery-public-data`.irs_990.irs_990_2015
GROUP BY ein
--------------------------
뒤에서 이어짐