Cannot access field name on a value with type ARRAY
와 같은 Error을 접하게 된다비정규화를 위해 중첩(Nested)및 반복(Repeated)열을 사용한다.
중첩된 레코드를 사용할 때 장점
- 빅쿼리 퍼포먼스 개선
- 데이터 저장 용량의 효율
- 스키마가 바뀌어도 유연하게 대응 가능
(출처 : https://cloud.google.com/bigquery/docs/nested-repeated)
파이썬의 LIST와 유사(완전히 동일하지는 않음)
하나의 행에 데이터 타입이 동일한 여러 값이 저장 됨
빅쿼리 UI에서 배열을 보여줄 때 세로로 나열 됨
SELECT
[1,2,3] AS array_sample, 1 AS int_value
UNION ALL
SELECT
[3,5,7] AS array_sample, 2 AS int_value
[
,]
) 사용SELECT [1, 2, 3] AS array_sample
SELECT ARRAY<INT64>[1, 2, 3] AS int_array
GENERATE_ARRAY
, GENERATE_DATE_ARRAY
, GENERATE_TIMESTAMP_ARRAY
등 사용GENERATE_ARRAY(시작, 종료, 간격)
: python에서 range(start, end, step)
과 동일SELECT GENERATE_ARRAY(1, 10, 2) AS generate_array_date
GENERATE_DATE_ARRAY
도 GENERATE_ARRAY
와 동일
SELECT GENERATE_DATE_ARRAY('2020-01-01', '2020-02-01', INTERVAL 1 WEEK) AS generate_date_array_data
WITH programing_languages AS
(SELECT "python" AS programing_language
UNION ALL SELECT "go" AS programing_language
UNION ALL SELECT "scala" AS programing_language)
SELECT ARRAY_AGG(programing_language) AS programing_languages_array
FROM programing_languages
- 배열 내 접근
- 배열의 N번째 값을 가져오고 싶은 경우 OFFSET, ORDINAL을 사용할 수 있다
- OFFSET: 0부터 시작
- ORDINAL: 1부터 시작
- 존재하지 않는 N을 지정하면 에러가 발생하는데, 이럴 경우 SAFE_를 앞에 붙여주면 (SAFE_OFFSET, SAFE_ORDICAL)에러가 발생하지 않고 NULL이 return된다
WITH programming_languages AS
(SELECT "python" AS programming_language
UNION ALL SELECT "go" AS programming_language
UNION ALL SELECT "scala" AS programming_language)
SELECT
ARRAY_AGG(programming_language)[OFFSET(0)] AS programming_languages_array,
ARRAY_AGG(programming_language)[ORDINAL(1)] AS programming_languages_array2
FROM programming_languages
- 배열 역순
- ARRAY_REVERSE 사용
WITH programming_languages AS
(SELECT "python" AS programming_language
UNION ALL SELECT "go" AS programming_language
UNION ALL SELECT "scala" AS programming_language)
SELECT
ARRAY_REVERSE(ARRAY_AGG(programming_language)) AS programming_languages_array_reverse
FROM programming_languages
- 배열의 길이
- ARRAY_LENGTH 함수 사용
WITH example_data AS(
SELECT
'kyle' AS name,
['Python', 'SQL', 'R', 'Julia', 'Go'] AS preferred_language,
31 AS age
UNION ALL
SELECT
'max' AS name,
['Python', 'SQL', 'Scala', 'Java', 'Kotlin'] AS preferred_language,
29 AS age
UNION ALL
SELECT
'yun' AS name,
['Python', 'SQL'] AS preferred_language,
28 AS age
)
SELECT
*,
ARRAY_LENGTH(preferred_language) AS preferred_language_len
FROM example_data
(
, )
) 사용SELECT (1,2,3) AS struct_test
- STRUCT <타입> 사용
SELECT STRUCT<INT64, INT64, STRING>(1, 2, 'HI') AS struct_test;
- ARRAY 안에 여러 STRUCT를 사용하고 싶은 경우
- ARRAY(SELECT AS STRUCT) 이런 형태로 사용
SELECT
ARRAY(
SELECT AS STRUCT 1 as hi, 2, 3
UNION ALL
SELECT AS STRUCT 4 as hi, 5, 6
) AS new_array
WITH example_data AS(
SELECT
'kyle' AS name,
['Python', 'SQL', 'R', 'Julia', 'Go'] AS preferred_language,
31 AS age
UNION ALL
SELECT
'max' AS name,
['Python', 'SQL', 'Scala', 'Java', 'Kotlin'] AS preferred_language,
29 AS age
UNION ALL
SELECT
'yun' AS name,
['Python', 'SQL'] AS preferred_language,
28 AS age
)
SELECT
name, preferred_language, age
FROM example_data
WITH example_data AS(
SELECT
'kyle' AS name,
['Python', 'SQL', 'R', 'Julia', 'Go'] AS preferred_language,
31 AS age
UNION ALL
SELECT
'max' AS name,
['Python', 'SQL', 'Scala', 'Java', 'Kotlin'] AS preferred_language,
29 AS age
UNION ALL
SELECT
'yun' AS name,
['Python', 'SQL'] AS preferred_language,
28 AS age
)
SELECT
name, prefer_lang, age
FROM example_data, UNNEST(example_data.preferred_language) as prefer_lang
여기서 where로 julia 필터링 해주면 찾을 수 있다
출처