SELECT
EXTRACT(dayofweek FROM start_date) AS dayofweek
, AVG(duration) AS duration
FROM `bigquery-public-data`.london_bicycles.cycle_hire
GROUP BY dayofweek
SELECT
EXTRACT(dayofweek FROM start_date) AS dayofweek
, AVG(duration) AS duration
FROM `bigquery-public-data`.london_bicycles.cycle_hire
GROUP BY dayofweek
SELECT
bikes_count
, AVG(duration) AS duration
FROM `bigquery-public-data`.london_bicycles.cycle_hire
JOIN `bigquery-public-data`.london_bicycles.cycle_stations
ON cycle_hire.start_station_name = cycle_stations.name
GROUP BY bikes_count
SELECT
CORR(bikes_count, duration) AS corr
FROM `bigquery-public-data`.london_bicycles.cycle_hire
JOIN `bigquery-public-data`.london_bicycles.cycle_stations
ON cycle_hire.start_station_name = cycle_stations.name
SELECT
duration
, start_station_name
, CAST(EXTRACT(dayofweek FROM start_date) AS STRING) as dayofweek
, CAST(EXTRACT(hour FROM start_date) AS STRING) AS hourofday
FROM `bigquery-public-data`.london_bicycles.cycle_hire
CREATE OR REPLACE MODEL `smart-ruler-315411`.test.bicycle_model
OPTIONS(input_label_cols=['duration'], model_type='linear_reg')
AS
SELECT
duration
, start_station_name
, CAST(EXTRACT(dayofweek FROM start_date) AS STRING) AS dayofweek
, CAST(EXTRACT(hour FROM start_date) AS STRING) AS hourofday
FROM `bigquery-public-data`.london_bicycles.cycle_hire
SELECT * FROM ML.EVALUATE(MODEL ch09eu.bicycle_model)
CREATE OR REPLACE MODEL `smart-ruler-315411`.test.bicycle_model_weekday
OPTIONS(input_label_cols=['duration'], model_type='linear_reg')
AS
SELECT
duration
, start_station_name
, IF(EXTRACT(dayofweek FROM start_date) BETWEEN 2 AND 6, 'weekday', 'weekend') as dayofweek
, CAST(EXTRACT(hour FROM start_date) AS STRING) AS hourofday
FROM `bigquery-public-data`.london_bicycles.cycle_hire
CREATE OR REPLACE MODEL ch09eu.bicycle_model_bucketized
OPTIONS(input_label_cols=['duration'], model_type='linear_reg')
AS
SELECT
duration
, start_station_name
, IF(EXTRACT(dayofweek FROM start_date) BETWEEN 2 AND 6, 'weekday', 'weekend') as dayofweek
, ML.BUCKETIZE(EXTRACT(hour FROM start_date), [5, 10, 17]) AS hourofday
FROM `bigquery-public-data`.london_bicycles.cycle_hire
CREATE OR REPLACE MODEL test.bicycle_model_bucketized
TRANSFORM(* EXCEPT(start_date)
, IF(EXTRACT(dayofweek FROM start_date) BETWEEN 2 AND 6, 'weekday', 'weekend') as dayofweek
, ML.BUCKETIZE(EXTRACT(HOUR FROM start_date), [5, 10, 17]) AS hourofday
)
OPTIONS(input_label_cols=['duration'], model_type='linear_reg')
AS
SELECT
duration
, start_station_name
, start_date
FROM `bigquery-public-data`.london_bicycles.cycle_hire
SELECT * FROM ML.PREDICT(MODEL ch09eu.bicycle_model_bucketized,
(SELECT 'Park Lane , Hyde Park' AS start_station_name
, CURRENT_TIMESTAMP() AS start_date)
)
2. 배치 예측 생성하기
DECLARE tomorrow_3am TIMESTAMP;
SET tomorrow_3am = TIMESTAMP_ADD(
TIMESTAMP(DATE_ADD(CURRENT_DATE(), INTERVAL 1 DAY)),
INTERVAL 3 HOUR);
WITH generated AS (
SELECT
name AS start_station_name
, GENERATE_TIMESTAMP_ARRAY(tomorrow_3am, TIMESTAMP_ADD(tomorrow_3am, INTERVAL 24 HOUR), INTERVAL 1 HOUR) AS dates
FROM
`bigquery-public-data`.london_bicycles.cycle_stations
),
features AS (
SELECT
start_station_name
, start_date
FROM
generated
, UNNEST(dates) AS start_date
)
SELECT * FROM ML.PREDICT(MODEL ch09eu.bicycle_model_bucketized,
(SELECT * FROM features)
)
SELECT * FROM ML.WEIGHTS(MODEL test.bicycle_model_bucketized)
빅쿼리는 dnn_regressor 및 xgboost 모델도 지원한다.
ARIMA 지원한다.
이후 9장, 10장 생략