1) Data split: x, y
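The snippets in this note assume data is already loaded as a DataFrame with an AvgTemp feature column and a y target column, and that the following imports are in place (the exact module paths are my assumption; a tf.keras environment is assumed):

import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, LSTM, Dense, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.backend import clear_session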
x = data.loc[:, ['AvgTemp']]
y = data.loc[:,'y']
2) Scaling (with enough data, scaling can also be done before the split)
# Scale x
scaler = MinMaxScaler()
x = scaler.fit_transform(x)
# Check the shapes
x.shape, y.shape
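A quick check I find useful here (my addition, not part of the original notes): fit_transform returns a NumPy array scaled into [0, 1], and the fitted scaler can undo the scaling later with inverse_transform if the original values are needed.

# x is now a NumPy array, each column mapped to [0, 1]
print(type(x), x.min(), x.max())          # <class 'numpy.ndarray'> 0.0 1.0
x_original = scaler.inverse_transform(x)  # recover the original AvgTemp values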
3) Building the 3-D dataset
# Convert time-series data from 2-D to 3-D: (samples, timesteps, features)
def temporalize(x, y, timesteps):
    nfeature = x.shape[1]
    output_x = []
    output_y = []
    for i in range(len(x) - timesteps + 1):
        t = []
        for j in range(timesteps):
            t.append(x[[(i + j)], :])
        output_x.append(t)
        output_y.append(y[i + timesteps - 1])
    return np.array(output_x).reshape(-1, timesteps, nfeature), np.array(output_y)
x2, y2 = temporalize(x, y, 4) # 3-D structure (timesteps = 4)
x2.shape, y2.shape
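A tiny worked example of temporalize (my own illustration): with 6 rows, 1 feature and timesteps=4, the sliding window produces 6 - 4 + 1 = 3 samples, and each label is the y value at the last step of its window.

x_demo = np.arange(6).reshape(-1, 1)   # 6 rows, 1 feature
y_demo = np.arange(6) * 10             # 0, 10, 20, 30, 40, 50
xd, yd = temporalize(x_demo, y_demo, 4)
print(xd.shape, yd.shape)              # (3, 4, 1) (3,)
print(xd[0].ravel(), yd[0])            # [0 1 2 3] 30  -> window rows 0..3, label at row 3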
4) Data split 2: train, val
x_train, x_val, y_train, y_val = train_test_split(x2, y2, test_size=53, shuffle=False)
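Because shuffle=False, the split is purely sequential: the earlier windows train the model and the most recent 53 windows are held out for validation, which is what we want for time series. A quick shape check (illustrative):

print(x_train.shape, y_train.shape)   # (len(x2) - 53, 4, 1) (len(x2) - 53,)
print(x_val.shape, y_val.shape)       # (53, 4, 1) (53,)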
SimpleRNN
return_sequences determines what is passed to the next layer: the full output sequence when True, only the last timestep's output when False (see the shape check after the stacked model below).
# Single SimpleRNN layer
model = Sequential([SimpleRNN(1, input_shape=(timesteps, nfeatures), return_sequences=False),
                    Dense(1)])
# Two or more RNN layers
model = Sequential([SimpleRNN(16, input_shape=(timesteps, nfeatures), return_sequences=True),
                    SimpleRNN(8, return_sequences=True),
                    SimpleRNN(1, return_sequences=False),
                    Dense(1)])
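A minimal sketch (my addition) of what return_sequences changes, using dummy input with 4 timesteps and 1 feature: True returns one output vector per timestep, False returns only the last timestep's output.

dummy = np.zeros((2, 4, 1))                       # (batch, timesteps, features)
seq_layer = SimpleRNN(8, return_sequences=True)
last_layer = SimpleRNN(8, return_sequences=False)
print(seq_layer(dummy).shape)    # (2, 4, 8): full sequence, fed to the next RNN layer
print(last_layer(dummy).shape)   # (2, 8): last step only, ready for Dense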
# Connect to the output through Flatten + Dense layers
model = Sequential([SimpleRNN(16, input_shape=(timesteps, nfeatures), return_sequences=True),
                    SimpleRNN(8, return_sequences=True),
                    Flatten(),
                    Dense(8, activation='relu'),
                    Dense(1)])
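Why Flatten is needed before Dense here (illustrative note): the last SimpleRNN above still returns a 3-D tensor of shape (batch, timesteps, 8), and Flatten collapses it to (batch, timesteps * 8) so the Dense layers can take over.

seq_out = np.zeros((2, 4, 8))      # stand-in for the output of SimpleRNN(8, return_sequences=True)
print(Flatten()(seq_out).shape)    # (2, 32) = (batch, timesteps * units)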
LSTM (the default activation is tanh)
# Re-build the 3-D dataset, this time with timesteps = 8
timesteps = 8
x2, y2 = temporalize(x, y, timesteps)
# Data split
x_train, x_val, y_train, y_val = train_test_split(x2, y2, test_size=53, shuffle=False)
timesteps = x_train.shape[1]
nfeatures = x_train.shape[2]
clear_session()
model4 = Sequential([LSTM(10, input_shape=(timesteps, nfeatures), return_sequences=True),
                     LSTM(8, return_sequences=True),
                     LSTM(4, return_sequences=True),
                     Flatten(),
                     Dense(8, activation='relu'),
                     Dense(1)])
model4.summary()
model4.compile(optimizer=Adam(0.001), loss='mse')
hist = model4.fit(x_train, y_train, epochs=200, verbose=0, validation_split=.2).history
pred4 = model4.predict(x_val)
print(mean_absolute_error(y_val, pred4))
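To look beyond the single MAE number, a typical follow-up is to plot the learning curve and compare predictions with the actual validation values (this plotting code is my own sketch; matplotlib assumed):

import matplotlib.pyplot as plt

plt.figure(figsize=(12, 4))
# Learning curve: training vs. validation loss per epoch
plt.subplot(1, 2, 1)
plt.plot(hist['loss'], label='train loss')
plt.plot(hist['val_loss'], label='val loss')
plt.legend()
# Actual vs. predicted values over the validation period
plt.subplot(1, 2, 2)
plt.plot(y_val, label='actual')
plt.plot(pred4, label='predicted')
plt.legend()
plt.show()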