from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from pmdarima.arima import ndiffs
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.arima.model import ARIMA
def adf_test(series):
    """Run the Augmented Dickey-Fuller test on ``series`` and print a summary.

    Prints the p-value first, then one line for every entry of the
    critical-value mapping returned by ``adfuller``. Nothing is returned.
    """
    outcome = adfuller(series)
    # Index 1 of the adfuller result is printed as the p-value.
    print(f'p-value: {outcome[1]}')
    # Index 4 is a mapping that is printed as the critical values.
    critical_values = outcome[4]
    for level, threshold in critical_values.items():
        print(f'Critical Value {level}: {threshold}')
# Run the ADF test on the differenced 'amp' column (NaN rows produced by
# diff() are dropped before testing).
adf_test(df['amp'].diff().dropna())
# If the p-value is > 0.05, increase the differencing order and test again.
# ADF(Augmented Dickey-Fuller) 테스트는 시계열 데이터가 정상성(stationarity)을 갖는지 판별하기 위해 사용되는 통계적 검정 방법이다.
# ADF 테스트 결과 중 p-value가 유의 수준(보통 0.05)보다 작을 경우, 귀무 가설(단위근이 존재한다, 즉 시계열이 비정상이다)을 기각하고 데이터가 정상성을 가질 가능성이 높다고 판단한다.
# Draw the ACF and PACF plots
def plot_acf_pacf(series):
    """Show the ACF and PACF of ``series`` side by side, 20 lags each.

    A 12x4 figure with two axes in one row is created; the ACF goes on the
    left axes and the PACF on the right, then the figure is shown.
    """
    _, (acf_ax, pacf_ax) = plt.subplots(1, 2, figsize=(12, 4))
    plot_acf(series, lags=20, ax=acf_ax)
    plot_pacf(series, lags=20, ax=pacf_ax)
    plt.show()
# Plot ACF/PACF of the differenced 'amp' column (NaN rows produced by
# diff() are dropped first).
plot_acf_pacf(df['amp'].diff().dropna())
# Choose the p and q values from the plots.
# Train/test evaluation: fit an ARIMA(2, 1, 1) model on the 'amp' column of
# every frame in df_dict and collect RMSE / R2 on the held-out rows.
codes, rmses, r2scores = [], [], []
# NOTE: the loop variable rebinds the module-level name `df` on every pass.
for key, df in df_dict.items():
    # The first tr_len rows are used for fitting; the remainder is held out.
    tr_len = 1200
    train = df.iloc[:tr_len]['amp']
    test = df.iloc[tr_len:]['amp']

    # order=(2, 1, 1); the original line was missing the parenthesis that
    # closes ARIMA(...) before .fit().
    model = ARIMA(train, order=(2, 1, 1)).fit()

    # Hourly timestamps from the first to the last held-out index value.
    # Assumes the index is hourly without gaps so that len(future_range)
    # equals len(test) -- TODO confirm.
    future_range = pd.date_range(start=test.index[0], end=test.index[-1], freq='H')
    # `alpha` is not a documented argument of forecast(); point forecasts do
    # not take a confidence level, so it is no longer passed.
    forecast = model.forecast(steps=len(future_range))

    rmse = np.sqrt(mean_squared_error(test, forecast))
    r2 = r2_score(test, forecast)
    rmses.append(rmse)
    r2scores.append(r2)
    codes.append(key)

# One row per key of df_dict with its R2 and RMSE.
arima_results = pd.DataFrame({'Code': codes, 'R2': r2scores, 'RMSE': rmses})

# Visualization: uses df / future_range / forecast left over from the last
# loop iteration, i.e. only the last series in df_dict is plotted.
plt.figure(figsize=(12, 6))
plt.plot(df.index, df['amp'], label='Actual')
plt.plot(future_range, forecast, label='Forecast', color='red')
plt.title('ARIMA Forecast')
plt.xlabel('Date')
plt.ylabel('Value')
plt.legend()
plt.show()
# Fixed import: the original used `import ... import` (a syntax error) and
# misspelled both the module (`tas`) and the class (`ExponetialSmoothing`).
from statsmodels.tsa.holtwinters import ExponentialSmoothing

# Train/test evaluation: fit an additive-trend, additive-seasonal Holt-Winters
# model on the 'amp' column of every frame in df_dict and collect RMSE / R2
# on the held-out rows.
codes, rmses, r2scores = [], [], []
# NOTE: the loop variable rebinds the module-level name `df` on every pass.
for key, df in df_dict.items():
    # The first tr_len rows are used for fitting; the remainder is held out.
    tr_len = 1200
    train = df.iloc[:tr_len]['amp']
    test = df.iloc[tr_len:]['amp']

    # NOTE(review): seasonal_periods is set to the number of held-out rows,
    # not to a calendar cycle -- confirm this is the intended season length.
    model = ExponentialSmoothing(
        train,
        trend='add',
        seasonal='add',
        seasonal_periods=test.shape[0],
    ).fit()

    # Hourly timestamps from the first to the last held-out index value.
    # Assumes the index is hourly without gaps so that len(future_range)
    # equals len(test) -- TODO confirm.
    future_range = pd.date_range(start=test.index[0], end=test.index[-1], freq='H')
    forecast = model.forecast(steps=len(future_range))

    rmse = np.sqrt(mean_squared_error(test, forecast))
    r2 = r2_score(test, forecast)
    rmses.append(rmse)
    r2scores.append(r2)
    codes.append(key)

# One row per key of df_dict with its R2 and RMSE.
ets_results = pd.DataFrame({'Code': codes, 'R2': r2scores, 'RMSE': rmses})

# Visualization: uses df / future_range / forecast left over from the last
# loop iteration, i.e. only the last series in df_dict is plotted.
plt.figure(figsize=(12, 6))
plt.plot(df.index, df['amp'], label='Actual')
plt.plot(future_range, forecast, label='Forecast', color='red')
plt.title('Holt-Winters Exponential Smoothing Forecast')
plt.xlabel('Date')
plt.ylabel('Value')
plt.legend()
plt.show()
# Fixed import: the original `import prophet import Prophet` is a syntax error.
from prophet import Prophet

# Train/test evaluation: fit a Prophet model on every frame in df_dict and
# collect RMSE / R2 on the held-out rows.
codes, rmses, r2scores = [], [], []
# NOTE: the loop variable rebinds the module-level name `df` on every pass.
for key, df in df_dict.items():
    # Prophet works on a frame with a 'ds' (timestamp) and a 'y' (target)
    # column. rename_axis/reset_index/rename all return new objects, so the
    # frames stored in df_dict are no longer modified in place.
    df = df.rename_axis('ds').reset_index().rename(columns={'amp': 'y'})

    # Keep train/test as DataFrames: after the rename there is no 'amp'
    # column, and both 'ds' and 'y' are needed below.
    tr_len = 1200
    train = df.iloc[:tr_len]
    test = df.iloc[tr_len:]

    model = Prophet()
    # The original called fit() without the training data.
    model.fit(train)

    # Hourly timestamps from the first to the last held-out 'ds' value.
    # Assumes 'ds' is hourly without gaps so that the prediction has as many
    # rows as test -- TODO confirm.
    future_range = pd.date_range(start=test.iloc[0]['ds'], end=test.iloc[-1]['ds'], freq='H')
    future_range = pd.DataFrame({'ds': future_range})
    forecast = model.predict(future_range)

    rmse = np.sqrt(mean_squared_error(test['y'], forecast['yhat']))
    r2 = r2_score(test['y'], forecast['yhat'])
    rmses.append(rmse)
    r2scores.append(r2)
    codes.append(key)

# One row per key of df_dict with its R2 and RMSE.
pro_results = pd.DataFrame({'Code': codes, 'R2': r2scores, 'RMSE': rmses})

# Visualization: uses df / future_range / forecast left over from the last
# loop iteration, i.e. only the last series in df_dict is plotted.
plt.figure(figsize=(12, 6))
# The target column is 'y' after the rename above ('amp' no longer exists).
plt.plot(df['ds'], df['y'], label='Actual')
# future_range is a DataFrame here, so plot against its 'ds' column.
plt.plot(future_range['ds'], forecast['yhat'], label='Forecast', color='red')
plt.title('Prophet Forecast')
plt.xlabel('Date')
plt.ylabel('Value')
plt.legend()
plt.show()