ModuleLoad
import pandas as pd
import pandas_datareader as web
import numpy as np
import matplotlib.pyplot as plt
from prophet import Prophet
from datetime import datetime
%matplotlib inline
Data
pinkwink_web = pd.read_csv(
"data/05_PinkWink_Web_Traffic.csv",
encoding="utf-8",
thousands=",",
names=["date", "hit"],
index_col=0
)
pinkwink_web = pinkwink_web[pinkwink_web["hit"].notnull()]
pinkwink_web.head()
# 전체 데이터 그려보기
pinkwink_web["hit"].plot(figsize=(12, 4), grid=True);
# trend 분석을 시각화하기 위한 x축 값을 만들기
time = np.arange(0, len(pinkwink_web))
traffic = pinkwink_web["hit"].values
fx = np.linspace(0, time[-1], 1000)
# 에러를 계산할 함수
def error(f, x, y):
return np.sqrt(np.mean((f(x) - y) ** 2))
fp1 = np.polyfit(time, traffic, 1)
f1 = np.poly1d(fp1)
f2p = np.polyfit(time, traffic, 2)
f2 = np.poly1d(f2p)
f3p = np.polyfit(time, traffic, 3)
f3 = np.poly1d(f3p)
f15p = np.polyfit(time, traffic, 15)
f15 = np.poly1d(f15p)
print(error(f1, time, traffic))
print(error(f2, time, traffic))
print(error(f3, time, traffic))
print(error(f15, time, traffic))
plt.figure(figsize=(12, 4))
plt.scatter(time, traffic, s=10)
plt.plot(fx, f1(fx), lw=4, label='f1')
plt.plot(fx, f2(fx), lw=4, label='f2')
plt.plot(fx, f3(fx), lw=4, label='f3')
plt.plot(fx, f15(fx), lw=4, label='f15')
plt.grid(True, linestyle="-", color="0.75")
plt.legend(loc=2)
plt.show()
df = pd.DataFrame({"ds": pinkwink_web.index, "y": pinkwink_web["hit"]})
df.reset_index(inplace=True)
df["ds"] = pd.to_datetime(df["ds"], format="%y. %m. %d.")
del df["date"]
df.head()
m = Prophet(yearly_seasonality=True, daily_seasonality=True)
m.fit(df);
# 60일에 해당하는 데이터 예측
future = m.make_future_dataframe(periods=60)
future.tail()
# 예측 결과는 상한/하한의 범위를 포함해서 얻어진다
forecast = m.predict(future)
forecast[["ds", "yhat", "yhat_lower", "yhat_upper"]].tail()
m.plot_components(forecast);