오늘은 진행 중인 공모전의 train 데이터를 사용해서 선형 회귀 실습을 진행했다.
데이터 shape은 (748, 39)이다.
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
class ProPredictor:
    """Fit and score a single-feature linear regression read from a CSV file.

    The model predicts ``total_class_count`` from ``time_input``. Rows in
    which either of those two columns is missing are dropped at load time.
    """

    def __init__(self, file_path: str):
        """Load the CSV and create an unfitted ``LinearRegression`` model.

        Args:
            file_path: Path handed to ``pandas.read_csv``. The ``time_input``
                and ``total_class_count`` columns are selected from the
                resulting frame.
        """
        self.df = pd.read_csv(file_path)
        # Keep only the feature/target pair and discard incomplete rows so
        # the model never sees NaN values.
        self.df = self.df[['time_input', 'total_class_count']].dropna()
        self.model = LinearRegression()

    def train_and_evaluate(self):
        """Fit the model on a train split and print MSE and R2 on the test split.

        The data is split 80/20 with a fixed ``random_state`` so repeated
        runs on the same data produce the same split. Results are printed;
        nothing is returned.
        """
        # Double brackets keep X as a 2-D DataFrame; y is a 1-D Series.
        X = self.df[['time_input']]
        y = self.df['total_class_count']

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )

        self.model.fit(X_train, y_train)
        y_pred = self.model.predict(X_test)

        mse = mean_squared_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)

        print(f"--- 분석 결과 (데이터 수: {len(self.df)}행) ---")
        # The label's closing parenthesis was missing in the original string.
        print(f"평균 제곱 오차(MSE): {mse: .4f}")
        print(f"결정계수(R2 Score): {r2: .4f}")
# --- Script entry: build the predictor from train.csv and report its scores ---
predictor = ProPredictor(file_path='train.csv')
predictor.train_and_evaluate()
