난방 부하 단계를 예측해주세요!
예측할 값(y): Heat_Load (Very Low, Low, Medium, High, Very High)
평가: f1-macro
data: train.csv, test.csv
제출 형식: result.csv 파일을 아래와 같은 형식으로 제출
pred
Very Low
Low
High
...
Very High
# Load the data.
# The task statement names the data files train.csv and test.csv; an empty
# path cannot be opened by pd.read_csv, so the file names are spelled out.
# NOTE(review): assumes both files sit in the working directory — adjust the
# paths if the data lives elsewhere.
import pandas as pd
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")
# Dimensions (rows, columns) of both frames
train.shape, test.shape
# First two rows of train
train.iloc[:2]
# First row of test
test.iloc[:1]
# Column dtypes and non-null counts
train.info()
# Frequency of each value in the 'Roof' column
train.loc[:, 'Roof'].value_counts()
# Summary statistics for the object-dtype columns of train
train.describe(include=['O'])
# Summary statistics for the object-dtype columns of test
test.describe(include=['O'])
# Missing values per column (train)
train.isna().sum()
# Total number of missing values (test)
test.isna().sum().sum()
# Distribution of the target classes
train.loc[:, 'Heat_Load'].value_counts()
# Separate the target column from the training features
target = train.pop('Heat_Load')
# One-hot encoding (pandas)
print(train.shape, test.shape)
train = pd.get_dummies(train)
test = pd.get_dummies(test)
# Encoding the two frames independently can give them different columns (or
# a different column order) whenever a category occurs in only one of them.
# Reindex test to train's columns so a model fitted on train sees the same
# features at prediction time: dummy columns missing from test are filled
# with 0, and columns that exist only in test are dropped.
test = test.reindex(columns=train.columns, fill_value=0)
print(train.shape, test.shape)
from sklearn.model_selection import train_test_split

# Hold out 20% of the training rows for validation; the fixed seed keeps the
# split reproducible.
X_tr, X_val, y_tr, y_val = train_test_split(
    train, target, test_size=0.2, random_state=0
)
# Shapes of the four resulting pieces
tuple(part.shape for part in (X_tr, X_val, y_tr, y_val))
# Evaluation metric
from sklearn.metrics import f1_score
# Random forest
from sklearn.ensemble import RandomForestClassifier

# fit() returns the estimator itself, so construction and training are chained.
rf = RandomForestClassifier(random_state=0).fit(X_tr, y_tr)
pred = rf.predict(X_val)
# Macro-averaged F1 on the validation split
f1_score(y_true=y_val, y_pred=pred, average='macro')
# LightGBM
import lightgbm as lgb

# verbose=-1 is passed through to LightGBM; fit() returns the estimator itself.
lg = lgb.LGBMClassifier(random_state=0, verbose=-1).fit(X_tr, y_tr)
pred = lg.predict(X_val)
# Macro-averaged F1 on the validation split
f1_score(y_true=y_val, y_pred=pred, average='macro')
# Predict the test set with the random forest model
pred = rf.predict(test)
# Single-column frame named 'pred', as the submission format requires
submit = pd.Series(pred, name='pred').to_frame()
submit.to_csv(path_or_buf='result.csv', index=False)
# Read the file back to check what was written
pd.read_csv("result.csv")