--11.LOL-필승전략 분석.ipynb--
# Mount Google Drive so the dataset stored there is reachable from this Colab session.
from google.colab import drive
drive.mount('/content/drive')
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
# Path to the "high diamond ranked 10 min" League of Legends dataset on Drive.
base_path = r'/content/drive/MyDrive/dataset'
filename = r'high_diamond_ranked_10min.csv'
filepath = os.path.join(base_path, filename)
filepath  # notebook cell: display the assembled path
# Load the match statistics (one row per game, blue/red team features at 10 minutes).
df = pd.read_csv(filepath)
df  # notebook cell: display the DataFrame
# --- Exploratory data analysis ---
df.columns  # list every available feature
df[['blueFirstBlood', 'redFirstBlood']]  # first-blood indicators (mutually exclusive)
df[['blueDragons', 'blueHeralds', 'redDragons', 'redHeralds']]  # elite-monster kills per side
df[['blueGoldDiff', 'redGoldDiff']]  # gold differentials (blue vs red are negations of each other)
df.info()  # dtypes and non-null counts
df.describe()  # summary statistics for all numeric columns
df.describe()[['blueDragons', 'blueHeralds', 'redDragons', 'redHeralds']]  # objective-kill stats only
df.corr()  # full pairwise correlation matrix
# Correlation of every feature against the target column 'blueWins', as a tall heatmap.
fig = plt.figure(figsize=(4,10))
sns.heatmap(df.corr()[['blueWins']], annot=True)
# Distribution plots split by match outcome (hue='blueWins').
sns.histplot(x='blueGoldDiff', data = df, hue='blueWins')
sns.histplot(x='blueKills', data = df, hue='blueWins', palette='RdBu', bins=8, kde=True)
sns.jointplot(x='blueKills', y='blueGoldDiff' ,data = df, hue='blueWins')
sns.jointplot(x='blueExperienceDiff', y='blueGoldDiff' ,data = df, hue='blueWins')
sns.countplot(x='blueDragons', data=df, hue='blueWins')
# Task: drop the following unnecessary columns from df:
#   'gameId', 'redFirstBlood', 'redKills', 'redDeaths', 'redTotalGold',
#   'redTotalExperience', 'redGoldDiff', 'redExperienceDiff'
# Then display df.head() and the number of remaining columns.
# Remove identifier and red-side columns that are redundant with the blue-side
# features (e.g. redGoldDiff is just -blueGoldDiff, redFirstBlood = 1 - blueFirstBlood).
redundant_cols = ['gameId', 'redFirstBlood', 'redKills', 'redDeaths', 'redTotalGold',
                  'redTotalExperience', 'redGoldDiff', 'redExperienceDiff']
df = df.drop(redundant_cols, axis=1)
df.head()  # notebook cell: preview the reduced frame
df.shape[1]  # notebook cell: number of columns remaining
df.info()
df
df.columns
df['redTowersDestroyed'].unique()  # inspect the value set of a remaining red-side column
# Numeric features to be standardized before modeling.
X_num = df[['blueWardsPlaced', 'blueWardsDestroyed',
'blueKills', 'blueDeaths', 'blueAssists', 'blueEliteMonsters',
'blueTowersDestroyed', 'blueTotalGold',
'blueAvgLevel', 'blueTotalExperience', 'blueTotalMinionsKilled',
'blueTotalJungleMinionsKilled', 'blueGoldDiff', 'blueExperienceDiff',
'blueCSPerMin', 'blueGoldPerMin', 'redWardsPlaced', 'redWardsDestroyed',
'redAssists', 'redEliteMonsters',
'redTowersDestroyed', 'redAvgLevel', 'redTotalMinionsKilled',
'redTotalJungleMinionsKilled', 'redCSPerMin', 'redGoldPerMin']]
# Categorical / binary-count features, kept on their original scale.
X_cat = df[['blueFirstBlood', 'blueDragons', 'blueHeralds', 'redDragons', 'redHeralds' ]]
from sklearn.preprocessing import StandardScaler
df['blueWardsPlaced']  # notebook cell: peek at one raw (unscaled) feature
# Standardize the numeric features to zero mean / unit variance;
# fit_transform is equivalent to fit() followed by transform().
scaler = StandardScaler()
X_scaled = pd.DataFrame(
    data=scaler.fit_transform(X_num),
    index=X_num.index,
    columns=X_num.columns,
)
X_scaled  # notebook cell: inspect the standardized features
# Final design matrix: standardized numerics plus untouched categorical columns.
X = pd.concat([X_scaled, X_cat], axis=1)
y = df['blueWins']  # target: 1 if the blue team won
X.head()
X.describe()
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

# Hyperparameter grid for logistic regression: solver iteration budget
# and inverse regularization strength.
param_grid = {
    'max_iter': [100, 200, 300],
    'C': [0.5, 1.0, 1.5, 2.0],
}
# Exhaustive search over the grid with 8-fold cross-validation.
gs = GridSearchCV(estimator=LogisticRegression(), param_grid=param_grid, cv=8)
result = gs.fit(X, y)
# Inspect the grid-search outcome. NOTE: scikit-learn exposes fitted results
# via trailing-underscore attributes (best_score_, best_params_, ...);
# the underscores had been lost in the original, raising AttributeError.
result.best_score_  # mean CV accuracy of the best parameter combination
best_params = result.best_params_
best_params  # notebook cell: display the winning hyperparameters
gs.best_estimator_  # the refitted model using best_params
pd.DataFrame(result.cv_results_)  # full per-combination CV results table
from sklearn.model_selection import train_test_split

# Hold out 30% of the games for evaluation; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)
len(X_train), len(X_test)  # notebook cell: split sizes
result.best_params_  # notebook cell: the hyperparameters chosen by the grid search
# Rebuild a logistic-regression model with the grid-search winners and fit it
# on the training split. (Original had a typo: assigned `modellr` but fitted
# `model_lr`, and referenced an undefined `best_params`.)
model_lr = LogisticRegression(**result.best_params_)
model_lr.fit(X_train, y_train)
# Task: print the cross-validation accuracy scores,
# then print their mean and standard deviation.
from sklearn.model_selection import cross_val_score, cross_validate
# 4-fold cross-validated accuracy of the tuned model over the full dataset.
# First argument is the (configured) estimator, then the feature matrix X and target y.
cv_scores = cross_val_score(model_lr, X, y, cv=4)
cv_scores  # notebook cell: per-fold accuracies
np.mean(cv_scores), np.std(cv_scores)  # notebook cell: mean and spread across folds
from sklearn.metrics import classification_report
# Precision / recall / F1 on the held-out test split.
y_pred = model_lr.predict(X_test)
print(classification_report(y_test, y_pred))
# <Example>
from sklearn.metrics import roc_curve, RocCurveDisplay
# Plot the ROC curve of the fitted logistic-regression model on the test split.
fig = plt.figure()
ax = fig.gca()
RocCurveDisplay.from_estimator(model_lr, X_test, y_test, ax=ax)
from xgboost import XGBClassifier
# Gradient-boosted tree baseline for comparison with logistic regression.
# NOTE(review): use_label_encoder was deprecated in xgboost 1.6+ and removed
# later — confirm the installed xgboost version still accepts it.
model_xgb = XGBClassifier(use_label_encoder=False)
model_xgb.fit(X_train, y_train)
# Fitted attribute is `feature_importances_` (trailing underscore); the
# original referenced a nonexistent `modelxgb.feature_importances`.
model_xgb.feature_importances_
# Bar chart of per-feature importance, labeled with the design-matrix columns.
plt.bar(X.columns, model_xgb.feature_importances_)
plt.xticks(rotation=90)
plt.show()