AIVLE School Mini-Project
# Write your answer code here.
import sklearn as sk
# Write your answer code here.
import pandas as pd
# Write your answer code here.
# Load the churn dataset.
df = pd.read_csv("churn_data.csv")
df
# Write your answer code here.
# Drop the customerID column; it is an identifier, not a feature.
df1 = df.drop('customerID', axis=1)
# Write your answer code here.
# TotalCharges is stored as a string; blank entries block a direct float cast.
df1['TotalCharges'].dtype
df1['TotalCharges'] = df1['TotalCharges'].replace(' ', '0')
df1['TotalCharges'] = df1['TotalCharges'].astype(float)
df2 = df1.copy()
# Write your answer code here.
# Check the class balance, then encode the target: Yes -> 1, No -> 0.
df2['Churn'].value_counts()
df3 = df2.copy()
df3['Churn'] = df3['Churn'].replace(['Yes', 'No'], [1, 0])
# Write your answer code here.
# Inspect missing values, drop the DeviceProtection column, and remove remaining NaN rows.
print(df3.isnull().sum())
df4 = df3.copy()
df4.drop('DeviceProtection', axis=1, inplace=True)
df4.dropna(inplace=True)
df4.info()
# Write your answer code here.
# Visualize the SeniorCitizen distribution, then drop the column.
df4['SeniorCitizen'].value_counts().plot(kind='bar')
df4.drop('SeniorCitizen', axis=1, inplace=True)
df4.info()
# Write your answer code here.
import seaborn as sns
import matplotlib.pyplot as plt

# Distribution of tenure, and how it differs between churned and retained customers.
sns.histplot(data=df4, x='tenure')
plt.show()
sns.kdeplot(data=df4, x='tenure', hue='Churn')
plt.show()
print('O')  # answer: O (yes) - tenure and churn are related

# Correlation among the numeric features.
sns.heatmap(df4[['tenure', 'MonthlyCharges', 'TotalCharges']].corr(), annot=True)
plt.show()
print(0.83)  # answer: the largest pairwise correlation (tenure vs. TotalCharges)
# Write your answer code here.
# One-hot encode every remaining object (categorical) column.
object_cols = df4.select_dtypes('object').columns.values
df5 = pd.get_dummies(data=df4, columns=object_cols)
# Write your answer code here.
from sklearn.model_selection import train_test_split

X = df5.drop('Churn', axis=1).values
y = df5['Churn'].values
X_train, X_valid, y_train, y_valid = train_test_split(X, y,
                                                      test_size=0.2,
                                                      stratify=y,
                                                      random_state=42)
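
# A quick sanity check (a sketch, not part of the original answers): confirm the
# split sizes and that stratify preserved the churn ratio in both subsets.
import numpy as np

print(X_train.shape, X_valid.shape)
print(y_train.mean(), y_valid.mean())  # positive-class ratios should be close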
# Write your answer code here.
# Scale features to [0, 1]; fit on the training set only to avoid leakage.
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_valid = scaler.transform(X_valid)
# Write your answer code here.
# Train a set of baseline classifiers on the scaled features.
from sklearn.linear_model import LogisticRegression
lg = LogisticRegression()
lg.fit(X_train, y_train)

from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)

from sklearn.tree import DecisionTreeClassifier
dt = DecisionTreeClassifier(max_depth=10, random_state=42)
dt.fit(X_train, y_train)

from sklearn.ensemble import RandomForestClassifier
rfc = RandomForestClassifier(n_estimators=3, random_state=42)
rfc.fit(X_train, y_train)

!pip install xgboost
from xgboost import XGBClassifier
xgb = XGBClassifier(n_estimators=3, random_state=42)
xgb.fit(X_train, y_train)

!pip install lightgbm
from lightgbm import LGBMClassifier
lgbm = LGBMClassifier(n_estimators=3, random_state=42)
lgbm.fit(X_train, y_train)
# Write your answer code here.
# Evaluate the LightGBM model on the validation set.
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

y_pred = lgbm.predict(X_valid)
cm = confusion_matrix(y_valid, y_pred)
sns.heatmap(cm, annot=True, fmt='d')  # fmt='d' renders raw counts, not scientific notation
plt.show()
print(classification_report(y_valid, y_pred, zero_division=1))
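
# A sketch for comparison (assumes all six models above were fit in this session):
# score every classifier on the same validation split.
from sklearn.metrics import accuracy_score

models = {'LogisticRegression': lg, 'KNN': knn, 'DecisionTree': dt,
          'RandomForest': rfc, 'XGBoost': xgb, 'LightGBM': lgbm}
for name, clf in models.items():
    print(f'{name}: {accuracy_score(y_valid, clf.predict(X_valid)):.4f}')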
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Activation, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.utils import to_categorical
tf.random.set_seed(1)
# Write your answer code here.
# Binary-classification MLP: a single sigmoid output trained with binary cross-entropy.
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(X_train.shape[1],)))
model.add(Dropout(0.2))
model.add(Dense(32, activation='relu'))
model.add(Dense(16, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

es = EarlyStopping(monitor='val_loss', patience=5)
checkpoint_path = 'best_model.keras'
mc = ModelCheckpoint(checkpoint_path, monitor='val_loss', verbose=1, save_best_only=True)

history = model.fit(X_train, y_train, epochs=30, batch_size=16,
                    validation_data=(X_valid, y_valid),
                    callbacks=[es, mc])
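
# A sketch (assumes ModelCheckpoint wrote best_model.keras at least once):
# reload the best checkpoint and score it on the validation set.
best_model = load_model(checkpoint_path)
val_loss, val_acc = best_model.evaluate(X_valid, y_valid, verbose=0)
print(f'best checkpoint - loss: {val_loss:.4f}, accuracy: {val_acc:.4f}')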
# Write your answer code here.
# Learning curves: training vs. validation accuracy per epoch.
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Acc')
plt.legend(['acc', 'val_acc'])
plt.show()
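
# A companion sketch: the same curves for the loss, which is what EarlyStopping
# and ModelCheckpoint actually monitor (monitor='val_loss').
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend(['loss', 'val_loss'])
plt.show()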