은행의 전화 마케팅에 대한 고객의 반응 여부
data
data
# Show the first rows of the train, test and submission frames, in that order.
for _frame in (train, test, submission):
    display(_frame.head())
train
| ID | age | job | marital | education | default | balance | housing | loan | contact | day | month | campaign | pdays | previous | poutcome | y |
---|
0 | 13829 | 29 | technician | single | tertiary | no | 18254 | no | no | cellular | 11 | may | 2 | -1 | 0 | unknown | no |
1 | 22677 | 26 | services | single | secondary | no | 512 | yes | yes | unknown | 5 | jun | 3 | -1 | 0 | unknown | no |
2 | 10541 | 30 | management | single | secondary | no | 135 | no | no | cellular | 14 | aug | 2 | -1 | 0 | unknown | no |
3 | 13689 | 41 | technician | married | unknown | no | 30 | yes | no | cellular | 10 | jul | 1 | -1 | 0 | unknown | no |
4 | 11304 | 27 | admin. | single | secondary | no | 321 | no | yes | unknown | 2 | sep | 1 | -1 | 0 | unknown | no |
test
| ID | age | job | marital | education | default | balance | housing | loan | contact | day | month | campaign | pdays | previous | poutcome |
---|
0 | 53608 | 32 | management | single | tertiary | no | 12569 | no | no | cellular | 1 | jul | 2 | 295 | 2 | success |
1 | 51055 | 25 | services | single | secondary | no | 801 | no | no | cellular | 5 | jun | 2 | -1 | 0 | unknown |
2 | 52573 | 46 | blue-collar | married | secondary | no | 1728 | yes | no | unknown | 26 | may | 2 | -1 | 0 | unknown |
3 | 50458 | 39 | management | divorced | secondary | no | 51 | no | no | unknown | 17 | jun | 2 | -1 | 0 | unknown |
4 | 52272 | 31 | services | single | tertiary | no | 1626 | no | no | unknown | 31 | jul | 1 | -1 | 0 | unknown |
submission
| ID |
---|
0 | 53608 |
1 | 51055 |
2 | 52573 |
3 | 50458 |
4 | 52272 |
데이터 분리
from sklearn.model_selection import train_test_split
# Target: the `y` column of the training frame.
y = train['y']

# Features: every column except `ID` and `y`, passed through pd.get_dummies.
x = train.drop(['ID', 'y'], axis=1)
xd = pd.get_dummies(x)

# Split into train / held-out parts, stratified on the target, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    xd,
    y,
    stratify=y,
    random_state=1,
)
RandomForest
predict_proba(X_test)
from sklearn.ensemble import RandomForestClassifier
# Fit a random forest with default hyperparameters on the training split.
model = RandomForestClassifier()
model.fit(X_train, y_train)

# Class probabilities for the held-out split. The split defines this frame as
# `X_test`; the lowercase `x_test` used previously is never defined and
# raised a NameError.
pred = model.predict_proba(X_test)
평가
from sklearn.metrics import roc_auc_score, classification_report
# ROC AUC on the held-out split, scored from the second probability column.
_test_auc = roc_auc_score(y_test, pred[:, 1])
print('test roc score : ', _test_auc)
print(f'test roc score : {_test_auc:.3f}')

# Per-class report computed from the model's hard predictions on X_test.
_test_report = classification_report(y_test, model.predict(X_test))
print(f'test matrix report : \n {_test_report}')
test roc score : 0.7756576420890937
test
# Encode the test features with pd.get_dummies, as was done for training.
test_features = pd.get_dummies(test.drop('ID', axis=1))

# get_dummies only creates columns for categories present in the frame it is
# given, so the test frame can lack (or add, or reorder) columns relative to
# the training matrix. Align to the training columns: missing dummies become
# 0 and columns unseen during training are dropped.
test_features = test_features.reindex(columns=X_train.columns, fill_value=0)

# Second probability column, matching the column used for the ROC AUC above.
pred_test = model.predict_proba(test_features)
submission['predict'] = pred_test[:, 1]

display(submission.head())
submission.to_csv('submission_.csv', index=False)
| ID | predict |
---|
0 | 53608 | 0.73 |
1 | 51055 | 0.82 |
2 | 52573 | 0.00 |
3 | 50458 | 0.14 |
4 | 52272 | 0.32 |