import numpy as np # 1.18.5
import pandas as pd # 0.25.1
import gc
import time
import matplotlib.pyplot as plt # 3.2.2
import seaborn as sns # 0.10.1
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 200)
app_train = pd.read_csv('application_train.csv')
app_test = pd.read_csv('application_test.csv')
def get_apps_dataset():
app_train = pd.read_csv('application_train.csv')
app_test = pd.read_csv('application_test.csv')
apps = pd.concat([app_train, app_test])
return apps
apps = get_apps_dataset()
<ipython-input-3-da4fa99e2eb3>:4: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.
To accept the future behavior, pass 'sort=False'.
To retain the current behavior and silence the warning, pass 'sort=True'.
apps = pd.concat([app_train, app_test])
def get_apps_processed(apps):
# EXT_SOURCE_X FEATURE 가공
apps['APPS_EXT_SOURCE_MEAN'] = apps[['EXT_SOURCE_1', 'EXT_SOURCE_2', 'EXT_SOURCE_3']].mean(axis=1)
apps['APPS_EXT_SOURCE_STD'] = apps[['EXT_SOURCE_1', 'EXT_SOURCE_2', 'EXT_SOURCE_3']].std(axis=1)
apps['APPS_EXT_SOURCE_STD'] = apps['APPS_EXT_SOURCE_STD'].fillna(apps['APPS_EXT_SOURCE_STD'].mean())
# AMT_CREDIT 비율로 Feature 가공
apps['APPS_ANNUITY_CREDIT_RATIO'] = apps['AMT_ANNUITY']/apps['AMT_CREDIT']
apps['APPS_GOODS_CREDIT_RATIO'] = apps['AMT_GOODS_PRICE']/apps['AMT_CREDIT']
# AMT_INCOME_TOTAL 비율로 Feature 가공
apps['APPS_ANNUITY_INCOME_RATIO'] = apps['AMT_ANNUITY']/apps['AMT_INCOME_TOTAL']
apps['APPS_CREDIT_INCOME_RATIO'] = apps['AMT_CREDIT']/apps['AMT_INCOME_TOTAL']
apps['APPS_GOODS_INCOME_RATIO'] = apps['AMT_GOODS_PRICE']/apps['AMT_INCOME_TOTAL']
apps['APPS_CNT_FAM_INCOME_RATIO'] = apps['AMT_INCOME_TOTAL']/apps['CNT_FAM_MEMBERS']
# DAYS_BIRTH, DAYS_EMPLOYED 비율로 Feature 가공
apps['APPS_EMPLOYED_BIRTH_RATIO'] = apps['DAYS_EMPLOYED']/apps['DAYS_BIRTH']
apps['APPS_INCOME_EMPLOYED_RATIO'] = apps['AMT_INCOME_TOTAL']/apps['DAYS_EMPLOYED']
apps['APPS_INCOME_BIRTH_RATIO'] = apps['AMT_INCOME_TOTAL']/apps['DAYS_BIRTH']
apps['APPS_CAR_BIRTH_RATIO'] = apps['OWN_CAR_AGE'] / apps['DAYS_BIRTH']
apps['APPS_CAR_EMPLOYED_RATIO'] = apps['OWN_CAR_AGE'] / apps['DAYS_EMPLOYED']
return apps
prev = pd.read_csv('previous_application.csv')
print(prev.shape, apps.shape)
(1670214, 37) (356255, 122)
prev.head()
| SK_ID_PREV | SK_ID_CURR | NAME_CONTRACT_TYPE | AMT_ANNUITY | AMT_APPLICATION | AMT_CREDIT | AMT_DOWN_PAYMENT | AMT_GOODS_PRICE | WEEKDAY_APPR_PROCESS_START | HOUR_APPR_PROCESS_START | FLAG_LAST_APPL_PER_CONTRACT | NFLAG_LAST_APPL_IN_DAY | RATE_DOWN_PAYMENT | RATE_INTEREST_PRIMARY | RATE_INTEREST_PRIVILEGED | NAME_CASH_LOAN_PURPOSE | NAME_CONTRACT_STATUS | DAYS_DECISION | NAME_PAYMENT_TYPE | CODE_REJECT_REASON | NAME_TYPE_SUITE | NAME_CLIENT_TYPE | NAME_GOODS_CATEGORY | NAME_PORTFOLIO | NAME_PRODUCT_TYPE | CHANNEL_TYPE | SELLERPLACE_AREA | NAME_SELLER_INDUSTRY | CNT_PAYMENT | NAME_YIELD_GROUP | PRODUCT_COMBINATION | DAYS_FIRST_DRAWING | DAYS_FIRST_DUE | DAYS_LAST_DUE_1ST_VERSION | DAYS_LAST_DUE | DAYS_TERMINATION | NFLAG_INSURED_ON_APPROVAL | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2030495 | 271877 | Consumer loans | 1730.430 | 17145.0 | 17145.0 | 0.0 | 17145.0 | SATURDAY | 15 | Y | 1 | 0.0 | 0.182832 | 0.867336 | XAP | Approved | -73 | Cash through the bank | XAP | NaN | Repeater | Mobile | POS | XNA | Country-wide | 35 | Connectivity | 12.0 | middle | POS mobile with interest | 365243.0 | -42.0 | 300.0 | -42.0 | -37.0 | 0.0 |
| 1 | 2802425 | 108129 | Cash loans | 25188.615 | 607500.0 | 679671.0 | NaN | 607500.0 | THURSDAY | 11 | Y | 1 | NaN | NaN | NaN | XNA | Approved | -164 | XNA | XAP | Unaccompanied | Repeater | XNA | Cash | x-sell | Contact center | -1 | XNA | 36.0 | low_action | Cash X-Sell: low | 365243.0 | -134.0 | 916.0 | 365243.0 | 365243.0 | 1.0 |
| 2 | 2523466 | 122040 | Cash loans | 15060.735 | 112500.0 | 136444.5 | NaN | 112500.0 | TUESDAY | 11 | Y | 1 | NaN | NaN | NaN | XNA | Approved | -301 | Cash through the bank | XAP | Spouse, partner | Repeater | XNA | Cash | x-sell | Credit and cash offices | -1 | XNA | 12.0 | high | Cash X-Sell: high | 365243.0 | -271.0 | 59.0 | 365243.0 | 365243.0 | 1.0 |
| 3 | 2819243 | 176158 | Cash loans | 47041.335 | 450000.0 | 470790.0 | NaN | 450000.0 | MONDAY | 7 | Y | 1 | NaN | NaN | NaN | XNA | Approved | -512 | Cash through the bank | XAP | NaN | Repeater | XNA | Cash | x-sell | Credit and cash offices | -1 | XNA | 12.0 | middle | Cash X-Sell: middle | 365243.0 | -482.0 | -152.0 | -182.0 | -177.0 | 1.0 |
| 4 | 1784265 | 202054 | Cash loans | 31924.395 | 337500.0 | 404055.0 | NaN | 337500.0 | THURSDAY | 9 | Y | 1 | NaN | NaN | NaN | Repairs | Refused | -781 | Cash through the bank | HC | NaN | Repeater | XNA | Cash | walk-in | Credit and cash offices | -1 | XNA | 24.0 | high | Cash Street: high | NaN | NaN | NaN | NaN | NaN | NaN |
prev_app_outer = prev.merge(apps['SK_ID_CURR'], on='SK_ID_CURR', how='outer', indicator=True)
| SK_ID_PREV | SK_ID_CURR | NAME_CONTRACT_TYPE | AMT_ANNUITY | AMT_APPLICATION | AMT_CREDIT | AMT_DOWN_PAYMENT | AMT_GOODS_PRICE | WEEKDAY_APPR_PROCESS_START | HOUR_APPR_PROCESS_START | FLAG_LAST_APPL_PER_CONTRACT | NFLAG_LAST_APPL_IN_DAY | RATE_DOWN_PAYMENT | RATE_INTEREST_PRIMARY | RATE_INTEREST_PRIVILEGED | NAME_CASH_LOAN_PURPOSE | NAME_CONTRACT_STATUS | DAYS_DECISION | NAME_PAYMENT_TYPE | CODE_REJECT_REASON | NAME_TYPE_SUITE | NAME_CLIENT_TYPE | NAME_GOODS_CATEGORY | NAME_PORTFOLIO | NAME_PRODUCT_TYPE | CHANNEL_TYPE | SELLERPLACE_AREA | NAME_SELLER_INDUSTRY | CNT_PAYMENT | NAME_YIELD_GROUP | PRODUCT_COMBINATION | DAYS_FIRST_DRAWING | DAYS_FIRST_DUE | DAYS_LAST_DUE_1ST_VERSION | DAYS_LAST_DUE | DAYS_TERMINATION | NFLAG_INSURED_ON_APPROVAL | _merge | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2030495.0 | 271877 | Consumer loans | 1730.430 | 17145.0 | 17145.0 | 0.0 | 17145.0 | SATURDAY | 15.0 | Y | 1.0 | 0.000000 | 0.182832 | 0.867336 | XAP | Approved | -73.0 | Cash through the bank | XAP | NaN | Repeater | Mobile | POS | XNA | Country-wide | 35.0 | Connectivity | 12.0 | middle | POS mobile with interest | 365243.0 | -42.0 | 300.0 | -42.0 | -37.0 | 0.0 | both |
| 1 | 1696966.0 | 271877 | Consumer loans | 68258.655 | 1800000.0 | 1754721.0 | 180000.0 | 1800000.0 | SATURDAY | 18.0 | Y | 1.0 | 0.101325 | NaN | NaN | XAP | Refused | -472.0 | Cash through the bank | SCO | NaN | Repeater | Clothing and Accessories | POS | XNA | Regional / Local | 55.0 | Furniture | 36.0 | low_normal | POS industry with interest | NaN | NaN | NaN | NaN | NaN | NaN | both |
| 2 | 2154916.0 | 271877 | Consumer loans | 12417.390 | 108400.5 | 119848.5 | 0.0 | 108400.5 | SUNDAY | 14.0 | Y | 1.0 | 0.000000 | NaN | NaN | XAP | Approved | -548.0 | Cash through the bank | XAP | NaN | New | Furniture | POS | XNA | Stone | 196.0 | Furniture | 12.0 | middle | POS industry with interest | 365243.0 | -512.0 | -182.0 | -392.0 | -387.0 | 0.0 | both |
| 3 | 2802425.0 | 108129 | Cash loans | 25188.615 | 607500.0 | 679671.0 | NaN | 607500.0 | THURSDAY | 11.0 | Y | 1.0 | NaN | NaN | NaN | XNA | Approved | -164.0 | XNA | XAP | Unaccompanied | Repeater | XNA | Cash | x-sell | Contact center | -1.0 | XNA | 36.0 | low_action | Cash X-Sell: low | 365243.0 | -134.0 | 916.0 | 365243.0 | 365243.0 | 1.0 | both |
| 4 | 1536272.0 | 108129 | Cash loans | 21709.125 | 450000.0 | 512370.0 | NaN | 450000.0 | WEDNESDAY | 9.0 | Y | 1.0 | NaN | NaN | NaN | XNA | Approved | -515.0 | XNA | XAP | NaN | Repeater | XNA | Cash | x-sell | AP+ (Cash loan) | 6.0 | XNA | 36.0 | low_normal | Cash X-Sell: low | 365243.0 | -485.0 | 565.0 | -155.0 | -147.0 | 1.0 | both |
prev_app_outer['_merge'].value_counts()
both 1670214
right_only 17398
left_only 0
Name: _merge, dtype: int64
| Table | 컬럼명 | 컬럼 대분류 | 컬럼 중분류 | 컬럼 설명 |
|---|---|---|---|---|
| previous_application.csv | SK_ID_PREV | 대출 | 고유ID | 과거 대출 고유 ID |
| previous_application.csv | SK_ID_CURR | 대출 | 고유ID | 현재 대출 고유 ID |
| previous_application.csv | NAME_CONTRACT_TYPE | 대출 | 대출 유형 | 대출 유형 |
| previous_application.csv | AMT_ANNUITY | 대출 | 대출 금액 | 월 대출 지급액 |
| previous_application.csv | AMT_APPLICATION | 대출 | 대출 금액 | 대출 신청 금액 |
| previous_application.csv | AMT_CREDIT | 대출 | 대출 금액 | 대출금액(허가) |
| previous_application.csv | AMT_DOWN_PAYMENT | 대출 | 대출 금액 | 대출 시 납부한 선금액 |
| previous_application.csv | AMT_GOODS_PRICE | 대출 | 대출 금액 | 소비자 대출상품액 |
| previous_application.csv | WEEKDAY_APPR_PROCESS_START | 고객 | 행동 | 대출 신청 시작 요일 |
| previous_application.csv | HOUR_APPR_PROCESS_START | 고객 | 행동 | 대출 신청 시작 시간대 |
| previous_application.csv | FLAG_LAST_APPL_PER_CONTRACT | 고객 | 행동 | 이전 계약의 마지막 대출 신청 여부 |
| previous_application.csv | NFLAG_LAST_APPL_IN_DAY | 고객 | 행동 | 하루중 마지막 대출 신청 여부(하루에 여러 번 대출 신청했을 경우) |
| previous_application.csv | NFLAG_MICRO_CASH | 대출 | 대출 유형 | 소액 대출 여부 |
| previous_application.csv | RATE_DOWN_PAYMENT | 대출 | 대출 금액 | 선금 비율(정규화됨) |
| previous_application.csv | RATE_INTEREST_PRIMARY | 대출 | 대출 금액 | 이자율 |
| previous_application.csv | RATE_INTEREST_PRIVILEGED | 대출 | 대출 금액 | 이자율 |
| previous_application.csv | NAME_CASH_LOAN_PURPOSE | 대출 | 대출 유형 | 현금 대출 목적 |
| previous_application.csv | NAME_CONTRACT_STATUS | 대출 | 대출 상태 | 대출 상태(허가, 취소) |
| previous_application.csv | DAYS_DECISION | 대출 | 대출 상태 | 과거 신청 대비 현재 신청 결정 기간 |
| previous_application.csv | NAME_PAYMENT_TYPE | 대출 | 대출 유형 | 과거 대출 신청의 납부 방법 |
| previous_application.csv | CODE_REJECT_REASON | 대출 | 대출 상태 | 과거 신청 거절 사유 |
| previous_application.csv | NAME_TYPE_SUITE | 고객 | 행동(추천) | 동행 고객 |
| previous_application.csv | NAME_CLIENT_TYPE | 고객 | 행동 | 신규 고객 또는 기존 대출 고객 여부 |
| previous_application.csv | NAME_GOODS_CATEGORY | 대출 | 대출 유형 | 대출 상품 중분류 유형 |
| previous_application.csv | NAME_PORTFOLIO | 대출 | 대출 유형 | 현금대출/POS/CAR 대출 유형 |
| previous_application.csv | NAME_PRODUCT_TYPE | 채널 | 판매 유형 | 고객이 찾아온 대출인가, 영업 대출인가 |
| previous_application.csv | CHANNEL_TYPE | 채널 | 채널 유형 | 채널 유형 |
| previous_application.csv | SELLERPLACE_AREA | 채널 | 채널 유형 | 판매자 판매 지역 |
| previous_application.csv | NAME_SELLER_INDUSTRY | 채널 | 채널 유형 | 판매자 Industry |
| previous_application.csv | CNT_PAYMENT | 대출 | 대출 금액 | 이전 대출 신청의 대출금액 관련 Term |
| previous_application.csv | NAME_YIELD_GROUP | 대출 | 대출 금액 | 집단 금리 적용 유형 |
| previous_application.csv | PRODUCT_COMBINATION | 대출 | 대출 유형 | 이전 대출 결합 상품 |
| previous_application.csv | DAYS_FIRST_DRAWING | 대출 | 상태 | 신청날짜부터 최초 대출 지급까지의 일자 |
| previous_application.csv | DAYS_FIRST_DUE | 대출 | 상태 | 신청날짜부터 마감일까지의 일자 |
| previous_application.csv | DAYS_LAST_DUE_1ST_VERSION | 대출 | 상태 | 신청날짜부터 첫 만기일까지의 일자 |
| previous_application.csv | DAYS_LAST_DUE | 대출 | 상태 | 신청날짜부터 마지막 만기일까지의 일자 |
| previous_application.csv | DAYS_TERMINATION | 대출 | 상태 | 현 대출 신청일자 대비 대출 예상 종료 일자 |
| previous_application.csv | NFLAG_INSURED_ON_APPROVAL | 대출 | 상태 | 대출 신청 시 보험가입 요청여부 |
prev.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1670214 entries, 0 to 1670213
Data columns (total 37 columns):
SK_ID_PREV 1670214 non-null int64
SK_ID_CURR 1670214 non-null int64
NAME_CONTRACT_TYPE 1670214 non-null object
AMT_ANNUITY 1297979 non-null float64
AMT_APPLICATION 1670214 non-null float64
AMT_CREDIT 1670213 non-null float64
AMT_DOWN_PAYMENT 774370 non-null float64
AMT_GOODS_PRICE 1284699 non-null float64
WEEKDAY_APPR_PROCESS_START 1670214 non-null object
HOUR_APPR_PROCESS_START 1670214 non-null int64
FLAG_LAST_APPL_PER_CONTRACT 1670214 non-null object
NFLAG_LAST_APPL_IN_DAY 1670214 non-null int64
RATE_DOWN_PAYMENT 774370 non-null float64
RATE_INTEREST_PRIMARY 5951 non-null float64
RATE_INTEREST_PRIVILEGED 5951 non-null float64
NAME_CASH_LOAN_PURPOSE 1670214 non-null object
NAME_CONTRACT_STATUS 1670214 non-null object
DAYS_DECISION 1670214 non-null int64
NAME_PAYMENT_TYPE 1670214 non-null object
CODE_REJECT_REASON 1670214 non-null object
NAME_TYPE_SUITE 849809 non-null object
NAME_CLIENT_TYPE 1670214 non-null object
NAME_GOODS_CATEGORY 1670214 non-null object
NAME_PORTFOLIO 1670214 non-null object
NAME_PRODUCT_TYPE 1670214 non-null object
CHANNEL_TYPE 1670214 non-null object
SELLERPLACE_AREA 1670214 non-null int64
NAME_SELLER_INDUSTRY 1670214 non-null object
CNT_PAYMENT 1297984 non-null float64
NAME_YIELD_GROUP 1670214 non-null object
PRODUCT_COMBINATION 1669868 non-null object
DAYS_FIRST_DRAWING 997149 non-null float64
DAYS_FIRST_DUE 997149 non-null float64
DAYS_LAST_DUE_1ST_VERSION 997149 non-null float64
DAYS_LAST_DUE 997149 non-null float64
DAYS_TERMINATION 997149 non-null float64
NFLAG_INSURED_ON_APPROVAL 997149 non-null float64
dtypes: float64(15), int64(6), object(16)
memory usage: 471.5+ MB
prev.groupby('SK_ID_CURR')['SK_ID_CURR'].count().mean()
4.928964135313716
sns.boxplot(prev.groupby('SK_ID_CURR')['SK_ID_CURR'].count())

app_prev = prev.merge(app_train[['SK_ID_CURR', 'TARGET']], on='SK_ID_CURR', how='left')
app_prev.shape
(1670214, 38)
def show_hist_by_target(df, columns):
cond_1 = (df['TARGET'] == 1)
cond_0 = (df['TARGET'] == 0)
for column in columns:
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(12, 4), squeeze=False)
sns.violinplot(x='TARGET', y=column, data=df, ax=axs[0][0] )
sns.distplot(df[cond_0][column], ax=axs[0][1], label='0', color='blue')
sns.distplot(df[cond_1][column], ax=axs[0][1], label='1', color='red')
num_columns = app_prev.dtypes[app_prev.dtypes != 'object'].index.tolist()
num_columns = [ column for column in num_columns if column not in ['SK_ID_PREV', 'SK_ID_CURR', 'TARGET']]
print(num_columns)
['AMT_ANNUITY', 'AMT_APPLICATION', 'AMT_CREDIT', 'AMT_DOWN_PAYMENT', 'AMT_GOODS_PRICE', 'HOUR_APPR_PROCESS_START', 'NFLAG_LAST_APPL_IN_DAY', 'RATE_DOWN_PAYMENT', 'RATE_INTEREST_PRIMARY', 'RATE_INTEREST_PRIVILEGED', 'DAYS_DECISION', 'SELLERPLACE_AREA', 'CNT_PAYMENT', 'DAYS_FIRST_DRAWING', 'DAYS_FIRST_DUE', 'DAYS_LAST_DUE_1ST_VERSION', 'DAYS_LAST_DUE', 'DAYS_TERMINATION', 'NFLAG_INSURED_ON_APPROVAL']
show_hist_by_target(app_prev, num_columns)
/Users/andylee/opt/anaconda3/lib/python3.8/site-packages/seaborn/distributions.py:369: UserWarning: Default bandwidth for data is 0; skipping density estimation.
warnings.warn(msg, UserWarning)
/Users/andylee/opt/anaconda3/lib/python3.8/site-packages/seaborn/distributions.py:369: UserWarning: Default bandwidth for data is 0; skipping density estimation.
warnings.warn(msg, UserWarning)
/Users/andylee/opt/anaconda3/lib/python3.8/site-packages/seaborn/distributions.py:369: UserWarning: Default bandwidth for data is 0; skipping density estimation.
warnings.warn(msg, UserWarning)
/Users/andylee/opt/anaconda3/lib/python3.8/site-packages/seaborn/distributions.py:369: UserWarning: Default bandwidth for data is 0; skipping density estimation.
warnings.warn(msg, UserWarning)



















object_columns = app_prev.dtypes[app_prev.dtypes=='object'].index.tolist()
object_columns
['NAME_CONTRACT_TYPE',
'WEEKDAY_APPR_PROCESS_START',
'FLAG_LAST_APPL_PER_CONTRACT',
'NAME_CASH_LOAN_PURPOSE',
'NAME_CONTRACT_STATUS',
'NAME_PAYMENT_TYPE',
'CODE_REJECT_REASON',
'NAME_TYPE_SUITE',
'NAME_CLIENT_TYPE',
'NAME_GOODS_CATEGORY',
'NAME_PORTFOLIO',
'NAME_PRODUCT_TYPE',
'CHANNEL_TYPE',
'NAME_SELLER_INDUSTRY',
'NAME_YIELD_GROUP',
'PRODUCT_COMBINATION']
def show_category_by_target(df, columns):
for column in columns:
chart = sns.catplot(x=column, col="TARGET", data=df, kind="count")
chart.set_xticklabels(rotation=65)
show_category_by_target(app_prev, object_columns)
















select sk_id_curr, count(*), avg(amt_credit) , max(amt_credit), min(amt_credit) from previous group by sk_id_curr
prev.groupby('SK_ID_CURR')
<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7fcfc8bfba90>
prev_group = prev.groupby('SK_ID_CURR')
prev_agg = pd.DataFrame()
prev_agg['CNT'] = prev_group['SK_ID_CURR'].count()
prev_agg['AVG_CREDIT'] = prev_group['AMT_CREDIT'].mean()
prev_agg['MAX_CREDIT'] = prev_group['AMT_CREDIT'].max()
prev_agg['MIN_CREDIT'] = prev_group['AMT_CREDIT'].min()
prev_agg.head()
| CNT | AVG_CREDIT | MAX_CREDIT | MIN_CREDIT | |
|---|---|---|---|---|
| SK_ID_CURR | ||||
| 100001 | 1 | 23787.00 | 23787.0 | 23787.0 |
| 100002 | 1 | 179055.00 | 179055.0 | 179055.0 |
| 100003 | 3 | 484191.00 | 1035882.0 | 68053.5 |
| 100004 | 1 | 20106.00 | 20106.0 | 20106.0 |
| 100005 | 2 | 20076.75 | 40153.5 | 0.0 |
prev_group = prev.groupby('SK_ID_CURR')
prev_agg1 = prev_group['AMT_CREDIT'].agg(['mean', 'max', 'min'])
prev_agg2 = prev_group['AMT_ANNUITY'].agg(['mean', 'max', 'min'])
prev_agg = prev_agg1.merge(prev_agg2, on='SK_ID_CURR', how='inner')
prev_agg.head()
| mean_x | max_x | min_x | mean_y | max_y | min_y | |
|---|---|---|---|---|---|---|
| SK_ID_CURR | ||||||
| 100001 | 23787.00 | 23787.0 | 23787.0 | 3951.000 | 3951.000 | 3951.000 |
| 100002 | 179055.00 | 179055.0 | 179055.0 | 9251.775 | 9251.775 | 9251.775 |
| 100003 | 484191.00 | 1035882.0 | 68053.5 | 56553.990 | 98356.995 | 6737.310 |
| 100004 | 20106.00 | 20106.0 | 20106.0 | 5357.250 | 5357.250 | 5357.250 |
| 100005 | 20076.75 | 40153.5 | 0.0 | 4813.200 | 4813.200 | 4813.200 |
agg_dict = {
'SK_ID_CURR':['count'],
'AMT_CREDIT':['mean', 'max', 'sum'],
'AMT_ANNUITY':['mean', 'max', 'sum'],
'AMT_APPLICATION':['mean', 'max', 'sum'],
'AMT_DOWN_PAYMENT':['mean', 'max', 'sum'],
'AMT_GOODS_PRICE':['mean', 'max', 'sum']
}
prev_group = prev.groupby('SK_ID_CURR')
prev_amt_agg = prev_group.agg(agg_dict)
prev_amt_agg.head()
| SK_ID_CURR | AMT_CREDIT | AMT_ANNUITY | AMT_APPLICATION | AMT_DOWN_PAYMENT | AMT_GOODS_PRICE | |||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | mean | max | sum | mean | max | sum | mean | max | sum | mean | max | sum | mean | max | sum | |
| SK_ID_CURR | ||||||||||||||||
| 100001 | 1 | 23787.00 | 23787.0 | 23787.0 | 3951.000 | 3951.000 | 3951.000 | 24835.50 | 24835.5 | 24835.5 | 2520.0 | 2520.0 | 2520.0 | 24835.5 | 24835.5 | 24835.5 |
| 100002 | 1 | 179055.00 | 179055.0 | 179055.0 | 9251.775 | 9251.775 | 9251.775 | 179055.00 | 179055.0 | 179055.0 | 0.0 | 0.0 | 0.0 | 179055.0 | 179055.0 | 179055.0 |
| 100003 | 3 | 484191.00 | 1035882.0 | 1452573.0 | 56553.990 | 98356.995 | 169661.970 | 435436.50 | 900000.0 | 1306309.5 | 3442.5 | 6885.0 | 6885.0 | 435436.5 | 900000.0 | 1306309.5 |
| 100004 | 1 | 20106.00 | 20106.0 | 20106.0 | 5357.250 | 5357.250 | 5357.250 | 24282.00 | 24282.0 | 24282.0 | 4860.0 | 4860.0 | 4860.0 | 24282.0 | 24282.0 | 24282.0 |
| 100005 | 2 | 20076.75 | 40153.5 | 40153.5 | 4813.200 | 4813.200 | 4813.200 | 22308.75 | 44617.5 | 44617.5 | 4464.0 | 4464.0 | 4464.0 | 44617.5 | 44617.5 | 44617.5 |
칼럼 인덱스가 계층 구조로 되어있어서 예를 들면 AMT_CREDIT_MEAN 이런식으로 바꾸려 함
prev_amt_agg.columns
MultiIndex([( 'SK_ID_CURR', 'count'),
( 'AMT_CREDIT', 'mean'),
( 'AMT_CREDIT', 'max'),
( 'AMT_CREDIT', 'sum'),
( 'AMT_ANNUITY', 'mean'),
( 'AMT_ANNUITY', 'max'),
( 'AMT_ANNUITY', 'sum'),
( 'AMT_APPLICATION', 'mean'),
( 'AMT_APPLICATION', 'max'),
( 'AMT_APPLICATION', 'sum'),
('AMT_DOWN_PAYMENT', 'mean'),
('AMT_DOWN_PAYMENT', 'max'),
('AMT_DOWN_PAYMENT', 'sum'),
( 'AMT_GOODS_PRICE', 'mean'),
( 'AMT_GOODS_PRICE', 'max'),
( 'AMT_GOODS_PRICE', 'sum')],
)
# [(column[0]+'_'+column[1].upper()) for column in prev_amt_agg.columns]
prev_amt_agg.columns = ['PREV_' + ('_').join(column).upper() for column in prev_amt_agg.columns.ravel()]
prev_amt_agg.head()
| PREV_SK_ID_CURR_COUNT | PREV_AMT_CREDIT_MEAN | PREV_AMT_CREDIT_MAX | PREV_AMT_CREDIT_SUM | PREV_AMT_ANNUITY_MEAN | PREV_AMT_ANNUITY_MAX | PREV_AMT_ANNUITY_SUM | PREV_AMT_APPLICATION_MEAN | PREV_AMT_APPLICATION_MAX | PREV_AMT_APPLICATION_SUM | PREV_AMT_DOWN_PAYMENT_MEAN | PREV_AMT_DOWN_PAYMENT_MAX | PREV_AMT_DOWN_PAYMENT_SUM | PREV_AMT_GOODS_PRICE_MEAN | PREV_AMT_GOODS_PRICE_MAX | PREV_AMT_GOODS_PRICE_SUM | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| SK_ID_CURR | ||||||||||||||||
| 100001 | 1 | 23787.00 | 23787.0 | 23787.0 | 3951.000 | 3951.000 | 3951.000 | 24835.50 | 24835.5 | 24835.5 | 2520.0 | 2520.0 | 2520.0 | 24835.5 | 24835.5 | 24835.5 |
| 100002 | 1 | 179055.00 | 179055.0 | 179055.0 | 9251.775 | 9251.775 | 9251.775 | 179055.00 | 179055.0 | 179055.0 | 0.0 | 0.0 | 0.0 | 179055.0 | 179055.0 | 179055.0 |
| 100003 | 3 | 484191.00 | 1035882.0 | 1452573.0 | 56553.990 | 98356.995 | 169661.970 | 435436.50 | 900000.0 | 1306309.5 | 3442.5 | 6885.0 | 6885.0 | 435436.5 | 900000.0 | 1306309.5 |
| 100004 | 1 | 20106.00 | 20106.0 | 20106.0 | 5357.250 | 5357.250 | 5357.250 | 24282.00 | 24282.0 | 24282.0 | 4860.0 | 4860.0 | 4860.0 | 24282.0 | 24282.0 | 24282.0 |
| 100005 | 2 | 20076.75 | 40153.5 | 40153.5 | 4813.200 | 4813.200 | 4813.200 | 22308.75 | 44617.5 | 44617.5 | 4464.0 | 4464.0 | 4464.0 | 44617.5 | 44617.5 | 44617.5 |
prev['PREV_CREDIT_DIFF'] = prev['AMT_APPLICATION'] - prev['AMT_CREDIT']
prev['PREV_GOODS_DIFF'] = prev['AMT_APPLICATION'] - prev['AMT_GOODS_PRICE']
prev['PREV_CREDIT_APPL_RATIO'] = prev['AMT_CREDIT']/prev['AMT_APPLICATION']
prev['PREV_ANNUITY_APPL_RATIO'] = prev['AMT_ANNUITY']/prev['AMT_APPLICATION']
prev['PREV_GOODS_APPL_RATIO'] = prev['AMT_GOODS_PRICE']/prev['AMT_APPLICATION']
prev['DAYS_FIRST_DRAWING'].replace(365243, np.nan, inplace=True)
prev['DAYS_FIRST_DUE'].replace(365243, np.nan, inplace= True)
prev['DAYS_LAST_DUE_1ST_VERSION'].replace(365243, np.nan, inplace= True)
prev['DAYS_LAST_DUE'].replace(365243, np.nan, inplace= True)
prev['DAYS_TERMINATION'].replace(365243, np.nan, inplace= True)
# 첫번째 만기일과 마지막 만기일까지의 기간
prev['PREV_DAYS_LAST_DUE_DIFF'] = prev['DAYS_LAST_DUE_1ST_VERSION'] - prev['DAYS_LAST_DUE']
prev.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1670214 entries, 0 to 1670213
Data columns (total 43 columns):
SK_ID_PREV 1670214 non-null int64
SK_ID_CURR 1670214 non-null int64
NAME_CONTRACT_TYPE 1670214 non-null object
AMT_ANNUITY 1297979 non-null float64
AMT_APPLICATION 1670214 non-null float64
AMT_CREDIT 1670213 non-null float64
AMT_DOWN_PAYMENT 774370 non-null float64
AMT_GOODS_PRICE 1284699 non-null float64
WEEKDAY_APPR_PROCESS_START 1670214 non-null object
HOUR_APPR_PROCESS_START 1670214 non-null int64
FLAG_LAST_APPL_PER_CONTRACT 1670214 non-null object
NFLAG_LAST_APPL_IN_DAY 1670214 non-null int64
RATE_DOWN_PAYMENT 774370 non-null float64
RATE_INTEREST_PRIMARY 5951 non-null float64
RATE_INTEREST_PRIVILEGED 5951 non-null float64
NAME_CASH_LOAN_PURPOSE 1670214 non-null object
NAME_CONTRACT_STATUS 1670214 non-null object
DAYS_DECISION 1670214 non-null int64
NAME_PAYMENT_TYPE 1670214 non-null object
CODE_REJECT_REASON 1670214 non-null object
NAME_TYPE_SUITE 849809 non-null object
NAME_CLIENT_TYPE 1670214 non-null object
NAME_GOODS_CATEGORY 1670214 non-null object
NAME_PORTFOLIO 1670214 non-null object
NAME_PRODUCT_TYPE 1670214 non-null object
CHANNEL_TYPE 1670214 non-null object
SELLERPLACE_AREA 1670214 non-null int64
NAME_SELLER_INDUSTRY 1670214 non-null object
CNT_PAYMENT 1297984 non-null float64
NAME_YIELD_GROUP 1670214 non-null object
PRODUCT_COMBINATION 1669868 non-null object
DAYS_FIRST_DRAWING 62705 non-null float64
DAYS_FIRST_DUE 956504 non-null float64
DAYS_LAST_DUE_1ST_VERSION 903285 non-null float64
DAYS_LAST_DUE 785928 non-null float64
DAYS_TERMINATION 771236 non-null float64
NFLAG_INSURED_ON_APPROVAL 997149 non-null float64
PREV_CREDIT_DIFF 1670213 non-null float64
PREV_GOODS_DIFF 1284699 non-null float64
PREV_CREDIT_APPL_RATIO 1333446 non-null float64
PREV_ANNUITY_APPL_RATIO 1296344 non-null float64
PREV_GOODS_APPL_RATIO 1277830 non-null float64
PREV_DAYS_LAST_DUE_DIFF 761917 non-null float64
dtypes: float64(21), int64(6), object(16)
memory usage: 547.9+ MB
CNT_PAYMENT 는 전체 납부해야하는 횟수, AMT_ANNUITY 월 별 납부해야하는 금액 -> 이것으로 간단하게 이자율 계산
all_pay = prev['AMT_ANNUITY'] * prev['CNT_PAYMENT']
prev['PREV_INTERESTS_RATE'] = (all_pay/prev['AMT_CREDIT'] - 1)/prev['CNT_PAYMENT']
prev.iloc[:, -7:].head(10)
| PREV_CREDIT_DIFF | PREV_GOODS_DIFF | PREV_CREDIT_APPL_RATIO | PREV_ANNUITY_APPL_RATIO | PREV_GOODS_APPL_RATIO | PREV_DAYS_LAST_DUE_DIFF | PREV_INTERESTS_RATE | |
|---|---|---|---|---|---|---|---|
| 0 | 0.0 | 0.0 | 1.000000 | 0.100929 | 1.0 | 342.0 | 0.017596 |
| 1 | -72171.0 | 0.0 | 1.118800 | 0.041463 | 1.0 | NaN | 0.009282 |
| 2 | -23944.5 | 0.0 | 1.212840 | 0.133873 | 1.0 | NaN | 0.027047 |
| 3 | -20790.0 | 0.0 | 1.046200 | 0.104536 | 1.0 | 30.0 | 0.016587 |
| 4 | -66555.0 | 0.0 | 1.197200 | 0.094591 | 1.0 | NaN | 0.037343 |
| 5 | -25573.5 | 0.0 | 1.081186 | 0.075251 | 1.0 | 0.0 | 0.014044 |
| 6 | 0.0 | NaN | NaN | NaN | NaN | NaN | NaN |
| 7 | 0.0 | NaN | NaN | NaN | NaN | NaN | NaN |
| 8 | 0.0 | NaN | NaN | NaN | NaN | NaN | NaN |
| 9 | 0.0 | NaN | NaN | NaN | NaN | NaN | NaN |
agg_dict = {
# 기존 컬럼.
'SK_ID_CURR':['count'],
'AMT_CREDIT':['mean', 'max', 'sum'],
'AMT_ANNUITY':['mean', 'max', 'sum'],
'AMT_APPLICATION':['mean', 'max', 'sum'],
'AMT_DOWN_PAYMENT':['mean', 'max', 'sum'],
'AMT_GOODS_PRICE':['mean', 'max', 'sum'],
'RATE_DOWN_PAYMENT': ['min', 'max', 'mean'],
'DAYS_DECISION': ['min', 'max', 'mean'],
'CNT_PAYMENT': ['mean', 'sum'],
# 가공 컬럼
'PREV_CREDIT_DIFF':['mean', 'max', 'sum'],
'PREV_CREDIT_APPL_RATIO':['mean', 'max'],
'PREV_GOODS_DIFF':['mean', 'max', 'sum'],
'PREV_GOODS_APPL_RATIO':['mean', 'max'],
'PREV_DAYS_LAST_DUE_DIFF':['mean', 'max', 'sum'],
'PREV_INTERESTS_RATE':['mean', 'max']
}
prev_group = prev.groupby('SK_ID_CURR')
prev_amt_agg = prev_group.agg(agg_dict)
prev_amt_agg.columns = ['PREV_'+('_').join(column).upper() for column in prev_amt_agg.columns.ravel()]
prev_amt_agg.head(10)
| PREV_SK_ID_CURR_COUNT | PREV_AMT_CREDIT_MEAN | PREV_AMT_CREDIT_MAX | PREV_AMT_CREDIT_SUM | PREV_AMT_ANNUITY_MEAN | PREV_AMT_ANNUITY_MAX | PREV_AMT_ANNUITY_SUM | PREV_AMT_APPLICATION_MEAN | PREV_AMT_APPLICATION_MAX | PREV_AMT_APPLICATION_SUM | PREV_AMT_DOWN_PAYMENT_MEAN | PREV_AMT_DOWN_PAYMENT_MAX | PREV_AMT_DOWN_PAYMENT_SUM | PREV_AMT_GOODS_PRICE_MEAN | PREV_AMT_GOODS_PRICE_MAX | PREV_AMT_GOODS_PRICE_SUM | PREV_RATE_DOWN_PAYMENT_MIN | PREV_RATE_DOWN_PAYMENT_MAX | PREV_RATE_DOWN_PAYMENT_MEAN | PREV_DAYS_DECISION_MIN | PREV_DAYS_DECISION_MAX | PREV_DAYS_DECISION_MEAN | PREV_CNT_PAYMENT_MEAN | PREV_CNT_PAYMENT_SUM | PREV_PREV_CREDIT_DIFF_MEAN | PREV_PREV_CREDIT_DIFF_MAX | PREV_PREV_CREDIT_DIFF_SUM | PREV_PREV_CREDIT_APPL_RATIO_MEAN | PREV_PREV_CREDIT_APPL_RATIO_MAX | PREV_PREV_GOODS_DIFF_MEAN | PREV_PREV_GOODS_DIFF_MAX | PREV_PREV_GOODS_DIFF_SUM | PREV_PREV_GOODS_APPL_RATIO_MEAN | PREV_PREV_GOODS_APPL_RATIO_MAX | PREV_PREV_DAYS_LAST_DUE_DIFF_MEAN | PREV_PREV_DAYS_LAST_DUE_DIFF_MAX | PREV_PREV_DAYS_LAST_DUE_DIFF_SUM | PREV_PREV_INTERESTS_RATE_MEAN | PREV_PREV_INTERESTS_RATE_MAX | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| SK_ID_CURR | |||||||||||||||||||||||||||||||||||||||
| 100001 | 1 | 23787.000000 | 23787.0 | 23787.0 | 3951.000000 | 3951.000 | 3951.000 | 24835.500000 | 24835.5 | 24835.50 | 2520.000000 | 2520.0 | 2520.00 | 24835.500000 | 24835.5 | 24835.50 | 0.104326 | 0.104326 | 0.104326 | -1740 | -1740 | -1740.000000 | 8.000000 | 8.0 | 1048.500000 | 1048.5 | 1048.50 | 0.957782 | 0.957782 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 120.0 | 120.0 | 120.0 | 0.041099 | 0.041099 |
| 100002 | 1 | 179055.000000 | 179055.0 | 179055.0 | 9251.775000 | 9251.775 | 9251.775 | 179055.000000 | 179055.0 | 179055.00 | 0.000000 | 0.0 | 0.00 | 179055.000000 | 179055.0 | 179055.00 | 0.000000 | 0.000000 | 0.000000 | -606 | -606 | -606.000000 | 24.000000 | 24.0 | 0.000000 | 0.0 | 0.00 | 1.000000 | 1.000000 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 150.0 | 150.0 | 150.0 | 0.010003 | 0.010003 |
| 100003 | 3 | 484191.000000 | 1035882.0 | 1452573.0 | 56553.990000 | 98356.995 | 169661.970 | 435436.500000 | 900000.0 | 1306309.50 | 3442.500000 | 6885.0 | 6885.00 | 435436.500000 | 900000.0 | 1306309.50 | 0.000000 | 0.100061 | 0.050030 | -2341 | -746 | -1305.000000 | 10.000000 | 30.0 | -48754.500000 | 756.0 | -146263.50 | 1.057664 | 1.150980 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 50.0 | 150.0 | 150.0 | 0.015272 | 0.018533 |
| 100004 | 1 | 20106.000000 | 20106.0 | 20106.0 | 5357.250000 | 5357.250 | 5357.250 | 24282.000000 | 24282.0 | 24282.00 | 4860.000000 | 4860.0 | 4860.00 | 24282.000000 | 24282.0 | 24282.00 | 0.212008 | 0.212008 | 0.212008 | -815 | -815 | -815.000000 | 4.000000 | 4.0 | 4176.000000 | 4176.0 | 4176.00 | 0.828021 | 0.828021 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 30.0 | 30.0 | 30.0 | 0.016450 | 0.016450 |
| 100005 | 2 | 20076.750000 | 40153.5 | 40153.5 | 4813.200000 | 4813.200 | 4813.200 | 22308.750000 | 44617.5 | 44617.50 | 4464.000000 | 4464.0 | 4464.00 | 44617.500000 | 44617.5 | 44617.50 | 0.108964 | 0.108964 | 0.108964 | -757 | -315 | -536.000000 | 12.000000 | 12.0 | 2232.000000 | 4464.0 | 4464.00 | 0.899950 | 0.899950 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 90.0 | 90.0 | 90.0 | 0.036537 | 0.036537 |
| 100006 | 9 | 291695.500000 | 906615.0 | 2625259.5 | 23651.175000 | 39954.510 | 141907.050 | 272203.260000 | 688500.0 | 2449829.34 | 34840.170000 | 66987.0 | 69680.34 | 408304.890000 | 688500.0 | 2449829.34 | 0.108994 | 0.217830 | 0.163412 | -617 | -181 | -272.444444 | 23.000000 | 138.0 | -19492.240000 | 66987.0 | -175430.16 | 1.012684 | 1.316797 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 810.0 | 1410.0 | 1620.0 | -inf | 0.027424 |
| 100007 | 6 | 166638.750000 | 284400.0 | 999832.5 | 12278.805000 | 22678.785 | 73672.830 | 150530.250000 | 247500.0 | 903181.50 | 3390.750000 | 3676.5 | 6781.50 | 150530.250000 | 247500.0 | 903181.50 | 0.100143 | 0.218890 | 0.159516 | -2357 | -374 | -1222.833333 | 20.666667 | 124.0 | -16108.500000 | 2560.5 | -96651.00 | 1.046356 | 1.264000 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 7.5 | 30.0 | 30.0 | 0.026538 | 0.036164 |
| 100008 | 5 | 162767.700000 | 501975.0 | 813838.5 | 15839.696250 | 25309.575 | 63358.785 | 155701.800000 | 450000.0 | 778509.00 | 5548.500000 | 12145.5 | 16645.50 | 194627.250000 | 450000.0 | 778509.00 | 0.000000 | 0.110243 | 0.073051 | -2536 | -82 | -1192.000000 | 14.000000 | 56.0 | -7065.900000 | 12145.5 | -35329.50 | 0.978569 | 1.115500 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 165.0 | 660.0 | 660.0 | 0.018055 | 0.034034 |
| 100009 | 7 | 70137.642857 | 98239.5 | 490963.5 | 10051.412143 | 17341.605 | 70359.885 | 76741.714286 | 110160.0 | 537192.00 | 9203.142857 | 22032.0 | 64422.00 | 76741.714286 | 110160.0 | 537192.00 | 0.000000 | 0.209525 | 0.126602 | -1562 | -74 | -719.285714 | 8.000000 | 56.0 | 6604.071429 | 17671.5 | 46228.50 | 0.916226 | 1.000000 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.019536 | 0.030703 |
| 100010 | 1 | 260811.000000 | 260811.0 | 260811.0 | 27463.410000 | 27463.410 | 27463.410 | 247212.000000 | 247212.0 | 247212.00 | 0.000000 | 0.0 | 0.00 | 247212.000000 | 247212.0 | 247212.00 | 0.000000 | 0.000000 | 0.000000 | -1070 | -1070 | -1070.000000 | 10.000000 | 10.0 | -13599.000000 | -13599.0 | -13599.00 | 1.055009 | 1.055009 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.005300 | 0.005300 |
prev_amt_agg.shape
(338857, 39)
prev['NAME_CONTRACT_STATUS'].value_counts()
Approved 1036781
Canceled 316319
Refused 290678
Unused offer 26436
Name: NAME_CONTRACT_STATUS, dtype: int64
select sk_id_curr, cnt_refused/cnt
from
(
select sk_id_curr, count(*) cnt, count(case when name_contract_status == 'Refused' end) cnt_refused
from previous group by sk_id_curr
)
cond_refused = (prev['NAME_CONTRACT_STATUS'] == 'Refused')
prev_refused = prev[cond_refused]
prev_refused.shape, prev.shape
((290678, 44), (1670214, 44))
prev_refused_agg = prev_refused.groupby('SK_ID_CURR')['SK_ID_CURR'].count()
prev_refused_agg.shape, prev_amt_agg.shape
((118277,), (338857, 39))
prev_refused_agg
SK_ID_CURR
100006 1
100011 1
100027 1
100030 10
100035 8
..
456244 1
456247 1
456249 1
456250 1
456255 2
Name: SK_ID_CURR, Length: 118277, dtype: int64
# 칼럼과 인덱스 이름이 같아서 안된다. 일부러 error 냈음
prev_amt_agg.merge(prev_refused_agg, how='left', on='SK_ID_CURR')
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-54-3bccc0d949fd> in <module>
----> 1 prev_amt_agg.merge(prev_refused_agg, how='left', on='SK_ID_CURR')
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/frame.py in merge(self, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator, validate)
7319 from pandas.core.reshape.merge import merge
7320
-> 7321 return merge(
7322 self,
7323 right,
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/reshape/merge.py in merge(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator, validate)
66 validate=None,
67 ):
---> 68 op = _MergeOperation(
69 left,
70 right,
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/reshape/merge.py in __init__(self, left, right, how, on, left_on, right_on, axis, left_index, right_index, sort, suffixes, copy, indicator, validate)
624 self.right_join_keys,
625 self.join_names,
--> 626 ) = self._get_merge_keys()
627
628 # validate the merge keys dtypes. We may need to coerce
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/reshape/merge.py in _get_merge_keys(self)
973 if not is_rkey(rk):
974 if rk is not None:
--> 975 right_keys.append(right._get_label_or_level_values(rk))
976 else:
977 # work-around for merge_asof(right_index=True)
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/generic.py in _get_label_or_level_values(self, key, axis)
1767
1768 if self._is_label_reference(key, axis=axis):
-> 1769 self._check_label_or_level_ambiguity(key, axis=axis)
1770 values = self.xs(key, axis=other_axes[0])._values
1771 elif self._is_level_reference(key, axis=axis):
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/generic.py in _check_label_or_level_ambiguity(self, key, axis)
1727 label_type=label_type,
1728 )
-> 1729 raise ValueError(msg)
1730
1731 def _get_label_or_level_values(self, key, axis=0):
ValueError: 'SK_ID_CURR' is both an index level and a column label, which is ambiguous.
pd.DataFrame(prev_refused_agg)
| SK_ID_CURR | |
|---|---|
| SK_ID_CURR | |
| 100006 | 1 |
| 100011 | 1 |
| 100027 | 1 |
| 100030 | 10 |
| 100035 | 8 |
| ... | ... |
| 456244 | 1 |
| 456247 | 1 |
| 456249 | 1 |
| 456250 | 1 |
| 456255 | 2 |
118277 rows × 1 columns
prev_refused_agg.reset_index(name='PREV_REFUSED_COUNT')
| SK_ID_CURR | PREV_REFUSED_COUNT | |
|---|---|---|
| 0 | 100006 | 1 |
| 1 | 100011 | 1 |
| 2 | 100027 | 1 |
| 3 | 100030 | 10 |
| 4 | 100035 | 8 |
| ... | ... | ... |
| 118272 | 456244 | 1 |
| 118273 | 456247 | 1 |
| 118274 | 456249 | 1 |
| 118275 | 456250 | 1 |
| 118276 | 456255 | 2 |
118277 rows × 2 columns
prev_refused_agg = prev_refused_agg.reset_index(name='PREV_REFUSED_COUNT')
prev_amt_agg = prev_amt_agg.reset_index()
prev_amt_refused_agg = prev_amt_agg.merge(prev_refused_agg, on='SK_ID_CURR', how='left')
prev_amt_refused_agg.head(10)
| SK_ID_CURR | PREV_SK_ID_CURR_COUNT | PREV_AMT_CREDIT_MEAN | PREV_AMT_CREDIT_MAX | PREV_AMT_CREDIT_SUM | PREV_AMT_ANNUITY_MEAN | PREV_AMT_ANNUITY_MAX | PREV_AMT_ANNUITY_SUM | PREV_AMT_APPLICATION_MEAN | PREV_AMT_APPLICATION_MAX | PREV_AMT_APPLICATION_SUM | PREV_AMT_DOWN_PAYMENT_MEAN | PREV_AMT_DOWN_PAYMENT_MAX | PREV_AMT_DOWN_PAYMENT_SUM | PREV_AMT_GOODS_PRICE_MEAN | PREV_AMT_GOODS_PRICE_MAX | PREV_AMT_GOODS_PRICE_SUM | PREV_RATE_DOWN_PAYMENT_MIN | PREV_RATE_DOWN_PAYMENT_MAX | PREV_RATE_DOWN_PAYMENT_MEAN | PREV_DAYS_DECISION_MIN | PREV_DAYS_DECISION_MAX | PREV_DAYS_DECISION_MEAN | PREV_CNT_PAYMENT_MEAN | PREV_CNT_PAYMENT_SUM | PREV_PREV_CREDIT_DIFF_MEAN | PREV_PREV_CREDIT_DIFF_MAX | PREV_PREV_CREDIT_DIFF_SUM | PREV_PREV_CREDIT_APPL_RATIO_MEAN | PREV_PREV_CREDIT_APPL_RATIO_MAX | PREV_PREV_GOODS_DIFF_MEAN | PREV_PREV_GOODS_DIFF_MAX | PREV_PREV_GOODS_DIFF_SUM | PREV_PREV_GOODS_APPL_RATIO_MEAN | PREV_PREV_GOODS_APPL_RATIO_MAX | PREV_PREV_DAYS_LAST_DUE_DIFF_MEAN | PREV_PREV_DAYS_LAST_DUE_DIFF_MAX | PREV_PREV_DAYS_LAST_DUE_DIFF_SUM | PREV_PREV_INTERESTS_RATE_MEAN | PREV_PREV_INTERESTS_RATE_MAX | PREV_REFUSED_COUNT | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 100001 | 1 | 23787.000000 | 23787.0 | 23787.0 | 3951.000000 | 3951.000 | 3951.000 | 24835.500000 | 24835.5 | 24835.50 | 2520.000000 | 2520.0 | 2520.00 | 24835.500000 | 24835.5 | 24835.50 | 0.104326 | 0.104326 | 0.104326 | -1740 | -1740 | -1740.000000 | 8.000000 | 8.0 | 1048.500000 | 1048.5 | 1048.50 | 0.957782 | 0.957782 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 120.0 | 120.0 | 120.0 | 0.041099 | 0.041099 | NaN |
| 1 | 100002 | 1 | 179055.000000 | 179055.0 | 179055.0 | 9251.775000 | 9251.775 | 9251.775 | 179055.000000 | 179055.0 | 179055.00 | 0.000000 | 0.0 | 0.00 | 179055.000000 | 179055.0 | 179055.00 | 0.000000 | 0.000000 | 0.000000 | -606 | -606 | -606.000000 | 24.000000 | 24.0 | 0.000000 | 0.0 | 0.00 | 1.000000 | 1.000000 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 150.0 | 150.0 | 150.0 | 0.010003 | 0.010003 | NaN |
| 2 | 100003 | 3 | 484191.000000 | 1035882.0 | 1452573.0 | 56553.990000 | 98356.995 | 169661.970 | 435436.500000 | 900000.0 | 1306309.50 | 3442.500000 | 6885.0 | 6885.00 | 435436.500000 | 900000.0 | 1306309.50 | 0.000000 | 0.100061 | 0.050030 | -2341 | -746 | -1305.000000 | 10.000000 | 30.0 | -48754.500000 | 756.0 | -146263.50 | 1.057664 | 1.150980 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 50.0 | 150.0 | 150.0 | 0.015272 | 0.018533 | NaN |
| 3 | 100004 | 1 | 20106.000000 | 20106.0 | 20106.0 | 5357.250000 | 5357.250 | 5357.250 | 24282.000000 | 24282.0 | 24282.00 | 4860.000000 | 4860.0 | 4860.00 | 24282.000000 | 24282.0 | 24282.00 | 0.212008 | 0.212008 | 0.212008 | -815 | -815 | -815.000000 | 4.000000 | 4.0 | 4176.000000 | 4176.0 | 4176.00 | 0.828021 | 0.828021 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 30.0 | 30.0 | 30.0 | 0.016450 | 0.016450 | NaN |
| 4 | 100005 | 2 | 20076.750000 | 40153.5 | 40153.5 | 4813.200000 | 4813.200 | 4813.200 | 22308.750000 | 44617.5 | 44617.50 | 4464.000000 | 4464.0 | 4464.00 | 44617.500000 | 44617.5 | 44617.50 | 0.108964 | 0.108964 | 0.108964 | -757 | -315 | -536.000000 | 12.000000 | 12.0 | 2232.000000 | 4464.0 | 4464.00 | 0.899950 | 0.899950 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 90.0 | 90.0 | 90.0 | 0.036537 | 0.036537 | NaN |
| 5 | 100006 | 9 | 291695.500000 | 906615.0 | 2625259.5 | 23651.175000 | 39954.510 | 141907.050 | 272203.260000 | 688500.0 | 2449829.34 | 34840.170000 | 66987.0 | 69680.34 | 408304.890000 | 688500.0 | 2449829.34 | 0.108994 | 0.217830 | 0.163412 | -617 | -181 | -272.444444 | 23.000000 | 138.0 | -19492.240000 | 66987.0 | -175430.16 | 1.012684 | 1.316797 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 810.0 | 1410.0 | 1620.0 | -inf | 0.027424 | 1.0 |
| 6 | 100007 | 6 | 166638.750000 | 284400.0 | 999832.5 | 12278.805000 | 22678.785 | 73672.830 | 150530.250000 | 247500.0 | 903181.50 | 3390.750000 | 3676.5 | 6781.50 | 150530.250000 | 247500.0 | 903181.50 | 0.100143 | 0.218890 | 0.159516 | -2357 | -374 | -1222.833333 | 20.666667 | 124.0 | -16108.500000 | 2560.5 | -96651.00 | 1.046356 | 1.264000 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 7.5 | 30.0 | 30.0 | 0.026538 | 0.036164 | NaN |
| 7 | 100008 | 5 | 162767.700000 | 501975.0 | 813838.5 | 15839.696250 | 25309.575 | 63358.785 | 155701.800000 | 450000.0 | 778509.00 | 5548.500000 | 12145.5 | 16645.50 | 194627.250000 | 450000.0 | 778509.00 | 0.000000 | 0.110243 | 0.073051 | -2536 | -82 | -1192.000000 | 14.000000 | 56.0 | -7065.900000 | 12145.5 | -35329.50 | 0.978569 | 1.115500 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 165.0 | 660.0 | 660.0 | 0.018055 | 0.034034 | NaN |
| 8 | 100009 | 7 | 70137.642857 | 98239.5 | 490963.5 | 10051.412143 | 17341.605 | 70359.885 | 76741.714286 | 110160.0 | 537192.00 | 9203.142857 | 22032.0 | 64422.00 | 76741.714286 | 110160.0 | 537192.00 | 0.000000 | 0.209525 | 0.126602 | -1562 | -74 | -719.285714 | 8.000000 | 56.0 | 6604.071429 | 17671.5 | 46228.50 | 0.916226 | 1.000000 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.019536 | 0.030703 | NaN |
| 9 | 100010 | 1 | 260811.000000 | 260811.0 | 260811.0 | 27463.410000 | 27463.410 | 27463.410 | 247212.000000 | 247212.0 | 247212.00 | 0.000000 | 0.0 | 0.00 | 247212.000000 | 247212.0 | 247212.00 | 0.000000 | 0.000000 | 0.000000 | -1070 | -1070 | -1070.000000 | 10.000000 | 10.0 | -13599.000000 | -13599.0 | -13599.00 | 1.055009 | 1.055009 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.005300 | 0.005300 | NaN |
prev_amt_refused_agg['PREV_REFUSED_COUNT'].value_counts(dropna=False)
NaN 220580
1.0 54616
2.0 26793
3.0 14025
4.0 8243
5.0 4859
6.0 3147
7.0 2069
8.0 1247
9.0 871
10.0 656
11.0 437
12.0 340
13.0 233
14.0 157
15.0 149
16.0 94
17.0 62
18.0 54
19.0 48
20.0 28
21.0 25
22.0 21
24.0 18
25.0 12
23.0 11
26.0 11
27.0 8
28.0 5
29.0 5
32.0 4
41.0 3
31.0 3
37.0 3
30.0 3
39.0 2
35.0 2
34.0 2
33.0 2
36.0 2
68.0 1
47.0 1
43.0 1
54.0 1
51.0 1
64.0 1
58.0 1
Name: PREV_REFUSED_COUNT, dtype: int64
prev_amt_refused_agg = prev_amt_refused_agg.fillna(0)
prev_amt_refused_agg['PREV_REFUSE_RATIO'] = prev_amt_refused_agg['PREV_REFUSED_COUNT'] / prev_amt_refused_agg['PREV_SK_ID_CURR_COUNT']
prev_amt_refused_agg.head(10)
| SK_ID_CURR | PREV_SK_ID_CURR_COUNT | PREV_AMT_CREDIT_MEAN | PREV_AMT_CREDIT_MAX | PREV_AMT_CREDIT_SUM | PREV_AMT_ANNUITY_MEAN | PREV_AMT_ANNUITY_MAX | PREV_AMT_ANNUITY_SUM | PREV_AMT_APPLICATION_MEAN | PREV_AMT_APPLICATION_MAX | PREV_AMT_APPLICATION_SUM | PREV_AMT_DOWN_PAYMENT_MEAN | PREV_AMT_DOWN_PAYMENT_MAX | PREV_AMT_DOWN_PAYMENT_SUM | PREV_AMT_GOODS_PRICE_MEAN | PREV_AMT_GOODS_PRICE_MAX | PREV_AMT_GOODS_PRICE_SUM | PREV_RATE_DOWN_PAYMENT_MIN | PREV_RATE_DOWN_PAYMENT_MAX | PREV_RATE_DOWN_PAYMENT_MEAN | PREV_DAYS_DECISION_MIN | PREV_DAYS_DECISION_MAX | PREV_DAYS_DECISION_MEAN | PREV_CNT_PAYMENT_MEAN | PREV_CNT_PAYMENT_SUM | PREV_PREV_CREDIT_DIFF_MEAN | PREV_PREV_CREDIT_DIFF_MAX | PREV_PREV_CREDIT_DIFF_SUM | PREV_PREV_CREDIT_APPL_RATIO_MEAN | PREV_PREV_CREDIT_APPL_RATIO_MAX | PREV_PREV_GOODS_DIFF_MEAN | PREV_PREV_GOODS_DIFF_MAX | PREV_PREV_GOODS_DIFF_SUM | PREV_PREV_GOODS_APPL_RATIO_MEAN | PREV_PREV_GOODS_APPL_RATIO_MAX | PREV_PREV_DAYS_LAST_DUE_DIFF_MEAN | PREV_PREV_DAYS_LAST_DUE_DIFF_MAX | PREV_PREV_DAYS_LAST_DUE_DIFF_SUM | PREV_PREV_INTERESTS_RATE_MEAN | PREV_PREV_INTERESTS_RATE_MAX | PREV_REFUSED_COUNT | PREV_REFUSE_RATIO | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 100001 | 1 | 23787.000000 | 23787.0 | 23787.0 | 3951.000000 | 3951.000 | 3951.000 | 24835.500000 | 24835.5 | 24835.50 | 2520.000000 | 2520.0 | 2520.00 | 24835.500000 | 24835.5 | 24835.50 | 0.104326 | 0.104326 | 0.104326 | -1740 | -1740 | -1740.000000 | 8.000000 | 8.0 | 1048.500000 | 1048.5 | 1048.50 | 0.957782 | 0.957782 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 120.0 | 120.0 | 120.0 | 0.041099 | 0.041099 | 0.0 | 0.000000 |
| 1 | 100002 | 1 | 179055.000000 | 179055.0 | 179055.0 | 9251.775000 | 9251.775 | 9251.775 | 179055.000000 | 179055.0 | 179055.00 | 0.000000 | 0.0 | 0.00 | 179055.000000 | 179055.0 | 179055.00 | 0.000000 | 0.000000 | 0.000000 | -606 | -606 | -606.000000 | 24.000000 | 24.0 | 0.000000 | 0.0 | 0.00 | 1.000000 | 1.000000 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 150.0 | 150.0 | 150.0 | 0.010003 | 0.010003 | 0.0 | 0.000000 |
| 2 | 100003 | 3 | 484191.000000 | 1035882.0 | 1452573.0 | 56553.990000 | 98356.995 | 169661.970 | 435436.500000 | 900000.0 | 1306309.50 | 3442.500000 | 6885.0 | 6885.00 | 435436.500000 | 900000.0 | 1306309.50 | 0.000000 | 0.100061 | 0.050030 | -2341 | -746 | -1305.000000 | 10.000000 | 30.0 | -48754.500000 | 756.0 | -146263.50 | 1.057664 | 1.150980 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 50.0 | 150.0 | 150.0 | 0.015272 | 0.018533 | 0.0 | 0.000000 |
| 3 | 100004 | 1 | 20106.000000 | 20106.0 | 20106.0 | 5357.250000 | 5357.250 | 5357.250 | 24282.000000 | 24282.0 | 24282.00 | 4860.000000 | 4860.0 | 4860.00 | 24282.000000 | 24282.0 | 24282.00 | 0.212008 | 0.212008 | 0.212008 | -815 | -815 | -815.000000 | 4.000000 | 4.0 | 4176.000000 | 4176.0 | 4176.00 | 0.828021 | 0.828021 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 30.0 | 30.0 | 30.0 | 0.016450 | 0.016450 | 0.0 | 0.000000 |
| 4 | 100005 | 2 | 20076.750000 | 40153.5 | 40153.5 | 4813.200000 | 4813.200 | 4813.200 | 22308.750000 | 44617.5 | 44617.50 | 4464.000000 | 4464.0 | 4464.00 | 44617.500000 | 44617.5 | 44617.50 | 0.108964 | 0.108964 | 0.108964 | -757 | -315 | -536.000000 | 12.000000 | 12.0 | 2232.000000 | 4464.0 | 4464.00 | 0.899950 | 0.899950 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 90.0 | 90.0 | 90.0 | 0.036537 | 0.036537 | 0.0 | 0.000000 |
| 5 | 100006 | 9 | 291695.500000 | 906615.0 | 2625259.5 | 23651.175000 | 39954.510 | 141907.050 | 272203.260000 | 688500.0 | 2449829.34 | 34840.170000 | 66987.0 | 69680.34 | 408304.890000 | 688500.0 | 2449829.34 | 0.108994 | 0.217830 | 0.163412 | -617 | -181 | -272.444444 | 23.000000 | 138.0 | -19492.240000 | 66987.0 | -175430.16 | 1.012684 | 1.316797 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 810.0 | 1410.0 | 1620.0 | -inf | 0.027424 | 1.0 | 0.111111 |
| 6 | 100007 | 6 | 166638.750000 | 284400.0 | 999832.5 | 12278.805000 | 22678.785 | 73672.830 | 150530.250000 | 247500.0 | 903181.50 | 3390.750000 | 3676.5 | 6781.50 | 150530.250000 | 247500.0 | 903181.50 | 0.100143 | 0.218890 | 0.159516 | -2357 | -374 | -1222.833333 | 20.666667 | 124.0 | -16108.500000 | 2560.5 | -96651.00 | 1.046356 | 1.264000 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 7.5 | 30.0 | 30.0 | 0.026538 | 0.036164 | 0.0 | 0.000000 |
| 7 | 100008 | 5 | 162767.700000 | 501975.0 | 813838.5 | 15839.696250 | 25309.575 | 63358.785 | 155701.800000 | 450000.0 | 778509.00 | 5548.500000 | 12145.5 | 16645.50 | 194627.250000 | 450000.0 | 778509.00 | 0.000000 | 0.110243 | 0.073051 | -2536 | -82 | -1192.000000 | 14.000000 | 56.0 | -7065.900000 | 12145.5 | -35329.50 | 0.978569 | 1.115500 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 165.0 | 660.0 | 660.0 | 0.018055 | 0.034034 | 0.0 | 0.000000 |
| 8 | 100009 | 7 | 70137.642857 | 98239.5 | 490963.5 | 10051.412143 | 17341.605 | 70359.885 | 76741.714286 | 110160.0 | 537192.00 | 9203.142857 | 22032.0 | 64422.00 | 76741.714286 | 110160.0 | 537192.00 | 0.000000 | 0.209525 | 0.126602 | -1562 | -74 | -719.285714 | 8.000000 | 56.0 | 6604.071429 | 17671.5 | 46228.50 | 0.916226 | 1.000000 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.019536 | 0.030703 | 0.0 | 0.000000 |
| 9 | 100010 | 1 | 260811.000000 | 260811.0 | 260811.0 | 27463.410000 | 27463.410 | 27463.410 | 247212.000000 | 247212.0 | 247212.00 | 0.000000 | 0.0 | 0.00 | 247212.000000 | 247212.0 | 247212.00 | 0.000000 | 0.000000 | 0.000000 | -1070 | -1070 | -1070.000000 | 10.000000 | 10.0 | -13599.000000 | -13599.0 | -13599.00 | 1.055009 | 1.055009 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.005300 | 0.005300 | 0.0 | 0.000000 |
SELECT COUNT(CASE WHEN == 'Approved' END) , COUNT(CASE WHEN == 'Refused' END) FROM PREV GROUP BY SK_ID_CURR
prev_refused_appr_group = prev[prev['NAME_CONTRACT_STATUS'].isin(['Approved', 'Refused'])].groupby(['SK_ID_CURR', 'NAME_CONTRACT_STATUS'])
prev_refused_appr_agg = prev_refused_appr_group['SK_ID_CURR'].count().unstack()
prev_refused_appr_agg.head(10)
| NAME_CONTRACT_STATUS | Approved | Refused |
|---|---|---|
| SK_ID_CURR | ||
| 100001 | 1.0 | NaN |
| 100002 | 1.0 | NaN |
| 100003 | 3.0 | NaN |
| 100004 | 1.0 | NaN |
| 100005 | 1.0 | NaN |
| 100006 | 5.0 | 1.0 |
| 100007 | 6.0 | NaN |
| 100008 | 4.0 | NaN |
| 100009 | 7.0 | NaN |
| 100010 | 1.0 | NaN |
prev_refused_appr_agg = prev_refused_appr_agg.fillna(0)
prev_refused_appr_agg.columns = ['PREV_APPROVED_COUNT', 'PREV_REFUSED_COUNT']
prev_refused_appr_agg = prev_refused_appr_agg.reset_index()
prev_refused_appr_agg.head(10)
| SK_ID_CURR | PREV_APPROVED_COUNT | PREV_REFUSED_COUNT | |
|---|---|---|---|
| 0 | 100001 | 1.0 | 0.0 |
| 1 | 100002 | 1.0 | 0.0 |
| 2 | 100003 | 3.0 | 0.0 |
| 3 | 100004 | 1.0 | 0.0 |
| 4 | 100005 | 1.0 | 0.0 |
| 5 | 100006 | 5.0 | 1.0 |
| 6 | 100007 | 6.0 | 0.0 |
| 7 | 100008 | 4.0 | 0.0 |
| 8 | 100009 | 7.0 | 0.0 |
| 9 | 100010 | 1.0 | 0.0 |
prev_amt_agg.head()
| SK_ID_CURR | PREV_SK_ID_CURR_COUNT | PREV_AMT_CREDIT_MEAN | PREV_AMT_CREDIT_MAX | PREV_AMT_CREDIT_SUM | PREV_AMT_ANNUITY_MEAN | PREV_AMT_ANNUITY_MAX | PREV_AMT_ANNUITY_SUM | PREV_AMT_APPLICATION_MEAN | PREV_AMT_APPLICATION_MAX | PREV_AMT_APPLICATION_SUM | PREV_AMT_DOWN_PAYMENT_MEAN | PREV_AMT_DOWN_PAYMENT_MAX | PREV_AMT_DOWN_PAYMENT_SUM | PREV_AMT_GOODS_PRICE_MEAN | PREV_AMT_GOODS_PRICE_MAX | PREV_AMT_GOODS_PRICE_SUM | PREV_RATE_DOWN_PAYMENT_MIN | PREV_RATE_DOWN_PAYMENT_MAX | PREV_RATE_DOWN_PAYMENT_MEAN | PREV_DAYS_DECISION_MIN | PREV_DAYS_DECISION_MAX | PREV_DAYS_DECISION_MEAN | PREV_CNT_PAYMENT_MEAN | PREV_CNT_PAYMENT_SUM | PREV_PREV_CREDIT_DIFF_MEAN | PREV_PREV_CREDIT_DIFF_MAX | PREV_PREV_CREDIT_DIFF_SUM | PREV_PREV_CREDIT_APPL_RATIO_MEAN | PREV_PREV_CREDIT_APPL_RATIO_MAX | PREV_PREV_GOODS_DIFF_MEAN | PREV_PREV_GOODS_DIFF_MAX | PREV_PREV_GOODS_DIFF_SUM | PREV_PREV_GOODS_APPL_RATIO_MEAN | PREV_PREV_GOODS_APPL_RATIO_MAX | PREV_PREV_DAYS_LAST_DUE_DIFF_MEAN | PREV_PREV_DAYS_LAST_DUE_DIFF_MAX | PREV_PREV_DAYS_LAST_DUE_DIFF_SUM | PREV_PREV_INTERESTS_RATE_MEAN | PREV_PREV_INTERESTS_RATE_MAX | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 100001 | 1 | 23787.00 | 23787.0 | 23787.0 | 3951.000 | 3951.000 | 3951.000 | 24835.50 | 24835.5 | 24835.5 | 2520.0 | 2520.0 | 2520.0 | 24835.5 | 24835.5 | 24835.5 | 0.104326 | 0.104326 | 0.104326 | -1740 | -1740 | -1740.0 | 8.0 | 8.0 | 1048.5 | 1048.5 | 1048.5 | 0.957782 | 0.957782 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 120.0 | 120.0 | 120.0 | 0.041099 | 0.041099 |
| 1 | 100002 | 1 | 179055.00 | 179055.0 | 179055.0 | 9251.775 | 9251.775 | 9251.775 | 179055.00 | 179055.0 | 179055.0 | 0.0 | 0.0 | 0.0 | 179055.0 | 179055.0 | 179055.0 | 0.000000 | 0.000000 | 0.000000 | -606 | -606 | -606.0 | 24.0 | 24.0 | 0.0 | 0.0 | 0.0 | 1.000000 | 1.000000 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 150.0 | 150.0 | 150.0 | 0.010003 | 0.010003 |
| 2 | 100003 | 3 | 484191.00 | 1035882.0 | 1452573.0 | 56553.990 | 98356.995 | 169661.970 | 435436.50 | 900000.0 | 1306309.5 | 3442.5 | 6885.0 | 6885.0 | 435436.5 | 900000.0 | 1306309.5 | 0.000000 | 0.100061 | 0.050030 | -2341 | -746 | -1305.0 | 10.0 | 30.0 | -48754.5 | 756.0 | -146263.5 | 1.057664 | 1.150980 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 50.0 | 150.0 | 150.0 | 0.015272 | 0.018533 |
| 3 | 100004 | 1 | 20106.00 | 20106.0 | 20106.0 | 5357.250 | 5357.250 | 5357.250 | 24282.00 | 24282.0 | 24282.0 | 4860.0 | 4860.0 | 4860.0 | 24282.0 | 24282.0 | 24282.0 | 0.212008 | 0.212008 | 0.212008 | -815 | -815 | -815.0 | 4.0 | 4.0 | 4176.0 | 4176.0 | 4176.0 | 0.828021 | 0.828021 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 30.0 | 30.0 | 30.0 | 0.016450 | 0.016450 |
| 4 | 100005 | 2 | 20076.75 | 40153.5 | 40153.5 | 4813.200 | 4813.200 | 4813.200 | 22308.75 | 44617.5 | 44617.5 | 4464.0 | 4464.0 | 4464.0 | 44617.5 | 44617.5 | 44617.5 | 0.108964 | 0.108964 | 0.108964 | -757 | -315 | -536.0 | 12.0 | 12.0 | 2232.0 | 4464.0 | 4464.0 | 0.899950 | 0.899950 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 90.0 | 90.0 | 90.0 | 0.036537 | 0.036537 |
# prev_amt_agg와 조인. prev_amt_agg와 prev_refused_appr_agg 모두 SK_ID_CURR을 INDEX로 가지고 있음.
prev_agg = prev_amt_agg.merge(prev_refused_appr_agg, on='SK_ID_CURR', how='left')
# SK_ID_CURR별 과거 대출건수 대비 APPROVED_COUNT 및 REFUSED_COUNT 비율 생성.
prev_agg['PREV_REFUSED_RATIO'] = prev_agg['PREV_REFUSED_COUNT']/prev_agg['PREV_SK_ID_CURR_COUNT']
prev_agg['PREV_APPROVED_RATIO'] = prev_agg['PREV_APPROVED_COUNT']/prev_agg['PREV_SK_ID_CURR_COUNT']
# 'PREV_REFUSED_COUNT', 'PREV_APPROVED_COUNT' 컬럼 drop
prev_agg = prev_agg.drop(['PREV_REFUSED_COUNT', 'PREV_APPROVED_COUNT'], axis=1)
# prev_amt_agg와 prev_refused_appr_agg INDEX인 SK_ID_CURR이 조인 후 정식 컬럼으로 생성됨.
prev_agg.head(30)
| SK_ID_CURR | PREV_SK_ID_CURR_COUNT | PREV_AMT_CREDIT_MEAN | PREV_AMT_CREDIT_MAX | PREV_AMT_CREDIT_SUM | PREV_AMT_ANNUITY_MEAN | PREV_AMT_ANNUITY_MAX | PREV_AMT_ANNUITY_SUM | PREV_AMT_APPLICATION_MEAN | PREV_AMT_APPLICATION_MAX | PREV_AMT_APPLICATION_SUM | PREV_AMT_DOWN_PAYMENT_MEAN | PREV_AMT_DOWN_PAYMENT_MAX | PREV_AMT_DOWN_PAYMENT_SUM | PREV_AMT_GOODS_PRICE_MEAN | PREV_AMT_GOODS_PRICE_MAX | PREV_AMT_GOODS_PRICE_SUM | PREV_RATE_DOWN_PAYMENT_MIN | PREV_RATE_DOWN_PAYMENT_MAX | PREV_RATE_DOWN_PAYMENT_MEAN | PREV_DAYS_DECISION_MIN | PREV_DAYS_DECISION_MAX | PREV_DAYS_DECISION_MEAN | PREV_CNT_PAYMENT_MEAN | PREV_CNT_PAYMENT_SUM | PREV_PREV_CREDIT_DIFF_MEAN | PREV_PREV_CREDIT_DIFF_MAX | PREV_PREV_CREDIT_DIFF_SUM | PREV_PREV_CREDIT_APPL_RATIO_MEAN | PREV_PREV_CREDIT_APPL_RATIO_MAX | PREV_PREV_GOODS_DIFF_MEAN | PREV_PREV_GOODS_DIFF_MAX | PREV_PREV_GOODS_DIFF_SUM | PREV_PREV_GOODS_APPL_RATIO_MEAN | PREV_PREV_GOODS_APPL_RATIO_MAX | PREV_PREV_DAYS_LAST_DUE_DIFF_MEAN | PREV_PREV_DAYS_LAST_DUE_DIFF_MAX | PREV_PREV_DAYS_LAST_DUE_DIFF_SUM | PREV_PREV_INTERESTS_RATE_MEAN | PREV_PREV_INTERESTS_RATE_MAX | PREV_REFUSED_RATIO | PREV_APPROVED_RATIO | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 100001 | 1 | 23787.000000 | 23787.0 | 23787.000 | 3951.000000 | 3951.000 | 3951.000 | 24835.500000 | 24835.5 | 24835.500 | 2520.000000 | 2520.0 | 2520.00 | 24835.500000 | 24835.5 | 24835.500 | 0.104326 | 0.104326 | 0.104326 | -1740 | -1740 | -1740.000000 | 8.000000 | 8.0 | 1048.500000 | 1048.5 | 1048.50 | 0.957782 | 0.957782 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 120.00 | 120.0 | 120.0 | 0.041099 | 0.041099 | 0.000000 | 1.000000 |
| 1 | 100002 | 1 | 179055.000000 | 179055.0 | 179055.000 | 9251.775000 | 9251.775 | 9251.775 | 179055.000000 | 179055.0 | 179055.000 | 0.000000 | 0.0 | 0.00 | 179055.000000 | 179055.0 | 179055.000 | 0.000000 | 0.000000 | 0.000000 | -606 | -606 | -606.000000 | 24.000000 | 24.0 | 0.000000 | 0.0 | 0.00 | 1.000000 | 1.000000 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 150.00 | 150.0 | 150.0 | 0.010003 | 0.010003 | 0.000000 | 1.000000 |
| 2 | 100003 | 3 | 484191.000000 | 1035882.0 | 1452573.000 | 56553.990000 | 98356.995 | 169661.970 | 435436.500000 | 900000.0 | 1306309.500 | 3442.500000 | 6885.0 | 6885.00 | 435436.500000 | 900000.0 | 1306309.500 | 0.000000 | 0.100061 | 0.050030 | -2341 | -746 | -1305.000000 | 10.000000 | 30.0 | -48754.500000 | 756.0 | -146263.50 | 1.057664 | 1.150980 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 50.00 | 150.0 | 150.0 | 0.015272 | 0.018533 | 0.000000 | 1.000000 |
| 3 | 100004 | 1 | 20106.000000 | 20106.0 | 20106.000 | 5357.250000 | 5357.250 | 5357.250 | 24282.000000 | 24282.0 | 24282.000 | 4860.000000 | 4860.0 | 4860.00 | 24282.000000 | 24282.0 | 24282.000 | 0.212008 | 0.212008 | 0.212008 | -815 | -815 | -815.000000 | 4.000000 | 4.0 | 4176.000000 | 4176.0 | 4176.00 | 0.828021 | 0.828021 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 30.00 | 30.0 | 30.0 | 0.016450 | 0.016450 | 0.000000 | 1.000000 |
| 4 | 100005 | 2 | 20076.750000 | 40153.5 | 40153.500 | 4813.200000 | 4813.200 | 4813.200 | 22308.750000 | 44617.5 | 44617.500 | 4464.000000 | 4464.0 | 4464.00 | 44617.500000 | 44617.5 | 44617.500 | 0.108964 | 0.108964 | 0.108964 | -757 | -315 | -536.000000 | 12.000000 | 12.0 | 2232.000000 | 4464.0 | 4464.00 | 0.899950 | 0.899950 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 90.00 | 90.0 | 90.0 | 0.036537 | 0.036537 | 0.000000 | 0.500000 |
| 5 | 100006 | 9 | 291695.500000 | 906615.0 | 2625259.500 | 23651.175000 | 39954.510 | 141907.050 | 272203.260000 | 688500.0 | 2449829.340 | 34840.170000 | 66987.0 | 69680.34 | 408304.890000 | 688500.0 | 2449829.340 | 0.108994 | 0.217830 | 0.163412 | -617 | -181 | -272.444444 | 23.000000 | 138.0 | -19492.240000 | 66987.0 | -175430.16 | 1.012684 | 1.316797 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 810.00 | 1410.0 | 1620.0 | -inf | 0.027424 | 0.111111 | 0.555556 |
| 6 | 100007 | 6 | 166638.750000 | 284400.0 | 999832.500 | 12278.805000 | 22678.785 | 73672.830 | 150530.250000 | 247500.0 | 903181.500 | 3390.750000 | 3676.5 | 6781.50 | 150530.250000 | 247500.0 | 903181.500 | 0.100143 | 0.218890 | 0.159516 | -2357 | -374 | -1222.833333 | 20.666667 | 124.0 | -16108.500000 | 2560.5 | -96651.00 | 1.046356 | 1.264000 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 7.50 | 30.0 | 30.0 | 0.026538 | 0.036164 | 0.000000 | 1.000000 |
| 7 | 100008 | 5 | 162767.700000 | 501975.0 | 813838.500 | 15839.696250 | 25309.575 | 63358.785 | 155701.800000 | 450000.0 | 778509.000 | 5548.500000 | 12145.5 | 16645.50 | 194627.250000 | 450000.0 | 778509.000 | 0.000000 | 0.110243 | 0.073051 | -2536 | -82 | -1192.000000 | 14.000000 | 56.0 | -7065.900000 | 12145.5 | -35329.50 | 0.978569 | 1.115500 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 165.00 | 660.0 | 660.0 | 0.018055 | 0.034034 | 0.000000 | 0.800000 |
| 8 | 100009 | 7 | 70137.642857 | 98239.5 | 490963.500 | 10051.412143 | 17341.605 | 70359.885 | 76741.714286 | 110160.0 | 537192.000 | 9203.142857 | 22032.0 | 64422.00 | 76741.714286 | 110160.0 | 537192.000 | 0.000000 | 0.209525 | 0.126602 | -1562 | -74 | -719.285714 | 8.000000 | 56.0 | 6604.071429 | 17671.5 | 46228.50 | 0.916226 | 1.000000 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.00 | 0.0 | 0.0 | 0.019536 | 0.030703 | 0.000000 | 1.000000 |
| 9 | 100010 | 1 | 260811.000000 | 260811.0 | 260811.000 | 27463.410000 | 27463.410 | 27463.410 | 247212.000000 | 247212.0 | 247212.000 | 0.000000 | 0.0 | 0.00 | 247212.000000 | 247212.0 | 247212.000 | 0.000000 | 0.000000 | 0.000000 | -1070 | -1070 | -1070.000000 | 10.000000 | 10.0 | -13599.000000 | -13599.0 | -13599.00 | 1.055009 | 1.055009 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.00 | 0.0 | 0.0 | 0.005300 | 0.005300 | 0.000000 | 1.000000 |
| 10 | 100011 | 4 | 261840.375000 | 732915.0 | 1047361.500 | 18303.195000 | 31295.250 | 54909.585 | 202732.875000 | 675000.0 | 810931.500 | 6797.250000 | 13594.5 | 13594.50 | 270310.500000 | 675000.0 | 810931.500 | 0.000000 | 0.100010 | 0.050005 | -2508 | -1162 | -1784.500000 | 14.000000 | 42.0 | -59107.500000 | 1485.0 | -236430.00 | inf | inf | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.00 | 0.0 | 0.0 | -inf | 0.025367 | 0.250000 | 0.750000 |
| 11 | 100012 | 4 | 74119.500000 | 158508.0 | 296478.000 | 7894.155000 | 11188.035 | 23682.465 | 60930.000000 | 135000.0 | 243720.000 | 0.000000 | 0.0 | 0.00 | 81240.000000 | 135000.0 | 243720.000 | 0.000000 | 0.000000 | 0.000000 | -1673 | -107 | -779.750000 | 18.000000 | 54.0 | -13189.500000 | 0.0 | -52758.00 | 1.236566 | 1.269700 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 100.00 | 180.0 | 300.0 | 0.033372 | 0.043775 | 0.000000 | 0.750000 |
| 12 | 100013 | 4 | 146134.125000 | 512370.0 | 584536.500 | 11478.195000 | 23153.985 | 34434.585 | 130871.250000 | 450000.0 | 523485.000 | 3375.000000 | 6750.0 | 6750.00 | 174495.000000 | 450000.0 | 523485.000 | 0.000000 | 0.134434 | 0.067217 | -1999 | -222 | -837.500000 | 17.333333 | 52.0 | -15262.875000 | 3096.0 | -61051.50 | 1.052363 | 1.138600 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 200.00 | 570.0 | 600.0 | 0.027617 | 0.036400 | 0.000000 | 0.750000 |
| 13 | 100014 | 2 | 102834.000000 | 131868.0 | 205668.000 | 12806.550000 | 14045.625 | 25613.100 | 96536.250000 | 119272.5 | 193072.500 | 0.000000 | 0.0 | 0.00 | 96536.250000 | 119272.5 | 193072.500 | 0.000000 | 0.000000 | 0.000000 | -844 | -102 | -473.000000 | 9.000000 | 18.0 | -6297.750000 | 0.0 | -12595.50 | 1.052801 | 1.105603 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.00 | 0.0 | 0.0 | 0.014020 | 0.023653 | 0.000000 | 1.000000 |
| 14 | 100015 | 1 | 67077.000000 | 67077.0 | 67077.000 | 7666.920000 | 7666.920 | 7666.920 | 68850.000000 | 68850.0 | 68850.000 | 6885.000000 | 6885.0 | 6885.00 | 68850.000000 | 68850.0 | 68850.000 | 0.101382 | 0.101382 | 0.101382 | -2396 | -2396 | -2396.000000 | 10.000000 | 10.0 | 1773.000000 | 1773.0 | 1773.00 | 0.974248 | 0.974248 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.00 | 0.0 | 0.0 | 0.014300 | 0.014300 | 0.000000 | 1.000000 |
| 15 | 100016 | 4 | 106221.375000 | 232200.0 | 424885.500 | 8593.155000 | 14480.460 | 34372.620 | 120745.125000 | 290250.0 | 482980.500 | 15690.375000 | 58050.0 | 62761.50 | 120745.125000 | 290250.0 | 482980.500 | 0.000000 | 0.217818 | 0.081682 | -2370 | -1049 | -1779.500000 | 17.000000 | 68.0 | 14523.750000 | 58050.0 | 58095.00 | 0.949761 | 1.099045 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 70.00 | 210.0 | 210.0 | 0.029580 | 0.033600 | 0.000000 | 1.000000 |
| 16 | 100017 | 2 | 175565.250000 | 198409.5 | 351130.500 | 14375.407500 | 16967.295 | 28750.815 | 176665.500000 | 195291.0 | 353331.000 | 19152.000000 | 22500.0 | 38304.00 | 176665.500000 | 195291.0 | 353331.000 | 0.102133 | 0.110926 | 0.106529 | -2441 | -612 | -1526.500000 | 17.000000 | 34.0 | 1100.250000 | 5319.0 | 2200.50 | 0.991156 | 1.015968 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.00 | 0.0 | 0.0 | 0.014412 | 0.017723 | 0.000000 | 1.000000 |
| 17 | 100018 | 4 | 285989.591250 | 536364.0 | 1143958.365 | 38669.703750 | 94906.035 | 154678.815 | 298363.466250 | 571774.5 | 1193453.865 | 14295.375000 | 57181.5 | 57181.50 | 298363.466250 | 571774.5 | 1193453.865 | 0.000000 | 0.108917 | 0.027229 | -1648 | -188 | -755.250000 | 10.000000 | 40.0 | 12373.875000 | 57181.5 | 49495.50 | 1.014609 | 1.158442 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | -9.00 | 0.0 | -18.0 | 0.018384 | 0.038257 | 0.000000 | 1.000000 |
| 18 | 100019 | 1 | 104683.500000 | 104683.5 | 104683.500 | 10518.615000 | 10518.615 | 10518.615 | 84136.500000 | 84136.5 | 84136.500 | 0.000000 | 0.0 | 0.00 | 84136.500000 | 84136.5 | 84136.500 | 0.000000 | 0.000000 | 0.000000 | -925 | -925 | -925.000000 | 12.000000 | 12.0 | -20547.000000 | -20547.0 | -20547.00 | 1.244210 | 1.244210 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 150.00 | 150.0 | 150.0 | 0.017147 | 0.017147 | 0.000000 | 1.000000 |
| 19 | 100020 | 2 | 41706.000000 | 48325.5 | 83412.000 | 7254.517500 | 8779.770 | 14509.035 | 39206.250000 | 45900.0 | 78412.500 | 0.000000 | 0.0 | 0.00 | 39206.250000 | 45900.0 | 78412.500 | 0.000000 | 0.000000 | 0.000000 | -472 | -243 | -357.500000 | 7.000000 | 14.0 | -2499.750000 | -2425.5 | -4999.50 | 1.066006 | 1.079170 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.00 | 0.0 | 0.0 | 0.026651 | 0.038290 | 0.000000 | 1.000000 |
| 20 | 100021 | 6 | 153615.750000 | 675000.0 | 921694.500 | 10686.165000 | 33750.000 | 64116.990 | 40961.250000 | 99189.0 | 245767.500 | 3770.100000 | 9922.5 | 18850.50 | 49153.500000 | 99189.0 | 245767.500 | 0.000000 | 0.433539 | 0.106606 | -2811 | -261 | -943.833333 | 7.000000 | 42.0 | -112654.500000 | 8928.0 | -675927.00 | inf | inf | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 90.00 | 180.0 | 270.0 | -inf | 0.034033 | 0.000000 | 1.000000 |
| 21 | 100022 | 1 | 103203.000000 | 103203.0 | 103203.000 | 6765.975000 | 6765.975 | 6765.975 | 89091.000000 | 89091.0 | 89091.000 | 0.000000 | 0.0 | 0.00 | 89091.000000 | 89091.0 | 89091.000 | 0.000000 | 0.000000 | 0.000000 | -239 | -239 | -239.000000 | 18.000000 | 18.0 | -14112.000000 | -14112.0 | -14112.00 | 1.158400 | 1.158400 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | NaN | NaN | 0.0 | 0.010004 | 0.010004 | 0.000000 | 1.000000 |
| 22 | 100023 | 4 | 113517.000000 | 239242.5 | 454068.000 | 8895.892500 | 16822.440 | 35583.570 | 98746.875000 | 180000.0 | 394987.500 | 8435.250000 | 9000.0 | 16870.50 | 98746.875000 | 180000.0 | 394987.500 | 0.095959 | 0.101380 | 0.098669 | -2424 | -262 | -1215.750000 | 12.500000 | 50.0 | -14770.125000 | 2025.0 | -59080.50 | 1.080951 | 1.329125 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 240.00 | 600.0 | 720.0 | -inf | 0.028649 | 0.000000 | 1.000000 |
| 23 | 100025 | 8 | 295425.781875 | 1042560.0 | 2363406.255 | 27645.595714 | 55678.095 | 193519.170 | 259795.344375 | 900000.0 | 2078362.755 | 5845.500000 | 23382.0 | 23382.00 | 296908.965000 | 900000.0 | 2078362.755 | 0.000000 | 0.423065 | 0.105766 | -2060 | -7 | -731.000000 | 13.714286 | 96.0 | -35630.437500 | 21645.0 | -285043.50 | 1.073930 | 1.269700 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 174.25 | 367.0 | 697.0 | 0.022884 | 0.027871 | 0.000000 | 0.875000 |
| 24 | 100026 | 3 | 292281.000000 | 744498.0 | 876843.000 | 33193.665000 | 42778.845 | 66387.330 | 269115.000000 | 675000.0 | 807345.000 | 0.000000 | 0.0 | 0.00 | 403672.500000 | 675000.0 | 807345.000 | 0.000000 | 0.000000 | 0.000000 | -1427 | -152 | -682.333333 | 21.000000 | 42.0 | -23166.000000 | 0.0 | -69498.00 | 1.051480 | 1.102960 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 510.00 | 1020.0 | 1020.0 | 0.020701 | 0.029682 | 0.000000 | 0.666667 |
| 25 | 100027 | 4 | 173795.625000 | 239850.0 | 695182.500 | 24067.755000 | 28077.570 | 72203.265 | 163621.125000 | 226984.5 | 654484.500 | 0.000000 | 0.0 | 0.00 | 218161.500000 | 226984.5 | 654484.500 | 0.000000 | 0.000000 | 0.000000 | -795 | -181 | -407.000000 | 11.333333 | 34.0 | -10174.500000 | 0.0 | -40698.00 | 1.062332 | 1.066000 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.00 | 0.0 | 0.0 | 0.014849 | 0.017250 | 0.250000 | 0.500000 |
| 26 | 100028 | 5 | 92920.500000 | 225000.0 | 464602.500 | 8091.585000 | 11250.000 | 24274.755 | 49207.500000 | 130765.5 | 246037.500 | 3750.000000 | 11250.0 | 11250.00 | 82012.500000 | 130765.5 | 246037.500 | 0.000000 | 0.173124 | 0.057708 | -1805 | -531 | -1124.200000 | 11.333333 | 34.0 | -43713.000000 | 6435.0 | -218565.00 | inf | inf | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 75.00 | 150.0 | 150.0 | -inf | 0.017550 | 0.000000 | 0.600000 |
| 27 | 100029 | 1 | 126000.000000 | 126000.0 | 126000.000 | 22891.680000 | 22891.680 | 22891.680 | 126000.000000 | 126000.0 | 126000.000 | 0.000000 | 0.0 | 0.00 | 126000.000000 | 126000.0 | 126000.000 | 0.000000 | 0.000000 | 0.000000 | -324 | -324 | -324.000000 | 6.000000 | 6.0 | 0.000000 | 0.0 | 0.00 | 1.000000 | 1.000000 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 60.00 | 60.0 | 60.0 | 0.015013 | 0.015013 | 0.000000 | 1.000000 |
| 28 | 100030 | 12 | 32534.643750 | 49734.0 | 390415.725 | 4731.041250 | 9035.685 | 37848.330 | 33497.268750 | 47241.0 | 401967.225 | 1262.625000 | 6714.0 | 15151.50 | 33497.268750 | 47241.0 | 401967.225 | 0.000000 | 0.210919 | 0.044804 | -2856 | -378 | -1981.083333 | 8.625000 | 69.0 | 962.625000 | 5607.0 | 11551.50 | 0.965475 | 1.052772 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 60.00 | 90.0 | 120.0 | 0.017772 | 0.037233 | 0.833333 | 0.166667 |
| 29 | 100032 | 1 | 42255.000000 | 42255.0 | 42255.000 | 7910.145000 | 7910.145 | 7910.145 | 69255.000000 | 69255.0 | 69255.000 | 27000.000000 | 27000.0 | 27000.00 | 69255.000000 | 69255.0 | 69255.000 | 0.424597 | 0.424597 | 0.424597 | -1497 | -1497 | -1497.000000 | 6.000000 | 6.0 | 27000.000000 | 27000.0 | 27000.00 | 0.610136 | 0.610136 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.00 | 0.0 | 0.0 | 0.020534 | 0.020534 | 0.000000 | 1.000000 |
apps_all = get_apps_processed(apps)
print(apps_all.shape, prev_agg.shape)
apps_all = apps_all.merge(prev_agg, on='SK_ID_CURR', how='left')
print(apps_all.shape)
(356255, 135) (338857, 42)
(356255, 176)
apps_all.head()
| AMT_ANNUITY | AMT_CREDIT | AMT_GOODS_PRICE | AMT_INCOME_TOTAL | AMT_REQ_CREDIT_BUREAU_DAY | AMT_REQ_CREDIT_BUREAU_HOUR | AMT_REQ_CREDIT_BUREAU_MON | AMT_REQ_CREDIT_BUREAU_QRT | AMT_REQ_CREDIT_BUREAU_WEEK | AMT_REQ_CREDIT_BUREAU_YEAR | APARTMENTS_AVG | APARTMENTS_MEDI | APARTMENTS_MODE | BASEMENTAREA_AVG | BASEMENTAREA_MEDI | BASEMENTAREA_MODE | CNT_CHILDREN | CNT_FAM_MEMBERS | CODE_GENDER | COMMONAREA_AVG | COMMONAREA_MEDI | COMMONAREA_MODE | DAYS_BIRTH | DAYS_EMPLOYED | DAYS_ID_PUBLISH | DAYS_LAST_PHONE_CHANGE | DAYS_REGISTRATION | DEF_30_CNT_SOCIAL_CIRCLE | DEF_60_CNT_SOCIAL_CIRCLE | ELEVATORS_AVG | ELEVATORS_MEDI | ELEVATORS_MODE | EMERGENCYSTATE_MODE | ENTRANCES_AVG | ENTRANCES_MEDI | ENTRANCES_MODE | EXT_SOURCE_1 | EXT_SOURCE_2 | EXT_SOURCE_3 | FLAG_CONT_MOBILE | FLAG_DOCUMENT_10 | FLAG_DOCUMENT_11 | FLAG_DOCUMENT_12 | FLAG_DOCUMENT_13 | FLAG_DOCUMENT_14 | FLAG_DOCUMENT_15 | FLAG_DOCUMENT_16 | FLAG_DOCUMENT_17 | FLAG_DOCUMENT_18 | FLAG_DOCUMENT_19 | FLAG_DOCUMENT_2 | FLAG_DOCUMENT_20 | FLAG_DOCUMENT_21 | FLAG_DOCUMENT_3 | FLAG_DOCUMENT_4 | FLAG_DOCUMENT_5 | FLAG_DOCUMENT_6 | FLAG_DOCUMENT_7 | FLAG_DOCUMENT_8 | FLAG_DOCUMENT_9 | FLAG_EMAIL | FLAG_EMP_PHONE | FLAG_MOBIL | FLAG_OWN_CAR | FLAG_OWN_REALTY | FLAG_PHONE | FLAG_WORK_PHONE | FLOORSMAX_AVG | FLOORSMAX_MEDI | FLOORSMAX_MODE | FLOORSMIN_AVG | FLOORSMIN_MEDI | FLOORSMIN_MODE | FONDKAPREMONT_MODE | HOUR_APPR_PROCESS_START | HOUSETYPE_MODE | LANDAREA_AVG | LANDAREA_MEDI | LANDAREA_MODE | LIVE_CITY_NOT_WORK_CITY | LIVE_REGION_NOT_WORK_REGION | LIVINGAPARTMENTS_AVG | LIVINGAPARTMENTS_MEDI | LIVINGAPARTMENTS_MODE | LIVINGAREA_AVG | LIVINGAREA_MEDI | LIVINGAREA_MODE | NAME_CONTRACT_TYPE | NAME_EDUCATION_TYPE | NAME_FAMILY_STATUS | NAME_HOUSING_TYPE | NAME_INCOME_TYPE | NAME_TYPE_SUITE | NONLIVINGAPARTMENTS_AVG | NONLIVINGAPARTMENTS_MEDI | NONLIVINGAPARTMENTS_MODE | NONLIVINGAREA_AVG | NONLIVINGAREA_MEDI | NONLIVINGAREA_MODE | OBS_30_CNT_SOCIAL_CIRCLE | OBS_60_CNT_SOCIAL_CIRCLE | OCCUPATION_TYPE | ORGANIZATION_TYPE | OWN_CAR_AGE | REGION_POPULATION_RELATIVE | REGION_RATING_CLIENT | REGION_RATING_CLIENT_W_CITY | REG_CITY_NOT_LIVE_CITY | REG_CITY_NOT_WORK_CITY | REG_REGION_NOT_LIVE_REGION | REG_REGION_NOT_WORK_REGION | SK_ID_CURR | TARGET | TOTALAREA_MODE | WALLSMATERIAL_MODE | WEEKDAY_APPR_PROCESS_START | YEARS_BEGINEXPLUATATION_AVG | YEARS_BEGINEXPLUATATION_MEDI | YEARS_BEGINEXPLUATATION_MODE | YEARS_BUILD_AVG | YEARS_BUILD_MEDI | YEARS_BUILD_MODE | APPS_EXT_SOURCE_MEAN | APPS_EXT_SOURCE_STD | APPS_ANNUITY_CREDIT_RATIO | APPS_GOODS_CREDIT_RATIO | APPS_ANNUITY_INCOME_RATIO | APPS_CREDIT_INCOME_RATIO | APPS_GOODS_INCOME_RATIO | APPS_CNT_FAM_INCOME_RATIO | APPS_EMPLOYED_BIRTH_RATIO | APPS_INCOME_EMPLOYED_RATIO | APPS_INCOME_BIRTH_RATIO | APPS_CAR_BIRTH_RATIO | APPS_CAR_EMPLOYED_RATIO | PREV_SK_ID_CURR_COUNT | PREV_AMT_CREDIT_MEAN | PREV_AMT_CREDIT_MAX | PREV_AMT_CREDIT_SUM | PREV_AMT_ANNUITY_MEAN | PREV_AMT_ANNUITY_MAX | PREV_AMT_ANNUITY_SUM | PREV_AMT_APPLICATION_MEAN | PREV_AMT_APPLICATION_MAX | PREV_AMT_APPLICATION_SUM | PREV_AMT_DOWN_PAYMENT_MEAN | PREV_AMT_DOWN_PAYMENT_MAX | PREV_AMT_DOWN_PAYMENT_SUM | PREV_AMT_GOODS_PRICE_MEAN | PREV_AMT_GOODS_PRICE_MAX | PREV_AMT_GOODS_PRICE_SUM | PREV_RATE_DOWN_PAYMENT_MIN | PREV_RATE_DOWN_PAYMENT_MAX | PREV_RATE_DOWN_PAYMENT_MEAN | PREV_DAYS_DECISION_MIN | PREV_DAYS_DECISION_MAX | PREV_DAYS_DECISION_MEAN | PREV_CNT_PAYMENT_MEAN | PREV_CNT_PAYMENT_SUM | PREV_PREV_CREDIT_DIFF_MEAN | PREV_PREV_CREDIT_DIFF_MAX | PREV_PREV_CREDIT_DIFF_SUM | PREV_PREV_CREDIT_APPL_RATIO_MEAN | PREV_PREV_CREDIT_APPL_RATIO_MAX | PREV_PREV_GOODS_DIFF_MEAN | PREV_PREV_GOODS_DIFF_MAX | PREV_PREV_GOODS_DIFF_SUM | PREV_PREV_GOODS_APPL_RATIO_MEAN | PREV_PREV_GOODS_APPL_RATIO_MAX | PREV_PREV_DAYS_LAST_DUE_DIFF_MEAN | PREV_PREV_DAYS_LAST_DUE_DIFF_MAX | PREV_PREV_DAYS_LAST_DUE_DIFF_SUM | PREV_PREV_INTERESTS_RATE_MEAN | PREV_PREV_INTERESTS_RATE_MAX | PREV_REFUSED_RATIO | PREV_APPROVED_RATIO | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 24700.5 | 406597.5 | 351000.0 | 202500.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0247 | 0.0250 | 0.0252 | 0.0369 | 0.0369 | 0.0383 | 0 | 1.0 | M | 0.0143 | 0.0144 | 0.0144 | -9461 | -637 | -2120 | -1134.0 | -3648.0 | 2.0 | 2.0 | 0.00 | 0.00 | 0.0000 | No | 0.0690 | 0.0690 | 0.0690 | 0.083037 | 0.262949 | 0.139376 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | N | Y | 1 | 0 | 0.0833 | 0.0833 | 0.0833 | 0.1250 | 0.1250 | 0.1250 | reg oper account | 10 | block of flats | 0.0369 | 0.0375 | 0.0377 | 0 | 0 | 0.0202 | 0.0205 | 0.022 | 0.0190 | 0.0193 | 0.0198 | Cash loans | Secondary / secondary special | Single / not married | House / apartment | Working | Unaccompanied | 0.0000 | 0.0000 | 0.0 | 0.0000 | 0.00 | 0.0 | 2.0 | 2.0 | Laborers | Business Entity Type 3 | NaN | 0.018801 | 2 | 2 | 0 | 0 | 0 | 0 | 100002 | 1.0 | 0.0149 | Stone, brick | WEDNESDAY | 0.9722 | 0.9722 | 0.9722 | 0.6192 | 0.6243 | 0.6341 | 0.161787 | 0.092026 | 0.060749 | 0.863262 | 0.121978 | 2.007889 | 1.733333 | 202500.0 | 0.067329 | -317.896389 | -21.403657 | NaN | NaN | 1.0 | 179055.00 | 179055.0 | 179055.0 | 9251.775 | 9251.775 | 9251.775 | 179055.00 | 179055.0 | 179055.00 | 0.00 | 0.0 | 0.00 | 179055.00 | 179055.0 | 179055.00 | 0.000000 | 0.000000 | 0.000000 | -606.0 | -606.0 | -606.000000 | 24.000000 | 24.0 | 0.00 | 0.0 | 0.00 | 1.000000 | 1.000000 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 150.0 | 150.0 | 150.0 | 0.010003 | 0.010003 | 0.000000 | 1.000000 |
| 1 | 35698.5 | 1293502.5 | 1129500.0 | 270000.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0959 | 0.0968 | 0.0924 | 0.0529 | 0.0529 | 0.0538 | 0 | 2.0 | F | 0.0605 | 0.0608 | 0.0497 | -16765 | -1188 | -291 | -828.0 | -1186.0 | 0.0 | 0.0 | 0.08 | 0.08 | 0.0806 | No | 0.0345 | 0.0345 | 0.0345 | 0.311267 | 0.622246 | NaN | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | N | N | 1 | 0 | 0.2917 | 0.2917 | 0.2917 | 0.3333 | 0.3333 | 0.3333 | reg oper account | 11 | block of flats | 0.0130 | 0.0132 | 0.0128 | 0 | 0 | 0.0773 | 0.0787 | 0.079 | 0.0549 | 0.0558 | 0.0554 | Cash loans | Higher education | Married | House / apartment | State servant | Family | 0.0039 | 0.0039 | 0.0 | 0.0098 | 0.01 | 0.0 | 1.0 | 1.0 | Core staff | School | NaN | 0.003541 | 1 | 1 | 0 | 0 | 0 | 0 | 100003 | 0.0 | 0.0714 | Block | MONDAY | 0.9851 | 0.9851 | 0.9851 | 0.7960 | 0.7987 | 0.8040 | 0.466757 | 0.219895 | 0.027598 | 0.873211 | 0.132217 | 4.790750 | 4.183333 | 135000.0 | 0.070862 | -227.272727 | -16.104981 | NaN | NaN | 3.0 | 484191.00 | 1035882.0 | 1452573.0 | 56553.990 | 98356.995 | 169661.970 | 435436.50 | 900000.0 | 1306309.50 | 3442.50 | 6885.0 | 6885.00 | 435436.50 | 900000.0 | 1306309.50 | 0.000000 | 0.100061 | 0.050030 | -2341.0 | -746.0 | -1305.000000 | 10.000000 | 30.0 | -48754.50 | 756.0 | -146263.50 | 1.057664 | 1.150980 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 50.0 | 150.0 | 150.0 | 0.015272 | 0.018533 | 0.000000 | 1.000000 |
| 2 | 6750.0 | 135000.0 | 135000.0 | 67500.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | NaN | NaN | NaN | NaN | NaN | NaN | 0 | 1.0 | M | NaN | NaN | NaN | -19046 | -225 | -2531 | -815.0 | -4260.0 | 0.0 | 0.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.555912 | 0.729567 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | Y | Y | 1 | 1 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 9 | NaN | NaN | NaN | NaN | 0 | 0 | NaN | NaN | NaN | NaN | NaN | NaN | Revolving loans | Secondary / secondary special | Single / not married | House / apartment | Working | Unaccompanied | NaN | NaN | NaN | NaN | NaN | NaN | 0.0 | 0.0 | Laborers | Government | 26.0 | 0.010032 | 2 | 2 | 0 | 0 | 0 | 0 | 100004 | 0.0 | NaN | NaN | MONDAY | NaN | NaN | NaN | NaN | NaN | NaN | 0.642739 | 0.122792 | 0.050000 | 1.000000 | 0.100000 | 2.000000 | 2.000000 | 67500.0 | 0.011814 | -300.000000 | -3.544051 | -0.001365 | -0.115556 | 1.0 | 20106.00 | 20106.0 | 20106.0 | 5357.250 | 5357.250 | 5357.250 | 24282.00 | 24282.0 | 24282.00 | 4860.00 | 4860.0 | 4860.00 | 24282.00 | 24282.0 | 24282.00 | 0.212008 | 0.212008 | 0.212008 | -815.0 | -815.0 | -815.000000 | 4.000000 | 4.0 | 4176.00 | 4176.0 | 4176.00 | 0.828021 | 0.828021 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 30.0 | 30.0 | 30.0 | 0.016450 | 0.016450 | 0.000000 | 1.000000 |
| 3 | 29686.5 | 312682.5 | 297000.0 | 135000.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0 | 2.0 | F | NaN | NaN | NaN | -19005 | -3039 | -2437 | -617.0 | -9833.0 | 0.0 | 0.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.650442 | NaN | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | N | Y | 0 | 0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 17 | NaN | NaN | NaN | NaN | 0 | 0 | NaN | NaN | NaN | NaN | NaN | NaN | Cash loans | Secondary / secondary special | Civil marriage | House / apartment | Working | Unaccompanied | NaN | NaN | NaN | NaN | NaN | NaN | 2.0 | 2.0 | Laborers | Business Entity Type 3 | NaN | 0.008019 | 2 | 2 | 0 | 0 | 0 | 0 | 100006 | 0.0 | NaN | NaN | WEDNESDAY | NaN | NaN | NaN | NaN | NaN | NaN | 0.650442 | 0.151008 | 0.094941 | 0.949845 | 0.219900 | 2.316167 | 2.200000 | 67500.0 | 0.159905 | -44.422507 | -7.103394 | NaN | NaN | 9.0 | 291695.50 | 906615.0 | 2625259.5 | 23651.175 | 39954.510 | 141907.050 | 272203.26 | 688500.0 | 2449829.34 | 34840.17 | 66987.0 | 69680.34 | 408304.89 | 688500.0 | 2449829.34 | 0.108994 | 0.217830 | 0.163412 | -617.0 | -181.0 | -272.444444 | 23.000000 | 138.0 | -19492.24 | 66987.0 | -175430.16 | 1.012684 | 1.316797 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 810.0 | 1410.0 | 1620.0 | -inf | 0.027424 | 0.111111 | 0.555556 |
| 4 | 21865.5 | 513000.0 | 513000.0 | 121500.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | NaN | NaN | NaN | NaN | NaN | NaN | 0 | 1.0 | M | NaN | NaN | NaN | -19932 | -3038 | -3458 | -1106.0 | -4311.0 | 0.0 | 0.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.322738 | NaN | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | N | Y | 0 | 0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 11 | NaN | NaN | NaN | NaN | 1 | 0 | NaN | NaN | NaN | NaN | NaN | NaN | Cash loans | Secondary / secondary special | Single / not married | House / apartment | Working | Unaccompanied | NaN | NaN | NaN | NaN | NaN | NaN | 0.0 | 0.0 | Core staff | Religion | NaN | 0.028663 | 2 | 2 | 0 | 1 | 0 | 0 | 100007 | 0.0 | NaN | NaN | THURSDAY | NaN | NaN | NaN | NaN | NaN | NaN | 0.322738 | 0.151008 | 0.042623 | 1.000000 | 0.179963 | 4.222222 | 4.222222 | 121500.0 | 0.152418 | -39.993417 | -6.095725 | NaN | NaN | 6.0 | 166638.75 | 284400.0 | 999832.5 | 12278.805 | 22678.785 | 73672.830 | 150530.25 | 247500.0 | 903181.50 | 3390.75 | 3676.5 | 6781.50 | 150530.25 | 247500.0 | 903181.50 | 0.100143 | 0.218890 | 0.159516 | -2357.0 | -374.0 | -1222.833333 | 20.666667 | 124.0 | -16108.50 | 2560.5 | -96651.00 | 1.046356 | 1.264000 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 7.5 | 30.0 | 30.0 | 0.026538 | 0.036164 | 0.000000 | 1.000000 |
apps_all.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 356255 entries, 0 to 356254
Columns: 176 entries, AMT_ANNUITY to PREV_APPROVED_RATIO
dtypes: float64(120), int64(40), object(16)
memory usage: 481.1+ MB
object_columns = apps_all.dtypes[apps_all.dtypes == 'object'].index.tolist()
for column in object_columns:
apps_all[column] = pd.factorize(apps_all[column])[0]
apps_all.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 356255 entries, 0 to 356254
Columns: 176 entries, AMT_ANNUITY to PREV_APPROVED_RATIO
dtypes: float64(120), int64(56)
memory usage: 481.1 MB
apps_all_train = apps_all[~apps_all['TARGET'].isnull()]
apps_all_test = apps_all[apps_all['TARGET'].isnull()]
apps_all_test = apps_all_test.drop('TARGET', axis=1)
apps_all_train.columns.tolist()
['AMT_ANNUITY',
'AMT_CREDIT',
'AMT_GOODS_PRICE',
'AMT_INCOME_TOTAL',
'AMT_REQ_CREDIT_BUREAU_DAY',
'AMT_REQ_CREDIT_BUREAU_HOUR',
'AMT_REQ_CREDIT_BUREAU_MON',
'AMT_REQ_CREDIT_BUREAU_QRT',
'AMT_REQ_CREDIT_BUREAU_WEEK',
'AMT_REQ_CREDIT_BUREAU_YEAR',
'APARTMENTS_AVG',
'APARTMENTS_MEDI',
'APARTMENTS_MODE',
'BASEMENTAREA_AVG',
'BASEMENTAREA_MEDI',
'BASEMENTAREA_MODE',
'CNT_CHILDREN',
'CNT_FAM_MEMBERS',
'CODE_GENDER',
'COMMONAREA_AVG',
'COMMONAREA_MEDI',
'COMMONAREA_MODE',
'DAYS_BIRTH',
'DAYS_EMPLOYED',
'DAYS_ID_PUBLISH',
'DAYS_LAST_PHONE_CHANGE',
'DAYS_REGISTRATION',
'DEF_30_CNT_SOCIAL_CIRCLE',
'DEF_60_CNT_SOCIAL_CIRCLE',
'ELEVATORS_AVG',
'ELEVATORS_MEDI',
'ELEVATORS_MODE',
'EMERGENCYSTATE_MODE',
'ENTRANCES_AVG',
'ENTRANCES_MEDI',
'ENTRANCES_MODE',
'EXT_SOURCE_1',
'EXT_SOURCE_2',
'EXT_SOURCE_3',
'FLAG_CONT_MOBILE',
'FLAG_DOCUMENT_10',
'FLAG_DOCUMENT_11',
'FLAG_DOCUMENT_12',
'FLAG_DOCUMENT_13',
'FLAG_DOCUMENT_14',
'FLAG_DOCUMENT_15',
'FLAG_DOCUMENT_16',
'FLAG_DOCUMENT_17',
'FLAG_DOCUMENT_18',
'FLAG_DOCUMENT_19',
'FLAG_DOCUMENT_2',
'FLAG_DOCUMENT_20',
'FLAG_DOCUMENT_21',
'FLAG_DOCUMENT_3',
'FLAG_DOCUMENT_4',
'FLAG_DOCUMENT_5',
'FLAG_DOCUMENT_6',
'FLAG_DOCUMENT_7',
'FLAG_DOCUMENT_8',
'FLAG_DOCUMENT_9',
'FLAG_EMAIL',
'FLAG_EMP_PHONE',
'FLAG_MOBIL',
'FLAG_OWN_CAR',
'FLAG_OWN_REALTY',
'FLAG_PHONE',
'FLAG_WORK_PHONE',
'FLOORSMAX_AVG',
'FLOORSMAX_MEDI',
'FLOORSMAX_MODE',
'FLOORSMIN_AVG',
'FLOORSMIN_MEDI',
'FLOORSMIN_MODE',
'FONDKAPREMONT_MODE',
'HOUR_APPR_PROCESS_START',
'HOUSETYPE_MODE',
'LANDAREA_AVG',
'LANDAREA_MEDI',
'LANDAREA_MODE',
'LIVE_CITY_NOT_WORK_CITY',
'LIVE_REGION_NOT_WORK_REGION',
'LIVINGAPARTMENTS_AVG',
'LIVINGAPARTMENTS_MEDI',
'LIVINGAPARTMENTS_MODE',
'LIVINGAREA_AVG',
'LIVINGAREA_MEDI',
'LIVINGAREA_MODE',
'NAME_CONTRACT_TYPE',
'NAME_EDUCATION_TYPE',
'NAME_FAMILY_STATUS',
'NAME_HOUSING_TYPE',
'NAME_INCOME_TYPE',
'NAME_TYPE_SUITE',
'NONLIVINGAPARTMENTS_AVG',
'NONLIVINGAPARTMENTS_MEDI',
'NONLIVINGAPARTMENTS_MODE',
'NONLIVINGAREA_AVG',
'NONLIVINGAREA_MEDI',
'NONLIVINGAREA_MODE',
'OBS_30_CNT_SOCIAL_CIRCLE',
'OBS_60_CNT_SOCIAL_CIRCLE',
'OCCUPATION_TYPE',
'ORGANIZATION_TYPE',
'OWN_CAR_AGE',
'REGION_POPULATION_RELATIVE',
'REGION_RATING_CLIENT',
'REGION_RATING_CLIENT_W_CITY',
'REG_CITY_NOT_LIVE_CITY',
'REG_CITY_NOT_WORK_CITY',
'REG_REGION_NOT_LIVE_REGION',
'REG_REGION_NOT_WORK_REGION',
'SK_ID_CURR',
'TARGET',
'TOTALAREA_MODE',
'WALLSMATERIAL_MODE',
'WEEKDAY_APPR_PROCESS_START',
'YEARS_BEGINEXPLUATATION_AVG',
'YEARS_BEGINEXPLUATATION_MEDI',
'YEARS_BEGINEXPLUATATION_MODE',
'YEARS_BUILD_AVG',
'YEARS_BUILD_MEDI',
'YEARS_BUILD_MODE',
'APPS_EXT_SOURCE_MEAN',
'APPS_EXT_SOURCE_STD',
'APPS_ANNUITY_CREDIT_RATIO',
'APPS_GOODS_CREDIT_RATIO',
'APPS_ANNUITY_INCOME_RATIO',
'APPS_CREDIT_INCOME_RATIO',
'APPS_GOODS_INCOME_RATIO',
'APPS_CNT_FAM_INCOME_RATIO',
'APPS_EMPLOYED_BIRTH_RATIO',
'APPS_INCOME_EMPLOYED_RATIO',
'APPS_INCOME_BIRTH_RATIO',
'APPS_CAR_BIRTH_RATIO',
'APPS_CAR_EMPLOYED_RATIO',
'PREV_SK_ID_CURR_COUNT',
'PREV_AMT_CREDIT_MEAN',
'PREV_AMT_CREDIT_MAX',
'PREV_AMT_CREDIT_SUM',
'PREV_AMT_ANNUITY_MEAN',
'PREV_AMT_ANNUITY_MAX',
'PREV_AMT_ANNUITY_SUM',
'PREV_AMT_APPLICATION_MEAN',
'PREV_AMT_APPLICATION_MAX',
'PREV_AMT_APPLICATION_SUM',
'PREV_AMT_DOWN_PAYMENT_MEAN',
'PREV_AMT_DOWN_PAYMENT_MAX',
'PREV_AMT_DOWN_PAYMENT_SUM',
'PREV_AMT_GOODS_PRICE_MEAN',
'PREV_AMT_GOODS_PRICE_MAX',
'PREV_AMT_GOODS_PRICE_SUM',
'PREV_RATE_DOWN_PAYMENT_MIN',
'PREV_RATE_DOWN_PAYMENT_MAX',
'PREV_RATE_DOWN_PAYMENT_MEAN',
'PREV_DAYS_DECISION_MIN',
'PREV_DAYS_DECISION_MAX',
'PREV_DAYS_DECISION_MEAN',
'PREV_CNT_PAYMENT_MEAN',
'PREV_CNT_PAYMENT_SUM',
'PREV_PREV_CREDIT_DIFF_MEAN',
'PREV_PREV_CREDIT_DIFF_MAX',
'PREV_PREV_CREDIT_DIFF_SUM',
'PREV_PREV_CREDIT_APPL_RATIO_MEAN',
'PREV_PREV_CREDIT_APPL_RATIO_MAX',
'PREV_PREV_GOODS_DIFF_MEAN',
'PREV_PREV_GOODS_DIFF_MAX',
'PREV_PREV_GOODS_DIFF_SUM',
'PREV_PREV_GOODS_APPL_RATIO_MEAN',
'PREV_PREV_GOODS_APPL_RATIO_MAX',
'PREV_PREV_DAYS_LAST_DUE_DIFF_MEAN',
'PREV_PREV_DAYS_LAST_DUE_DIFF_MAX',
'PREV_PREV_DAYS_LAST_DUE_DIFF_SUM',
'PREV_PREV_INTERESTS_RATE_MEAN',
'PREV_PREV_INTERESTS_RATE_MAX',
'PREV_REFUSED_RATIO',
'PREV_APPROVED_RATIO']
from sklearn.model_selection import train_test_split
ftr_app = apps_all_train.drop(['SK_ID_CURR', 'TARGET'], axis=1)
target_app = apps_all_train['TARGET']
train_x, valid_x, train_y, valid_y = train_test_split(ftr_app, target_app, test_size=0.3, random_state=2020)
train_x.shape, valid_x.shape
((215257, 174), (92254, 174))
from lightgbm import LGBMClassifier
clf = LGBMClassifier(
n_jobs=-1,
n_estimators=1000,
learning_rate=0.02,
num_leaves=32,
subsample=0.8,
max_depth=12,
silent=-1,
verbose=-1
)
clf.fit(train_x, train_y, eval_set=[(train_x, train_y), (valid_x, valid_y)], eval_metric= 'auc', verbose= 100,
early_stopping_rounds= 50)
Training until validation scores don't improve for 50 rounds
[100] training's auc: 0.76611 training's binary_logloss: 0.245964 valid_1's auc: 0.753701 valid_1's binary_logloss: 0.248684
[200] training's auc: 0.789025 training's binary_logloss: 0.237779 valid_1's auc: 0.766044 valid_1's binary_logloss: 0.244034
[300] training's auc: 0.804705 training's binary_logloss: 0.232402 valid_1's auc: 0.770975 valid_1's binary_logloss: 0.242307
[400] training's auc: 0.81739 training's binary_logloss: 0.228102 valid_1's auc: 0.773264 valid_1's binary_logloss: 0.241518
[500] training's auc: 0.828598 training's binary_logloss: 0.224327 valid_1's auc: 0.773917 valid_1's binary_logloss: 0.241253
[600] training's auc: 0.839337 training's binary_logloss: 0.220751 valid_1's auc: 0.77427 valid_1's binary_logloss: 0.241111
[700] training's auc: 0.848905 training's binary_logloss: 0.217397 valid_1's auc: 0.774436 valid_1's binary_logloss: 0.241006
[800] training's auc: 0.857485 training's binary_logloss: 0.214237 valid_1's auc: 0.774647 valid_1's binary_logloss: 0.240922
[900] training's auc: 0.866027 training's binary_logloss: 0.211076 valid_1's auc: 0.774776 valid_1's binary_logloss: 0.24087
Early stopping, best iteration is:
[853] training's auc: 0.862238 training's binary_logloss: 0.212526 valid_1's auc: 0.774799 valid_1's binary_logloss: 0.240865
LGBMClassifier(learning_rate=0.02, max_depth=12, n_estimators=1000,
num_leaves=32, silent=-1, subsample=0.8, verbose=-1)
from lightgbm import plot_importance
plot_importance(clf, figsize=(16, 32))

preds = clf.predict_proba(apps_all_test.drop('SK_ID_CURR', axis=1))[:, 1 ]
apps_all_test['TARGET'] = preds
apps_all_test[['SK_ID_CURR', 'TARGET']].to_csv('prev_baseline_01.csv', index=False)
| 차수 | Private Score | Public Score | 설명 |
|---|---|---|---|
| 1차 | 0.74088 | 0.74448 | application 데이터 세트 기본 preprocessing |
| 2차 | 0.75458 | 0.75882 | application 데이터 세트 Feature Engineering |
| 3차 | 0.76396 | 0.77579 | previous 데이터 세트 Feature Engineering |
from sklearn.model_selection import train_test_split
from lightgbm import LGBMClassifier
# 신청금액과 실제 대출액, 상품금액과의 차이 비율, 만기일 차이 비교, 이자율 계산등의 주요 컬럼 가공 생산.
def get_prev_processed(prev):
# 대출 신청 금액과 실제 대출액/대출 상품금액 차이 및 비율
prev['PREV_CREDIT_DIFF'] = prev['AMT_APPLICATION'] - prev['AMT_CREDIT']
prev['PREV_GOODS_DIFF'] = prev['AMT_APPLICATION'] - prev['AMT_GOODS_PRICE']
prev['PREV_CREDIT_APPL_RATIO'] = prev['AMT_CREDIT']/prev['AMT_APPLICATION']
# prev['PREV_ANNUITY_APPL_RATIO'] = prev['AMT_ANNUITY']/prev['AMT_APPLICATION']
prev['PREV_GOODS_APPL_RATIO'] = prev['AMT_GOODS_PRICE']/prev['AMT_APPLICATION']
prev['DAYS_FIRST_DRAWING'].replace(365243, np.nan, inplace= True)
prev['DAYS_FIRST_DUE'].replace(365243, np.nan, inplace= True)
prev['DAYS_LAST_DUE_1ST_VERSION'].replace(365243, np.nan, inplace= True)
prev['DAYS_LAST_DUE'].replace(365243, np.nan, inplace= True)
prev['DAYS_TERMINATION'].replace(365243, np.nan, inplace= True)
# 첫번째 만기일과 마지막 만기일까지의 기간
prev['PREV_DAYS_LAST_DUE_DIFF'] = prev['DAYS_LAST_DUE_1ST_VERSION'] - prev['DAYS_LAST_DUE']
# 매월 납부 금액과 납부 횟수 곱해서 전체 납부 금액 구함.
all_pay = prev['AMT_ANNUITY'] * prev['CNT_PAYMENT']
# 전체 납부 금액 대비 AMT_CREDIT 비율을 구하고 여기에 다시 납부횟수로 나누어서 이자율 계산.
prev['PREV_INTERESTS_RATE'] = (all_pay/prev['AMT_CREDIT'] - 1)/prev['CNT_PAYMENT']
return prev
# 기존 컬럼및 위에서 가공된 신규 컬럼들에 대해서 SK_ID_CURR 레벨로 Aggregation 수행.
def get_prev_amt_agg(prev):
agg_dict = {
# 기존 주요 컬럼들을 SK_ID_CURR 레벨로 Aggregation 수행. .
'SK_ID_CURR':['count'],
'AMT_CREDIT':['mean', 'max', 'sum'],
'AMT_ANNUITY':['mean', 'max', 'sum'],
'AMT_APPLICATION':['mean', 'max', 'sum'],
'AMT_DOWN_PAYMENT':['mean', 'max', 'sum'],
'AMT_GOODS_PRICE':['mean', 'max', 'sum'],
'RATE_DOWN_PAYMENT': ['min', 'max', 'mean'],
'DAYS_DECISION': ['min', 'max', 'mean'],
'CNT_PAYMENT': ['mean', 'sum'],
# 신규 가공 컬럼들을 SK_ID_CURR 레벨로 Aggregation 수행. .
'PREV_CREDIT_DIFF':['mean', 'max', 'sum'],
'PREV_CREDIT_APPL_RATIO':['mean', 'max'],
'PREV_GOODS_DIFF':['mean', 'max', 'sum'],
'PREV_GOODS_APPL_RATIO':['mean', 'max'],
'PREV_DAYS_LAST_DUE_DIFF':['mean', 'max', 'sum'],
'PREV_INTERESTS_RATE':['mean', 'max']
}
prev_group = prev.groupby('SK_ID_CURR')
prev_amt_agg = prev_group.agg(agg_dict)
# multi index 컬럼을 '_'로 연결하여 컬럼명 변경
prev_amt_agg.columns = ["PREV_"+ "_".join(x).upper() for x in prev_amt_agg.columns.ravel()]
# 'SK_ID_CURR'로 조인하기 위해 SK_ID_CURR을 컬럼으로 변환
prev_amt_agg = prev_amt_agg.reset_index()
return prev_amt_agg
# NAME_CONTRACT_STATUS의 SK_ID_CURR별 Approved, Refused의 건수 계산.
def get_prev_refused_appr_agg(prev):
# 원래 groupby 컬럼 + 세부 기준 컬럼으로 groupby 수행. 세분화된 레벨로 aggregation 수행 한 뒤에 unstack()으로 컬럼레벨로 변형.
prev_refused_appr_group = prev[prev['NAME_CONTRACT_STATUS'].isin(['Approved', 'Refused'])].groupby([ 'SK_ID_CURR', 'NAME_CONTRACT_STATUS'])
prev_refused_appr_agg = prev_refused_appr_group['SK_ID_CURR'].count().unstack()
# 컬럼명 변경.
prev_refused_appr_agg.columns = ['PREV_APPROVED_COUNT', 'PREV_REFUSED_COUNT' ]
# NaN값은 모두 0으로 변경.
prev_refused_appr_agg = prev_refused_appr_agg.fillna(0)
# 'SK_ID_CURR'로 조인하기 위해 SK_ID_CURR을 컬럼으로 변환
prev_refused_appr_agg = prev_refused_appr_agg.reset_index()
return prev_refused_appr_agg
# 앞에서 구한 prev_amt_agg와 prev_refused_appr_agg를 조인하고 SK_ID_CURR별 APPROVED_COUNT 및 REFUSED_COUNT 비율 생성
def get_prev_agg(prev):
prev = get_prev_processed(prev)
prev_amt_agg = get_prev_amt_agg(prev)
prev_refused_appr_agg = get_prev_refused_appr_agg(prev)
# prev_amt_agg와 조인.
prev_agg = prev_amt_agg.merge(prev_refused_appr_agg, on='SK_ID_CURR', how='left')
# SK_ID_CURR별 과거 대출건수 대비 APPROVED_COUNT 및 REFUSED_COUNT 비율 생성.
prev_agg['PREV_REFUSED_RATIO'] = prev_agg['PREV_REFUSED_COUNT']/prev_agg['PREV_SK_ID_CURR_COUNT']
prev_agg['PREV_APPROVED_RATIO'] = prev_agg['PREV_APPROVED_COUNT']/prev_agg['PREV_SK_ID_CURR_COUNT']
# 'PREV_REFUSED_COUNT', 'PREV_APPROVED_COUNT' 컬럼 drop
prev_agg = prev_agg.drop(['PREV_REFUSED_COUNT', 'PREV_APPROVED_COUNT'], axis=1)
return prev_agg
# apps와 previous 데이터 세트를 SK_ID_CURR레벨로 다양한 컬럼이 aggregation되어 있는 prev_agg 조인
def get_apps_all_with_prev_agg(apps, prev):
apps_all = get_apps_processed(apps)
prev_agg = get_prev_agg(prev)
print('prev_agg shape:', prev_agg.shape)
print('apps_all before merge shape:', apps_all.shape)
apps_all = apps_all.merge(prev_agg, on='SK_ID_CURR', how='left')
print('apps_all after merge with prev_agg shape:', apps_all.shape)
return apps_all
# Label 인코딩 수행.
def get_apps_all_encoded(apps_all):
object_columns = apps_all.dtypes[apps_all.dtypes == 'object'].index.tolist()
for column in object_columns:
apps_all[column] = pd.factorize(apps_all[column])[0]
return apps_all
# 학습 데이터와 테스트 데이터 세트 분리.
def get_apps_all_train_test(apps_all):
apps_all_train = apps_all[~apps_all['TARGET'].isnull()]
apps_all_test = apps_all[apps_all['TARGET'].isnull()]
apps_all_test = apps_all_test.drop('TARGET', axis=1)
return apps_all_train, apps_all_test
# 학습 수행.
def train_apps_all(apps_all_train):
ftr_app = apps_all_train.drop(['SK_ID_CURR', 'TARGET'], axis=1)
target_app = apps_all_train['TARGET']
train_x, valid_x, train_y, valid_y = train_test_split(ftr_app, target_app, test_size=0.3, random_state=2020)
print('train shape:', train_x.shape, 'valid shape:', valid_x.shape)
clf = LGBMClassifier(
n_jobs=-1,
n_estimators=1000,
learning_rate=0.02,
num_leaves=32,
subsample=0.8,
max_depth=12,
silent=-1,
verbose=-1
)
clf.fit(train_x, train_y, eval_set=[(train_x, train_y), (valid_x, valid_y)], eval_metric= 'auc', verbose= 100,
early_stopping_rounds= 100)
return clf
def get_dataset():
app_train = pd.read_csv('application_train.csv')
app_test = pd.read_csv('application_test.csv')
apps = pd.concat([app_train, app_test])
prev = pd.read_csv('previous_application.csv')
return apps, prev
apps, prev = get_dataset()
apps_all = get_apps_all_with_prev_agg(apps, prev)
apps_all = get_apps_all_encoded(apps_all)
apps_all_train, apps_all_test = get_apps_all_train_test(apps_all)
clf = train_apps_all(apps_all_train)
prev_agg shape: (338857, 42)
apps_all before merge shape: (356255, 135)
apps_all after merge with prev_agg shape: (356255, 176)
train shape: (215257, 174) valid shape: (92254, 174)
Training until validation scores don't improve for 100 rounds
[100] training's auc: 0.76611 training's binary_logloss: 0.245964 valid_1's auc: 0.753701 valid_1's binary_logloss: 0.248684
[200] training's auc: 0.789025 training's binary_logloss: 0.237779 valid_1's auc: 0.766044 valid_1's binary_logloss: 0.244034
[300] training's auc: 0.804705 training's binary_logloss: 0.232402 valid_1's auc: 0.770975 valid_1's binary_logloss: 0.242307
[400] training's auc: 0.81739 training's binary_logloss: 0.228102 valid_1's auc: 0.773264 valid_1's binary_logloss: 0.241518
[500] training's auc: 0.828598 training's binary_logloss: 0.224327 valid_1's auc: 0.773917 valid_1's binary_logloss: 0.241253
[600] training's auc: 0.839337 training's binary_logloss: 0.220751 valid_1's auc: 0.77427 valid_1's binary_logloss: 0.241111
[700] training's auc: 0.848905 training's binary_logloss: 0.217397 valid_1's auc: 0.774436 valid_1's binary_logloss: 0.241006
[800] training's auc: 0.857485 training's binary_logloss: 0.214237 valid_1's auc: 0.774647 valid_1's binary_logloss: 0.240922
[900] training's auc: 0.866027 training's binary_logloss: 0.211076 valid_1's auc: 0.774776 valid_1's binary_logloss: 0.24087
[1000] training's auc: 0.873512 training's binary_logloss: 0.208114 valid_1's auc: 0.774799 valid_1's binary_logloss: 0.240861
Did not meet early stopping. Best iteration is:
[1000] training's auc: 0.873512 training's binary_logloss: 0.208114 valid_1's auc: 0.774799 valid_1's binary_logloss: 0.240861
preds = clf.predict_proba(apps_all_test.drop(['SK_ID_CURR'], axis=1))[:, 1 ]
apps_all_test['TARGET'] = preds
apps_all_test[['SK_ID_CURR', 'TARGET']].to_csv('prev_baseline_02.csv', index=False)
| 차수 | Private Score | Public Score | 설명 |
|---|---|---|---|
| 1차 | 0.74088 | 0.74448 | application 데이터 세트 기본 preprocessing |
| 2차 | 0.75458 | 0.75882 | application 데이터 세트 Feature Engineering |
| 3차 | 0.76396 | 0.77579 | previous 데이터 세트 Feature Engineering |
| 4차 | 0.76420 | 0.77583 | previous 데이터 세트 Feature Engineering 함수화 |
Source: inflearn / 캐글 Advanced 머신러닝 실전 박치기