import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Load the Titanic training data and do a first cleaning / EDA pass.
titanic_df = pd.read_csv('titanic_train.csv')
titanic_df.head(2)
titanic_df.info()
# Fill missing values. Plain column assignment is used instead of
# `Series.fillna(..., inplace=True)`, which is deprecated chained
# assignment in modern pandas and stops working under copy-on-write.
titanic_df['Age'] = titanic_df['Age'].fillna(titanic_df['Age'].mean())
titanic_df['Cabin'] = titanic_df['Cabin'].fillna('N')
titanic_df['Embarked'] = titanic_df['Embarked'].fillna('N')  # placeholder string 'N'
titanic_df.isnull().sum()        # per-column NaN counts (True/False summed)
titanic_df.isnull().sum().sum()  # total NaN count — should be 0 now
titanic_df['Sex'].value_counts()
titanic_df['Cabin'].value_counts()
titanic_df['Embarked'].value_counts()
# Keep only the deck letter (first character) of the cabin code.
titanic_df['Cabin'] = titanic_df['Cabin'].str[:1]
titanic_df.groupby(['Sex','Survived'])['Survived'].count()
sns.barplot(x='Sex', y='Survived', data=titanic_df)
plt.figure(figsize=(10,6))
sns.barplot(x='Pclass', y='Survived', hue='Sex', data=titanic_df)
titanic_df.columns
def get_category(age):
    """Map a numeric age to a coarse age-band label.

    Ages <= -1 are treated as 'Unknown' (the NaN sentinel used upstream);
    anything above 60 falls through to 'Elderly'.
    """
    bands = [
        (-1, 'Unknown'),
        (5, 'Baby'),
        (12, 'Child'),
        (18, 'Teenager'),
        (25, 'Student'),
        (35, 'Young Adult'),
        (60, 'Adult'),
    ]
    for upper, label in bands:
        if age <= upper:
            return label
    return 'Elderly'
# Derive an age-band column and plot survival rate per band, split by sex.
titanic_df['Age_cat'] = titanic_df['Age'].apply(get_category)
titanic_df.head(1)
plt.figure(figsize=(10,5))
# Fixed order so the x-axis runs youngest to oldest.
group_names=['Unknown','Baby','Child','Teenager','Student','Young Adult', 'Adult', 'Elderly']
sns.barplot(x='Age_cat', y='Survived', hue='Sex', data=titanic_df, order=group_names)
from sklearn.preprocessing import LabelEncoder
def encode_features(dataDF):
    """Label-encode the categorical columns 'Sex', 'Cabin', 'Embarked' in place.

    Each column is replaced by integer codes from a freshly fitted
    LabelEncoder. Returns the same (mutated) DataFrame.
    """
    for feature in ['Sex', 'Cabin', 'Embarked']:
        # fit_transform merges the original's separate fit + transform calls.
        dataDF[feature] = LabelEncoder().fit_transform(dataDF[feature])
    return dataDF
# Replace the categorical columns with integer codes.
titanic_df = encode_features(titanic_df)
titanic_df.head(1)
def fillna(df):
    """Fill missing values: Age -> column mean, Cabin/Embarked -> 'N', Fare -> 0.

    Uses plain column assignment instead of `inplace=True` on a column
    selection, which is deprecated chained assignment in modern pandas.
    Returns the (mutated) DataFrame.
    """
    df['Age'] = df['Age'].fillna(df['Age'].mean())
    df['Cabin'] = df['Cabin'].fillna('N')
    df['Embarked'] = df['Embarked'].fillna('N')
    df['Fare'] = df['Fare'].fillna(0)
    return df
def drop_features(df):
    """Drop identifier-like columns that carry no predictive signal."""
    unused = ['PassengerId', 'Name', 'Ticket']
    df.drop(columns=unused, inplace=True)
    return df
def format_features(df):
    """Truncate Cabin to its deck letter, then label-encode the categoricals.

    Returns the (mutated) DataFrame with 'Sex', 'Cabin', 'Embarked'
    replaced by integer codes.
    """
    df['Cabin'] = df['Cabin'].str[:1]
    for feature in ['Sex', 'Cabin', 'Embarked']:
        # fit_transform merges the original's separate fit + transform calls.
        df[feature] = LabelEncoder().fit_transform(df[feature])
    return df
def tranform_features(df):
    """Full preprocessing pipeline: fill NaNs, drop id columns, encode.

    NOTE(review): the name keeps the original 'tranform' misspelling
    because it is called by that name later in this file.
    """
    return format_features(drop_features(fillna(df)))
# Rebuild features/target from the raw csv via the preprocessing pipeline.
titanic_df = pd.read_csv('titanic_train.csv')
y_titanic_df = titanic_df['Survived']
# BUG FIX: the original assigned to lowercase `x_titanic_df` while the
# train_test_split below reads `X_titanic_df`, raising NameError.
X_titanic_df = titanic_df.drop('Survived', axis=1)
X_titanic_df = tranform_features(X_titanic_df)
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X_titanic_df,y_titanic_df,test_size=0.2,random_state=121)
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Fit and score three baseline models on the same split.
dt_clf = DecisionTreeClassifier(random_state=11)
rf_clf = RandomForestClassifier(random_state=11)
lr_clf = LogisticRegression()

dt_clf.fit(X_train, y_train)
dt_pred = dt_clf.predict(X_test)
print('dt 정확도:',accuracy_score(y_test, dt_pred))

rf_clf.fit(X_train, y_train)
# BUG FIX: the original predicted with dt_clf here, so the printed "rf"
# score was really the decision tree's accuracy.
rf_pred = rf_clf.predict(X_test)
print('rf 정확도:',accuracy_score(y_test, rf_pred))

lr_clf.fit(X_train, y_train)
# BUG FIX: same copy-paste error — the original used dt_clf for "lr" too.
lr_pred = lr_clf.predict(X_test)
print('lr 정확도:',accuracy_score(y_test, lr_pred))
from sklearn.model_selection import KFold
def exec_kfold(clf, folds=5):
    """Run shuffled K-fold cross-validation of `clf` on the module-level
    X_titanic_df / y_titanic_df.

    Prints each fold's accuracy and the mean accuracy, and returns the
    mean (the original returned None; returning it is backward compatible).
    NOTE(review): shuffle=True without random_state makes folds differ
    between runs — intentional in the original, preserved here.
    """
    kfold = KFold(n_splits=folds, shuffle=True)
    scores = []
    for iter_count, (train_index, test_index) in enumerate(kfold.split(X_titanic_df)):
        X_tr, X_te = X_titanic_df.values[train_index], X_titanic_df.values[test_index]
        y_tr, y_te = y_titanic_df.values[train_index], y_titanic_df.values[test_index]
        clf.fit(X_tr, y_tr)
        accuracy = accuracy_score(y_te, clf.predict(X_te))
        scores.append(accuracy)
        print(f'교차검증 {iter_count} 정확도:{accuracy:.4f}')
    mean_score = np.mean(scores)
    print(f'평균정확도 : {mean_score:.4f}')
    return mean_score
# Cross-validate the three models: manual K-fold, then cross_val_score.
exec_kfold(dt_clf)
exec_kfold(rf_clf)
exec_kfold(lr_clf)
from sklearn.model_selection import cross_val_score
scores = cross_val_score(dt_clf,X_titanic_df,y_titanic_df,cv=5) # cv = number of folds to split into (5 is the default)
print(scores)
print(np.mean(scores))
scores = cross_val_score(rf_clf,X_titanic_df,y_titanic_df,cv=5)
print(scores)
print(np.mean(scores))
scores = cross_val_score(lr_clf,X_titanic_df,y_titanic_df,cv=5)
print(scores)
print(np.mean(scores))
from sklearn.model_selection import GridSearchCV
# Hyper-parameter search space for the decision tree.
param_grid = {
    'max_depth': [2, 3, 4, 10],
    'min_samples_split': [2, 3, 5],
    'min_samples_leaf': [1, 5, 8],
}
grid_dclf = GridSearchCV(dt_clf, param_grid=param_grid, scoring='accuracy', cv=5)
grid_dclf.fit(X_train, y_train)
print(grid_dclf.best_params_)
print(grid_dclf.best_score_)
# Score the best estimator found by the search on the held-out split.
best_dclf = grid_dclf.best_estimator_
pred = best_dclf.predict(X_test)
accuracy_score(y_test, pred)
from sklearn.base import BaseEstimator
class MyDummyClassifier(BaseEstimator):
    """Baseline classifier: predicts death (0) for males, survival (1) otherwise."""

    def fit(self, X, y=None):
        # No learning — this baseline ignores the training data entirely.
        pass

    def predict(self, X):
        # Sex == 1 is the label-encoded value for male. Vectorized version
        # of the original per-row loop; same (n, 1) float output.
        sex = np.asarray(X['Sex'])
        return np.where(sex == 1, 0.0, 1.0).reshape(-1, 1)
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
import numpy as np
def fillna(df):
    """Fill missing values: Age -> column mean, Cabin/Embarked -> 'N', Fare -> 0.

    Uses plain column assignment instead of `inplace=True` on a column
    selection, which is deprecated chained assignment in modern pandas.
    Returns the (mutated) DataFrame.
    """
    df['Age'] = df['Age'].fillna(df['Age'].mean())
    df['Cabin'] = df['Cabin'].fillna('N')
    df['Embarked'] = df['Embarked'].fillna('N')
    df['Fare'] = df['Fare'].fillna(0)
    return df
def drop_features(df):
    """Drop identifier-like columns that carry no predictive signal."""
    unused = ['PassengerId', 'Name', 'Ticket']
    df.drop(columns=unused, inplace=True)
    return df
def format_features(df):
    """Truncate Cabin to its deck letter, then label-encode the categoricals.

    Returns the (mutated) DataFrame with 'Sex', 'Cabin', 'Embarked'
    replaced by integer codes.
    """
    df['Cabin'] = df['Cabin'].str[:1]
    for feature in ['Sex', 'Cabin', 'Embarked']:
        # fit_transform merges the original's separate fit + transform calls.
        df[feature] = LabelEncoder().fit_transform(df[feature])
    return df
def tranform_features(df):
    """Full preprocessing pipeline: fill NaNs, drop id columns, encode.

    NOTE(review): the name keeps the original 'tranform' misspelling
    because it is called by that name later in this file.
    """
    return format_features(drop_features(fillna(df)))
# Reload the raw data, rerun the preprocessing pipeline, and score the
# sex-only baseline classifier.
titanic_df = pd.read_csv('titanic_train.csv')
y_titanic_df = titanic_df['Survived']
X_titanic_df = titanic_df.drop('Survived',axis=1)
X_titanic_df = tranform_features(X_titanic_df)
X_train,X_test,y_train,y_test = train_test_split(X_titanic_df,y_titanic_df,test_size=0.2,random_state=0)
myclf = MyDummyClassifier()
myclf.fit(X_train,y_train)
pred = myclf.predict(X_test)
accuracy_score(y_test,pred)
from sklearn.datasets import load_digits
class MyFakeClassifier(BaseEstimator):
    """Degenerate classifier that always predicts the negative class."""

    def fit(self, X, y):
        # Intentionally does nothing — there is nothing to learn.
        pass

    def predict(self, X):
        # One all-False column per sample, regardless of input values.
        return np.full((len(X), 1), False)
# Demonstrate the accuracy paradox: on an imbalanced binary target, a
# model that always predicts the majority class still scores highly.
digits = load_digits()
digits
y = (digits.target==7).astype(int)  # binary target: 1 iff the digit is a 7
y
X_train,X_test,y_train,y_test=train_test_split(digits.data,y,random_state=11)
y_test.shape
pd.Series(y_test).value_counts()  # class distribution — mostly 0s
fakeclf = MyFakeClassifier()
fakeclf.fit(X_train,y_train)
pred = fakeclf.predict(X_test)
accuracy_score(y_test,pred)  # high accuracy despite a useless model
from sklearn.metrics import confusion_matrix # confusion matrix
confusion_matrix(y_test,pred)
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix
def get_clf_eval(y_test, pred):
    """Print the confusion matrix plus accuracy / precision / recall."""
    accuracy = accuracy_score(y_test, pred)
    precision = precision_score(y_test, pred)
    recall = recall_score(y_test, pred)
    print('오차행렬')
    print(confusion_matrix(y_test, pred))
    print(f'정확도:{accuracy:.4f} 정밀도:{precision:.4f} 재현율:{recall:.4f}')
# Re-run the preprocessing pipeline and evaluate logistic regression with
# the full metric report, then inspect its predicted probabilities.
titanic_df = pd.read_csv('titanic_train.csv')
y_titanic_df = titanic_df['Survived']
X_titanic_df = titanic_df.drop('Survived',axis=1)
X_titanic_df = tranform_features(X_titanic_df)
X_train,X_test,y_train,y_test = train_test_split(X_titanic_df,y_titanic_df,test_size=0.2,random_state=0)
from sklearn.linear_model import LogisticRegression
lr_clf = LogisticRegression()
lr_clf.fit(X_train, y_train)
pred=lr_clf.predict(X_test)
get_clf_eval(y_test,pred)
# Same report for the always-negative baseline, for comparison.
fakeclf = MyFakeClassifier()
fakeclf.fit(X_train,y_train)
pred = fakeclf.predict(X_test)
get_clf_eval(y_test,pred)
pred
# predict_proba returns one probability column per class.
pred_proba = lr_clf.predict_proba(X_test)
pred_proba
pred = lr_clf.predict(X_test)
pred_proba.shape
pred.shape
pred_proba[:3]
pred_proba_result = np.concatenate([pred_proba,pred.reshape(-1,1)],axis=1) # reshape(-1,1): infer the row count, use a single column
pred_proba_result[:5]
from sklearn.preprocessing import Binarizer
# Toy example: Binarizer maps values > threshold to 1, everything else to 0.
X = [[1,-1,2],
[2,0,0],
[0,1.1,1.2]]
binarizer = Binarizer(threshold=1.1)
binarizer.fit_transform(X)
# Apply a custom decision threshold to the positive-class probabilities.
custom_threshold = 0.5
pred_proba_1 = pred_proba[:,1].reshape(-1,1) # infer the row count, single column
custom_predict = Binarizer(threshold=custom_threshold).fit_transform(pred_proba_1)
get_clf_eval(y_test,custom_predict)
# NOTE(review): the next four lines duplicate the cell above verbatim.
custom_threshold = 0.5
pred_proba_1 = pred_proba[:,1].reshape(-1,1) # infer the row count, single column
custom_predict = Binarizer(threshold=custom_threshold).fit_transform(pred_proba_1)
get_clf_eval(y_test,custom_predict)
from sklearn.datasets import fetch_openml
# Download MNIST (70k handwritten digits as 784-pixel rows) from OpenML.
mnist = fetch_openml('mnist_784')
type(mnist)
mnist
mnist.data.shape
mnist.target.shape
type(mnist.data)
type(mnist.target)
# Split the mnist data into training (90%) and validation (10%) sets.
from sklearn.model_selection import train_test_split
# NOTE(review): no random_state here, so the split differs run to run.
X_train,X_test,y_train,y_test = train_test_split(mnist.data,mnist.target,test_size=0.1)
# Print the value frequencies of y_train (value_counts()).
type(y_train)
y_train.value_counts()  # class distribution of the training labels
# Train, predict, and evaluate using RandomForest.
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
# Train a random forest on the MNIST split and score it on the test set.
clf = RandomForestClassifier()
clf.fit(X_train,y_train)
pred = clf.predict(X_test)
accuracy_score(y_test,pred)
X_train
import numpy as np
import matplotlib.pyplot as plt
# Visualize 10 randomly chosen test digits with their true labels.
n_test = len(X_test)
random_pick = np.random.randint(0,n_test,size=10)  # 10 random test indices
random_pick
X_test.iloc[191]
figure=plt.figure(figsize=(12,5))
#figure.set_size_inches(12,5)
axes=[]
for i in range(1,11):
    axes.append(figure.add_subplot(2,5,i))  # 2x5 grid of subplots
#axes
tmp_list=[]
for i in range(10):
    tmp = X_test.iloc[random_pick[i]]
    #print(tmp)
    tmp = np.array(tmp)
    tmp = tmp.reshape(28,28)  # 784 pixel values -> 28x28 image
    tmp_list.append(tmp)
print(y_test.iloc[random_pick])  # true labels of the sampled digits
for i in range(10):
    axes[i].matshow(tmp_list[i])
X_test.iloc[random_pick[i]].to_list()  # NOTE(review): uses loop variable i after the loop, i.e. the last sampled index
import glob
from PIL import Image
# Classify user-supplied PNG digit images with the trained random forest.
for image_path in glob.glob('./data/*.png'):
    #print(image_path)
    img = Image.open(image_path).convert('L')  # 'L' = 8-bit grayscale
    plt.imshow(img)
    # NOTE(review): np.resize repeats/truncates raw pixel data — it only
    # flattens correctly if the PNG is already 28x28. Confirm input size,
    # or resample with PIL's Image.resize((28, 28)) first.
    img = np.resize(img,(1,784))
    img = 255-(img)  # presumably inverts black-on-white scans to match MNIST's white-on-black convention — verify
    #print(img)
    pred = clf.predict(img)
    print(pred)
    plt.show()
# NOTE(review): removed unrelated blog-spam text (promotional game links)
# that was accidentally pasted at the end of this file; it was not Python
# and broke the script's syntax.