사랑스러운 동물들의 유토피아, 주토피아
학습 데이터 준비하고 전처리하기
from google.colab import drive
drive.mount('/content/drive')
import tensorflow as tf
from keras import models
from keras import layers
# Prepare the citizen dataset.
import pandas as pd
# Read the citizen CSV relative to the current working directory; the first
# CSV column becomes the row index.
zootopia_citizen = pd.read_csv('./zootopia_citizen.csv', index_col=0)
# Print the column names for a quick schema check.
print(zootopia_citizen.columns)
# Bare expression: shows the table only when it is the last statement of a
# notebook cell.
zootopia_citizen
# Check the data type of every column.
print(zootopia_citizen.dtypes)
# Convert the label column to pandas' categorical dtype (in place on the
# frame), then print the dtypes again to confirm the change.
zootopia_citizen['suitable_for_partners'] = zootopia_citizen['suitable_for_partners'].astype('category')
print(zootopia_citizen.dtypes)
# Bare expressions for notebook display: the converted column, then the frame.
zootopia_citizen['suitable_for_partners']
zootopia_citizen
# One-hot encoding.
# get_dummies replaces the categorical label column with one indicator column
# per category, named '<column>_<category>'; the code that follows selects
# 'suitable_for_partners_bad', '..._good' and '..._very_good' from the result.
encoding = pd.get_dummies(zootopia_citizen)
# Print the resulting column names, then display the encoded frame.
print(encoding.columns)
encoding
# Declare variables.
# tendency: the six trait columns used as model inputs.
tendency=encoding[['potential_power','importance','prejudice','morality','sociability','desire_for_power']]
# suitability: the three one-hot label columns, in the order bad / good /
# very_good. Any code that maps a predicted class index back to a label must
# use this same order.
suitability=encoding[['suitable_for_partners_bad','suitable_for_partners_good','suitable_for_partners_very_good']]
# Both frames must have the same number of rows.
print(tendency.shape, suitability.shape)
# Bare expressions for notebook display.
tendency
suitability
# Split the data into training and evaluation sets.
from sklearn.model_selection import train_test_split
# 70 % train / 30 % evaluation, shuffled and stratified on the one-hot labels.
# No random_state is passed, so the split differs from run to run.
tendency_train, tendency_test, suitability_train, suitability_test = train_test_split(tendency,
suitability,
shuffle=True,
stratify=suitability,
test_size=0.3)
# Print (rows, columns) of the training pair, then of the evaluation pair.
print(tendency_train.shape, suitability_train.shape)
print(tendency_test.shape, suitability_test.shape)
# Build the tf.data input pipelines.
batch_size = 4

# Cast everything to float32 up front: the indicator columns produced by
# get_dummies are not floating point, and the loss/metric computations expect
# float tensors.
train_features = tendency_train.to_numpy(dtype='float32')
train_labels = suitability_train.to_numpy(dtype='float32')
test_features = tendency_test.to_numpy(dtype='float32')
test_labels = suitability_test.to_numpy(dtype='float32')

train_dataset = tf.data.Dataset.from_tensor_slices((train_features, train_labels))
# repeat() makes the training stream infinite, so fit() below must be given
# steps_per_epoch to know where an epoch ends.
train_dataset = train_dataset.shuffle(10000).repeat().batch(batch_size)
test_dataset = tf.data.Dataset.from_tensor_slices((test_features, test_labels))
test_dataset = test_dataset.batch(batch_size)

# Build the model structure: a small fully connected classifier whose input
# width and number of output classes are taken from the data itself.
# NOTE(review): layer sizes are a reasonable starting point, not tuned values.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(train_features.shape[1],)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    # One softmax unit per one-hot label column.
    tf.keras.layers.Dense(train_labels.shape[1], activation='softmax'),
])
# categorical_crossentropy matches the one-hot encoded targets.
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Train the model.
# One epoch is one pass over the training rows; max(1, ...) keeps it valid
# when there are fewer rows than batch_size.
steps_per_epoch = max(1, len(train_features) // batch_size)
# validation_data is required so that history contains 'val_loss' and
# 'val_accuracy', which the plotting code reads.
# NOTE(review): epochs=50 is an untuned default.
train = model.fit(train_dataset,
                  epochs=50,
                  steps_per_epoch=steps_per_epoch,
                  validation_data=test_dataset)
# Inspect which metric series the training history recorded.
print(train.history.keys())
import matplotlib.pyplot as plt
# Draw one figure per metric (loss first, then accuracy), each overlaying the
# training curve with the validation curve recorded under its 'val_' key.
for train_key, val_key in (('loss', 'val_loss'), ('accuracy', 'val_accuracy')):
    plt.plot(train.history[train_key])
    plt.plot(train.history[val_key])
    plt.xlabel('epoch')
    plt.ylabel(train_key)
    plt.legend(['train', 'test'])
    plt.show()
# Check model accuracy: evaluate on the held-out split. The metric reported
# alongside the loss is the 'accuracy' metric configured in model.compile().
model.evaluate(tendency_test,suitability_test)
# Check model performance: print the predictions for the last five evaluation
# rows, then display the matching true one-hot labels for a manual comparison.
print(model.predict(tendency_test[-5:]))
suitability_test[-5:]
# Prepare the partner (candidate) dataset.
# Read the partner CSV relative to the current working directory; the first
# CSV column becomes the row index.
zootopia_partner = pd.read_csv('./zootopia_partner.csv', index_col=0)
print(zootopia_partner.columns)
zootopia_partner
# Declare variables.
# tendency_final: the same six trait columns the model was trained on.
tendency_final=zootopia_partner[['potential_power','importance','prejudice','morality','sociability','desire_for_power']]
# suitability_final: one-column frame that later receives the predicted labels.
# NOTE(review): this is a slice of a freshly selected frame; writing into it
# later may trigger pandas' SettingWithCopyWarning - confirm, or take .copy().
suitability_final=zootopia_partner[['suitable_for_partners']][:]
print(tendency_final.shape, suitability_final.shape)
# Check partner suitability: run the trained model on every partner row.
# Each row of `prediction` is later reduced with argmax to a class index.
prediction=model.predict(tendency_final)
prediction
# Output partner suitability.
import numpy as np

# Class index -> label. The order matches the one-hot label columns used for
# training (suitable_for_partners_bad, _good, _very_good).
class_labels = ('bad', 'good', 'very_good')

# Pick the highest-scoring class for every row at once, so the code works for
# any number of partners rather than exactly seven.
predicted_classes = np.argmax(prediction, axis=1)

# Work on an explicit copy so the labels are not written into a slice of
# zootopia_partner, then replace the whole column with one label per row.
suitability_final = suitability_final.copy()
suitability_final['suitable_for_partners'] = [
    class_labels[class_index] for class_index in predicted_classes
]
suitability_final