저번 시간에 끝까지 다루지 못했던 신경망 모델 훈련을 마저 작성해 보고, 우리가 하려는 프로젝트에 맞는 (Kaggle에 있는) 코드도 작성해 보고자 한다.
Kaggle 노트북 4가지 정도를 따라 해 봤는데, 그중 상위 2가지만 여기에 정리해 보려 한다.
# Configure the network for training: sparse categorical cross-entropy as the
# loss, with accuracy reported as a metric. No optimizer argument is passed
# here, so the framework's default is used. `model` is expected to have been
# built before this point (not shown in this section).
model.compile(
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)


import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# --- Load ---------------------------------------------------------------
df = pd.read_csv("farm/kaggle/input/students-performance-in-exams/StudentsPerformance.csv")
# Replace spaces in the header with underscores so the column names used
# below ('math_score', 'test_preparation_course', ...) resolve. This is a
# no-op when the header is already underscored.
df.columns = df.columns.str.strip().str.replace(" ", "_", regex=False)
df.info()
# describe() only returns a frame; print it so the summary is visible when
# this is not the last expression of a notebook cell.
print(df.describe())

# --- Explore ------------------------------------------------------------
# Open a new figure per plot; otherwise all three are drawn onto the same
# axes and paint over one another.
plt.figure()
sns.countplot(x='gender', data=df)
plt.figure()
sns.boxplot(x='test_preparation_course', y='math_score', data=df)
plt.figure()
# The frame still contains string categoricals at this point, and a
# correlation matrix is only defined for numeric columns, so restrict to them.
sns.heatmap(df.select_dtypes(include='number').corr(), annot=True)

# --- Preprocess ---------------------------------------------------------
le = LabelEncoder()
df['gender'] = le.fit_transform(df['gender'])
# One-hot encode the remaining categorical columns.
df = pd.get_dummies(df, drop_first=True)
score_columns = ['math_score', 'reading_score', 'writing_score']
df['average_score'] = df[score_columns].mean(axis=1)

# --- Model --------------------------------------------------------------
# The target is exactly the mean of the three score columns, so keeping them
# in X leaks the answer into the features. Predict from the remaining
# (background) columns only.
X = df.drop(columns=score_columns + ['average_score'])
y = df['average_score']
# Fixed seed so the split (and therefore the fitted models) is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
lr = LinearRegression().fit(X_train, y_train)
# Classification example: the rounded average score is used as the class label.
rf = RandomForestClassifier(random_state=42).fit(X_train, y_train.round())
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# NOTE(review): the leading "…" in the path looks like a placeholder for an
# elided directory prefix — substitute the real location before running.
df = pd.read_csv("…Student-Performance-Multiple-Linear-Regression.csv")
df.info()
# describe() only returns a frame; print it so the summary is actually shown.
print(df.describe())

# Build the features/target from THIS dataset. Without this step the fit below
# would silently reuse whatever X_train / y_train / X_test an earlier section
# left in scope, and the CSV loaded above would never be used.
# NOTE(review): assumes the target is the last column of the CSV — TODO confirm.
target_column = df.columns[-1]
# One-hot encode any categorical feature columns so the regressor gets numbers.
X = pd.get_dummies(df.drop(columns=target_column), drop_first=True)
y = df[target_column]
# Fixed seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

lr = LinearRegression()
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)