[데이터 컬럼 설명 (데이터 파일명: A0007IT.json)]
[데이터 컬럼 설명 (데이터 파일명: signal.csv)]
import sklearn as sk
import pandas as pd
A0007IT.json
, signal.csv
, df_a
, df_b
# Load both raw sources, then keep only the rows whose RID appears in each.
df_a = pd.read_json('A0007IT.json')
df_b = pd.read_csv('signal.csv')
df = df_a.merge(df_b, on="RID", how="inner")
# print(df.head())
# 사전실행코드
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm
# Select the NanumGothicCoding font family for all subsequent plots
# (presumably so Korean text renders correctly -- confirm the font is installed).
plt.rcParams['font.family'] = 'NanumGothicCoding'
Address1
import seaborn as sns
# Show how many rows fall under each Address1 value.
sns.countplot(x='Address1', data=df)
plt.show()
# Discard the rows whose Address1 equals '-'. The original author's note
# records 3 such rows (inspect with: df[df['Address1'] == '-']).
df = df.loc[df['Address1'] != '-']
답안04 = 3
Time_Driving
, Speed_Per_Hour
# Joint distribution of Time_Driving (x axis) against Speed_Per_Hour (y axis).
sns.jointplot(x='Time_Driving', y='Speed_Per_Hour', data=df)
plt.show()
RID
, df_temp
# Drop the RID column, then keep only the rows with Speed_Per_Hour below 300.
# The filter has to be applied to df_temp rather than df: filtering df would
# rebuild df_temp from the unmodified frame and silently bring RID back.
df_temp = df.drop(['RID'], axis=1)
df_temp = df_temp[df_temp['Speed_Per_Hour'] < 300]
df_na
# Per-column count of missing values (the original author's note: 2 missing).
df_temp.isna().sum()
# Remove every row that contains at least one missing value.
df_na = df_temp.dropna(axis='index')
답안07 = 2
Time_Departure
, Time_Arrival
, df_del
# Remove the departure/arrival timestamp columns before modelling.
df_del = df_na.drop(columns=['Time_Departure', 'Time_Arrival'])
df_preset
# One-hot encode every object-dtype column of df_del.
cols = df_del.select_dtypes(include='object').columns
df_preset = pd.get_dummies(df_del, columns=cols)
Time_Driving
from sklearn.model_selection import train_test_split
# Time_Driving is the regression target; every remaining column is a feature.
y = df_preset['Time_Driving']
x = df_preset.drop(columns='Time_Driving')
# Hold out 20% of the rows for validation, using a fixed seed.
X_train, X_valid, y_train, y_valid = train_test_split(
    x, y, test_size=0.2, random_state=42
)
from sklearn.preprocessing import RobustScaler
# Fit the scaler on the training split only, then apply it to both splits.
robustScalar = RobustScaler()
robustScalar.fit(X_train)
X_train_robust = robustScalar.transform(X_train)
X_valid_robust = robustScalar.transform(X_valid)
# Re-wrap the scaled values as DataFrames carrying the original index and
# column labels.
X_train = pd.DataFrame(
    X_train_robust, columns=X_train.columns, index=X_train.index
)
X_valid = pd.DataFrame(
    X_valid_robust, columns=X_valid.columns, index=X_valid.index
)
dt
, rf
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
# Train a single decision tree and a random forest with the same depth limit,
# split threshold and seed. fit() returns the estimator itself, so it can be
# chained onto the constructor.
dt = DecisionTreeRegressor(
    random_state=120, max_depth=5, min_samples_split=3
).fit(X_train, y_train)
rf = RandomForestRegressor(
    random_state=120, max_depth=5, min_samples_split=3
).fit(X_train, y_train)
y_pred_dt
, dt_mae
, y_pred_rf
, rf_mae
from sklearn.metrics import mean_absolute_error
# Predict on the validation split with each model, then score both with MAE.
y_pred_dt = dt.predict(X_valid)
y_pred_rf = rf.predict(X_valid)
dt_mae = mean_absolute_error(y_true=y_valid, y_pred=y_pred_dt)
rf_mae = mean_absolute_error(y_true=y_valid, y_pred=y_pred_rf)
# Values noted by the original author: dt_mae ~ 108.6652, rf_mae ~ 66.4842,
# i.e. the random forest had the lower error.
답안12 = 'randomforest'
# 사전실행코드
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Activation, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.utils import to_categorical
# Set TensorFlow's global random seed to 1 before the model is built.
tf.random.set_seed(1)
history
# Feed-forward regression network: 64 -> dropout(0.2) -> 32 -> 16 -> 1.
# The final Dense(1) has no activation, so the output is an unbounded scalar.
model = Sequential()
model.add(Dense(64, input_shape=(X_train.shape[1],), activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(32, activation='relu'))
model.add(Dense(16, activation='relu'))
model.add(Dense(1))
# `metrics` is passed as a list: Model.compile documents a list of metrics,
# and Keras 3 raises ValueError when given a bare string. The history keys
# ('mse' / 'val_mse') are the same as with the string form.
model.compile(optimizer='adam', loss='mse', metrics=['mse'])
# Train for 30 epochs in batches of 16, evaluating on the validation split
# after every epoch.
history = model.fit(
    X_train,
    y_train,
    epochs=30,
    batch_size=16,
    validation_data=(X_valid, y_valid),
)
# Draw the training and validation MSE curves, one point per epoch.
for curve in ('mse', 'val_mse'):
    plt.plot(history.history[curve])
plt.title('Model MSE')
plt.xlabel('Epochs')
plt.ylabel('MSE')
plt.legend(['mse','val_mse'])
plt.show()
저도 올해 구글 머신러닝 부트캠프 6기가 나오면 지원하려고 글을 찾다가 들어오게 되었습니다. 올해는 모집이 늦네요. 혹시 여러 AI 자격증 중에 AICE를 보시는 이유가 있으실까요? 어떤 점이 더 나을까요?