import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Load the fuel-consumption ratings table; the file is decoded as Latin-1.
df = pd.read_csv(
    '/content/drive/MyDrive/연료 효율 예측/Fuel Consumption Ratings 2023.csv',
    encoding='latin-1',
)
# Bare expression: only displays a value when run interactively (notebook cell).
df.shape
# Remove the 'Year' column in place
# (presumably constant in a single-year file -- TODO confirm).
df.drop(columns='Year', inplace=True)
# Bare expression: per-column count of missing values (interactive display only).
df.isna().sum()
# Discard, in place, every row that contains at least one missing value.
df.dropna(inplace=True)
# case1 = categorical variables (columns with object dtype)
case1 = list(df.select_dtypes(include='object').columns)
# case2 = continuous variables (every column that is not object dtype)
case2 = list(df.select_dtypes(exclude='object').columns)
def fuel_box_plot(targets, df, n, x, y):
    """Draw one box plot per column, stacked vertically in a single figure.

    Args:
        targets: Column names of ``df`` to plot, in display order.
        df: DataFrame that holds the columns named in ``targets``.
        n: Number of subplot rows; at most the first ``n`` entries of
            ``targets`` are drawn. If ``targets`` has fewer than ``n``
            entries, the remaining rows are left empty.
        x: Figure width passed to ``figsize``.
        y: Figure height passed to ``figsize``.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    plt.figure(figsize=(x, y))
    # Spacing values are fractions of the average axes width/height.
    plt.subplots_adjust(wspace=1, hspace=1)
    # zip() pairs each 1-based subplot position with its column and stops at
    # whichever runs out first, so no manual indexing into targets is needed.
    for position, column in zip(range(1, n + 1), targets):
        plt.subplot(n, 1, position)
        sns.boxplot(x=column, data=df)
        plt.title(f'boxplot of {column}')
    # Show once, after every subplot has been added to the figure.
    plt.show()
def fuel_hist_plot(targets, df, n, x, y):
    """Draw one histogram per column, stacked vertically in a single figure.

    Args:
        targets: Column names of ``df`` to plot, in display order.
        df: DataFrame that holds the columns named in ``targets``.
        n: Number of subplot rows; at most the first ``n`` entries of
            ``targets`` are drawn. If ``targets`` has fewer than ``n``
            entries, the remaining rows are left empty.
        x: Figure width passed to ``figsize``.
        y: Figure height passed to ``figsize``.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    plt.figure(figsize=(x, y))
    # Spacing values are fractions of the average axes width/height.
    plt.subplots_adjust(wspace=1, hspace=1)
    # zip() pairs each 1-based subplot position with its column and stops at
    # whichever runs out first, so no manual indexing into targets is needed.
    for position, column in zip(range(1, n + 1), targets):
        plt.subplot(n, 1, position)
        # The column's values are passed directly rather than via data=.
        sns.histplot(x=df[column])
        plt.title(f'histplot of {column}')
    # Show once, after every subplot has been added to the figure.
    plt.show()
def fuel_count_plot(targets, df, n, x, y):
    """Draw one count plot per column, stacked vertically in a single figure.

    Args:
        targets: Column names of ``df`` to plot, in display order.
        df: DataFrame that holds the columns named in ``targets``.
        n: Number of subplot rows; at most the first ``n`` entries of
            ``targets`` are drawn. If ``targets`` has fewer than ``n``
            entries, the remaining rows are left empty.
        x: Figure width passed to ``figsize``.
        y: Figure height passed to ``figsize``.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    plt.figure(figsize=(x, y))
    # Spacing values are fractions of the average axes width/height; the large
    # vertical gap leaves room for the rotated tick labels between rows.
    plt.subplots_adjust(wspace=5, hspace=5)
    # zip() pairs each 1-based subplot position with its column and stops at
    # whichever runs out first, so no manual indexing into targets is needed.
    for position, column in zip(range(1, n + 1), targets):
        plt.subplot(n, 1, position)
        sns.countplot(x=column, data=df)
        # Rotate the category labels so they are drawn vertically.
        plt.xticks(rotation=90)
        plt.title(f'countplot of {column}')
    # Show once, after every subplot has been added to the figure.
    plt.show()
def fuel_bar_plot(targets, df, n, x, y,
                  value_col='Fuel Consumption (L/100Km)'):
    """Draw one bar plot per column against a value column, stacked vertically.

    Each subplot puts one entry of ``targets`` on the x axis and
    ``value_col`` on the y axis, aggregated by seaborn's ``barplot`` defaults.

    Args:
        targets: Column names of ``df`` to use as the x axis, in display order.
        df: DataFrame that holds ``value_col`` and the columns in ``targets``.
        n: Number of subplot rows; at most the first ``n`` entries of
            ``targets`` are drawn. If ``targets`` has fewer than ``n``
            entries, the remaining rows are left empty.
        x: Figure width passed to ``figsize``.
        y: Figure height passed to ``figsize``.
        value_col: Column of ``df`` plotted on the y axis. Defaults to the
            fuel-consumption column so existing calls behave as before.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    plt.figure(figsize=(x, y))
    # Spacing values are fractions of the average axes width/height; the large
    # vertical gap leaves room for the rotated tick labels between rows.
    plt.subplots_adjust(wspace=5, hspace=5)
    # zip() pairs each 1-based subplot position with its column and stops at
    # whichever runs out first, so no manual indexing into targets is needed.
    for position, column in zip(range(1, n + 1), targets):
        plt.subplot(n, 1, position)
        sns.barplot(x=column, y=value_col, data=df)
        # Rotate the category labels so they are drawn vertically.
        plt.xticks(rotation=90)
        plt.title(f'barplot of {column}')
    # Show once, after every subplot has been added to the figure.
    plt.show()

# Pairwise correlations between the columns listed in case2. This is a bare
# expression, so the result is only displayed when run interactively.
# NOTE(review): df_temp is not defined in the visible part of this file --
# confirm where it is created before this point.
df_temp.loc[:, case2].corr()

# Name of the column the model is meant to predict.
target = 'Fuel Consumption (L/100Km)'
# Features are every column except the target; drop() returns a new frame,
# so df_temp itself is left untouched.
x = df_temp.drop(columns=target)
y = df_temp[target]
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; the fixed seed makes the split
# reproducible between runs.
train_x, test_x, train_y, test_y = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)
# Source notebook: https://colab.research.google.com/drive/1sltQsDvKb4Xo7U8m2mGzyP1tj7tHWyzY?usp=sharing