■ 관련 내용 : (EDA) HW 02. 주유소 데이터 분석
# Load the station price table. `thousands=','` makes pandas parse values
# such as "1,650" as numbers; the first CSV column becomes the index.
data = pd.read_csv(
    'Oil Price Analysis.csv',
    thousands=',',
    encoding='utf-8',
    index_col=0,
)
data
data.info()

# Ten highest, then ten lowest, rows by gasoline price.
data.sort_values("gasolinePrice", ascending=False).head(10)
data.sort_values("gasolinePrice", ascending=True).head(10)

# Ten highest, then ten lowest, rows by diesel price.
data.sort_values("dieselPrice", ascending=False).head(10)
data.sort_values("dieselPrice", ascending=True).head(10)
# Korean (Hangul) font setup
import platform
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import rc, font_manager
%matplotlib inline
# Choose the matplotlib font family based on the host operating system.
path = "C:/Windows/Fonts/malgun.ttf"  # only read in the Windows branch
system_name = platform.system()

if system_name == "Windows":
    # Resolve the family name from the font file before registering it.
    font_name = font_manager.FontProperties(fname=path).get_name()
    print("System On : Windows")
    rc("font", family=font_name)
elif system_name == "Darwin":
    print("System On : MAC")
    rc("font", family="Arial Unicode MS")
else:
    # No font is configured for any other platform.
    print("Unknown System")
def ComparisonGasolinePrice():
    """Show a box plot of ``gasolinePrice`` grouped by the ``oilSelf`` column.

    Reads the module-level ``data`` frame and renders the figure with
    ``plt.show()``; nothing is returned.
    """
    plt.figure(figsize=(6, 6))
    axes = sns.boxplot(data=data, x="oilSelf", y="gasolinePrice", palette="Reds")
    axes.set_title('Comparison Gasoline Price by Self', size=15)
    sns.despine(ax=axes)
    plt.tight_layout()
    plt.show()


ComparisonGasolinePrice()
def ComparisonDieselPrice():
    """Show a box plot of ``dieselPrice`` grouped by the ``oilSelf`` column.

    Reads the module-level ``data`` frame and renders the figure with
    ``plt.show()``; nothing is returned.

    The function was previously named ``ComparisonGasolinePrice`` even though
    it plots diesel prices, which silently replaced the gasoline plot function
    defined just before it. It now carries its own name so both remain usable.
    """
    plt.figure(figsize=(6, 6))
    sns.boxplot(x="oilSelf", y="dieselPrice", data=data, palette="Blues")
    plt.title('Comparison Diesel Price by Self', size=15)
    sns.despine()
    plt.tight_layout()
    plt.show()


ComparisonDieselPrice()
def ComparisonGasolinePriceWithBrand():
    """Show per-brand box plots of ``gasolinePrice``, split by ``oilSelf``.

    Reads the module-level ``data`` frame and renders the figure with
    ``plt.show()``; nothing is returned.
    """
    plt.figure(figsize=(12, 8))
    axes = sns.boxplot(
        data=data,
        x="brand",
        y="gasolinePrice",
        hue="oilSelf",
        palette="Reds",
    )
    axes.set_title('Comparison Gasoline Price by Self', size=15)
    sns.despine(ax=axes)
    plt.tight_layout()
    plt.show()


ComparisonGasolinePriceWithBrand()
def ComparisonDieselPriceWithBrand():
    """Show per-brand box plots of ``dieselPrice``, split by ``oilSelf``.

    Reads the module-level ``data`` frame and renders the figure with
    ``plt.show()``; nothing is returned.
    """
    plt.figure(figsize=(12, 8))
    axes = sns.boxplot(
        data=data,
        x="brand",
        y="dieselPrice",
        hue="oilSelf",
        palette="Blues",
    )
    axes.set_title('Comparison Diesel Price by Self', size=15)
    sns.despine(ax=axes)
    plt.tight_layout()
    plt.show()


ComparisonDieselPriceWithBrand()
# Mean gasoline and diesel price for every (brand, oilSelf) pair.
# The string alias "mean" is used instead of np.mean: recent pandas releases
# warn when a NumPy callable is passed as aggfunc, and the alias keeps the
# same result without requiring numpy in this cell.
brand_Pivot = pd.pivot_table(
    data,
    index=['brand', 'oilSelf'],
    values=['gasolinePrice', 'dieselPrice'],
    aggfunc='mean',
)
# Round both price columns to two decimals for display.
brand_Pivot['gasolinePrice'] = round(brand_Pivot['gasolinePrice'], 2)
brand_Pivot['dieselPrice'] = round(brand_Pivot['dieselPrice'], 2)
brand_Pivot
def brandGasolineBasedSelf():
    """Show a bar plot of ``gasolinePrice`` per brand, split by ``oilSelf``.

    Reads the module-level ``data`` frame and renders the figure with
    ``plt.show()``; nothing is returned.
    """
    plt.figure(figsize=(10, 6))
    axes = sns.barplot(
        data=data,
        x='brand',
        y='gasolinePrice',
        hue='oilSelf',
        palette='Reds',
    )
    axes.set_title('Each brand gasoline Price based on Self Service', size=15)
    sns.despine(ax=axes)
    plt.tight_layout()
    plt.show()


brandGasolineBasedSelf()
def brandDieselBasedSelf():
    """Show a bar plot of ``dieselPrice`` per brand, split by ``oilSelf``.

    Reads the module-level ``data`` frame and renders the figure with
    ``plt.show()``; nothing is returned.
    """
    plt.figure(figsize=(10, 6))
    axes = sns.barplot(
        data=data,
        x='brand',
        y='dieselPrice',
        hue='oilSelf',
        palette='Blues',
    )
    axes.set_title('Each brand Diesel Price based on Self Service', size=15)
    sns.despine(ax=axes)
    plt.tight_layout()
    plt.show()


brandDieselBasedSelf()
# Count rows per (gu, oilSelf) pair by counting non-null `address` values.
pivotCnt = pd.pivot_table(
    data=data,
    values=['address'],
    index=['gu', 'oilSelf'],
    aggfunc='count',
)
pivotCnt
Output | Output |
---|---|
# Map modules and the municipality-boundary GeoJSON
import folium
import json
# Location of the simplified municipality-boundary GeoJSON file.
geo_path = '../data/02. skorea_municipalities_geo_simple.json'
# Read through a context manager so the file handle is closed right after
# parsing instead of being left open for the rest of the session.
with open(geo_path, encoding='utf-8') as geo_file:
    geo_str = json.load(geo_file)
# Mean gasoline and diesel price per `gu` value.
# The string alias "mean" is used instead of np.mean: recent pandas releases
# warn when a NumPy callable is passed as aggfunc, and the alias keeps the
# same result without requiring numpy in this cell.
gu_Pivot = pd.pivot_table(
    data,
    index=['gu'],
    values=['gasolinePrice', 'dieselPrice'],
    aggfunc='mean',
)
gu_Pivot
셀프 유/무에 따른 지도 시각화
- 셀프주유가 가능할 경우 파란색(Blue)으로 표시
- 셀프주유가 불가할 경우 빨간색(Red)으로 표시
# Visualise the mean gasoline price per district, plus one marker per station.
# NOTE(review): newer folium releases reportedly no longer ship the built-in
# Stamen tile sets — confirm "Stamen Toner" resolves with the installed version.
gasolineMap = folium.Map(
    location=[37.55, 126.98],
    zoom_start=12,
    tiles="Stamen Toner",
)

# Shade each GeoJSON feature by the mean gasoline price keyed on its id.
folium.Choropleth(
    geo_data=geo_str,
    data=gu_Pivot,
    columns=[gu_Pivot.index, 'gasolinePrice'],
    key_on='feature.id',
    fill_color='PuRd',
    fill_opacity=0.7,
    line_opacity=0.2,
).add_to(gasolineMap)

# One circle per row: blue when oilSelf is 'Y', red for every other value.
for idx, rows in data.iterrows():
    marker_color = 'Blue' if rows['oilSelf'] == 'Y' else 'Red'
    folium.CircleMarker(
        location=[rows['lat'], rows['lon']],
        radius=12,
        fill=True,
        color=marker_color,
        fill_color=marker_color,
    ).add_to(gasolineMap)

gasolineMap
# Visualise the mean diesel price per district, plus one marker per station.
# NOTE(review): newer folium releases reportedly no longer ship the built-in
# Stamen tile sets — confirm "Stamen Toner" resolves with the installed version.
dieselMap = folium.Map(
    location=[37.55, 126.98],
    zoom_start=12,
    tiles="Stamen Toner",
)

# Shade each GeoJSON feature by the mean diesel price keyed on its id.
folium.Choropleth(
    geo_data=geo_str,
    data=gu_Pivot,
    columns=[gu_Pivot.index, 'dieselPrice'],
    key_on='feature.id',
    fill_color='YlGnBu',
    fill_opacity=0.7,
    line_opacity=0.2,
).add_to(dieselMap)

# One circle per row: blue when oilSelf is 'Y', red for every other value.
for idx, rows in data.iterrows():
    marker_color = 'Blue' if rows['oilSelf'] == 'Y' else 'Red'
    folium.CircleMarker(
        location=[rows['lat'], rows['lon']],
        radius=12,
        fill=True,
        color=marker_color,
        fill_color=marker_color,
    ).add_to(dieselMap)

dieselMap