Matplotlib์ Seaborn pip์ ์ด์ฉ ์ค์น
pip list | grep matplotlib
pip list | grep seaborn
도화지를 펼치고 축을 그리고 그 안에 데이터를 그림
# Bar chart of test points per subject; saved to ./barplot.png and then displayed.
import matplotlib.pyplot as plt
%matplotlib inline
# Graph data
subject = ['English', 'Math', 'Korean', 'Science', 'Computer']
points = [40, 90, 50, 60, 100]
# Draw the axes
fig = plt.figure()
ax1 = fig.add_subplot(1,1,1)
# Draw the graph
ax1.bar(subject, points)
# Add the axis labels and the title
plt.xlabel('Subject')
plt.ylabel('Points')
plt.title("Yuna's Test Result")
# Show the result
plt.savefig('./barplot.png') # write the graph out as an image file
plt.show() # display the graph on screen
import 하기
import matplotlib.pyplot as plt # load the plotting module
%matplotlib inline
# The line above is an IPython magic command (notebook syntax, not plain Python)
# Graph data
subject = ['English', 'Math', 'Korean', 'Science', 'Computer']
points = [40, 90, 50, 60, 100]
# Draw the axes
fig = plt.figure() # create the canvas (figure) object
ax1 = fig.add_subplot(1,1,1) # use the figure's add_subplot method to draw the axes
# A figure created without add_subplot has no axes yet (the cell output reports "0 Axes")
fig = plt.figure()
<Figure size 432x288 with 0 Axes>
fig = plt.figure(figsize=(5,2)) # the figsize argument sets the size of the figure
ax1 = fig.add_subplot(1,1,1) # (nrows, ncols, index)
# 2x2 grid: subplots are added at index 1, 2 and 4 (index 3 is not used)
fig = plt.figure()
ax1 = fig.add_subplot(2,2,1)
ax2 = fig.add_subplot(2,2,2)
ax3 = fig.add_subplot(2,2,4)
# Graph data
subject = ['English', 'Math', 'Korean', 'Science', 'Computer']
points = [40, 90, 50, 60, 100]
# Draw the axes
fig = plt.figure()
ax1 = fig.add_subplot(1,1,1)
# Draw the graph
ax1.bar(subject,points)
<BarContainer object of 5 artists>
$ 플롯 테스트 $
# Subplot experiment: 3x3 grid with subplots added at index 1, 2, 3 and 5
fig = plt.figure()
ax1 = fig.add_subplot(3,3,1)
ax2 = fig.add_subplot(3,3,2)
ax3 = fig.add_subplot(3,3,3)
ax4 = fig.add_subplot(3,3,5)
x라벨, y라벨, 제목을 추가하기 위해서는
xlabel() 메서드와 ylabel() 메서드, title() 메서드를 이용
# Set the x-axis label, the y-axis label and the title through pyplot
plt.xlabel('Subject')
plt.ylabel('Points')
plt.title("Yuna's Test Result")
Text(0.5, 1.0, "Yuna's Test Result")
# Graph data
subject = ['English', 'Math', 'Korean', 'Science', 'Computer']
points = [40, 90, 50, 60, 100]
# Draw the axes
fig = plt.figure()
ax1 = fig.add_subplot(1,1,1)
# Draw the graph
ax1.bar(subject, points)
# Add the axis labels and the title
plt.xlabel('Subject')
plt.ylabel('Points')
plt.title("Yuna's Test Result")
Text(0.5, 1.0, "Yuna's Test Result")
과거 아마존 주가 데이터
AMZN
from datetime import datetime
import pandas as pd
import os
# Plot the AMZN closing price and annotate two dates of interest.
# Graph data: read the CSV with column 0 as a date-parsed index, keep the 'Close' column.
# expanduser("~") is used instead of os.getenv("HOME") + ..., which raises TypeError
# when the HOME environment variable is not set.
csv_path = os.path.join(os.path.expanduser("~"), "aiffel/data_visualization/data/AMZN.csv")
data = pd.read_csv(csv_path, index_col=0, parse_dates=True)
price = data['Close']
# Draw the axes and set the axis ranges
fig = plt.figure()
ax = fig.add_subplot(1,1,1)
price.plot(ax=ax, style='black')
plt.ylim([1600,2200])
plt.xlim(['2019-05-01','2020-03-01'])
# Add the annotations
important_data = [(datetime(2019, 6, 3), "Low Price"),(datetime(2020, 2, 19), "Peak Price")]
for d, label in important_data:
    ax.annotate(label, xy=(d, price.asof(d)+10),  # point being annotated (x, y)
                xytext=(d, price.asof(d)+100),  # where the annotation text is placed (x, y)
                arrowprops=dict(facecolor='red'))  # add an arrow and set its colour
# Add the grid and the title
plt.grid()
ax.set_title('StockPrice')
# Show the result
plt.show()
Pandas์ Series๋ ์ ๊ทธ๋ํ ๊ทธ๋ฆฌ๊ธฐ ์ต์ ํ
price = data['Close']๊ฐ ๋ฐ๋ก Pandas์ Series
price.plot(ax=ax, style='black')์์
Pandas์ plot์ ์ฌ์ฉํ๋ฉด์,
matplotlib์์ ์ ์ํ subplot ๊ณต๊ฐ ax๋ฅผ ์ฌ์ฉ
plt.xlim(), plt.ylim()์ ํตํด x, y ์ขํ์ถ์ ์ ๋นํ ๋ฒ์๋ฅผ ์ค์
๊ทธ๋ํ ์์ ์ถ๊ฐ์ ์ผ๋ก ๊ธ์๋ ํ์ดํ ๋ฑ ์ฃผ์์ ๊ทธ๋ฆด ๋๋ annotate() ๋ฉ์๋๋ฅผ ์ด์ฉ
grid() ๋ฉ์๋๋ฅผ ์ด์ฉํ๋ฉด ๊ทธ๋ฆฌ๋(๊ฒฉ์๋๊ธ)๋ฅผ ์ถ๊ฐ
๊ธฐ๋ณธ์ ์ผ๋ก
figure() ๊ฐ์ฒด๋ฅผ ์์ฑํ๊ณ add_subplot()์ผ๋ก ์๋ธํ๋กฏ์ ์์ฑํ๋ฉฐ plot์ ๊ทธ๋ฆผ
plt.plot()์ ์ธ์๋ก x ๋ฐ์ดํฐ, y ๋ฐ์ดํฐ, ๋ง์ปค ์ต์ , ์์ ๋ฑ์ ์ธ์๋ฅผ ์ด์ฉ
import numpy as np
# Plot sin(x) as round markers and cos(x) as a black dashed line on the same axes.
x = np.linspace(0, 10, 100)  # 100 evenly spaced numbers from 0 to 10
plt.plot(x, np.sin(x),'o')
plt.plot(x, np.cos(x),'--', color='black')
plt.show()
์๋ธํ๋กฏ๋ plt.subplot์ ์ด์ฉํด ์ถ๊ฐ
# Two stacked subplots: sin(x) in the first, cos(x) in the second.
x = np.linspace(0, 10, 100)
plt.subplot(2,1,1)
# plot() reads positional arguments as repeating (x, y, fmt) groups, so a fourth
# positional 'o' would be taken as the data of a second series rather than a marker.
# The marker goes in the format string and the colour is passed by keyword.
plt.plot(x, np.sin(x), 'o', color='orange')
plt.subplot(2,1,2)
plt.plot(x, np.cos(x), 'orange')
plt.show()
# Line styles selected by their full name
x = np.linspace(0, 10, 100)
plt.plot(x, x + 0, linestyle='solid')
plt.plot(x, x + 1, linestyle='dashed')
plt.plot(x, x + 2, linestyle='dashdot')
plt.plot(x, x + 3, linestyle='dotted')
# Short format strings: line style and colour code combined in one argument
plt.plot(x, x + 0, '-g') # solid green
plt.plot(x, x + 1, '--c') # dashed cyan
plt.plot(x, x + 2, '-.k') # dashdot black
plt.plot(x, x + 3, ':r'); # dotted red
# Line styles selected by their symbol
plt.plot(x, x + 4, linestyle='-') # solid
plt.plot(x, x + 5, linestyle='--') # dashed
plt.plot(x, x + 6, linestyle='-.') # dashdot
plt.plot(x, x + 7, linestyle=':'); # dotted
막대 그래프 kind -> bar
# Draw one random 5-element Series twice: kind='bar' on the first axes, kind='barh' on the second.
fig, axes = plt.subplots(2, 1)
data = pd.Series(np.random.rand(5), index=list('abcde'))
data.plot(kind='bar', ax=axes[0], color='blue', alpha=1)
data.plot(kind='barh', ax=axes[1], color='red', alpha=0.3)
<AxesSubplot:>
선 그래프 그리는 법
# Line plot of a 6x4 DataFrame of random values with columns A-D
df = pd.DataFrame(np.random.rand(6,4), columns=pd.Index(['A','B','C','D']))
df.plot(kind='line')
<AxesSubplot:>
seaborn์ load_dataset() ๋ฉ์๋๋ฅผ ์ด์ฉ
๋ฉ์๋๋ฅผ ์คํํ๋ฉด home directory์ seaborn-data๊ฐ ์๋ ๋ค์ด๋ก๋ํ์ฌ ์ ์ฅ๋จ
import pandas as pd
import seaborn as sns
tips = sns.load_dataset("tips")
Tips 데이터를 불러오자
tips.csv
Pandas์ dataframe๋ฅผ ์ด์ฉํ์ฌ ๋ฐ์ดํฐ ๊ตฌ์ฑ ํ์ธํ๊ธฐ
# Wrap the loaded tips data in a DataFrame and look at its first rows
df = pd.DataFrame(tips)
df.head()
total_bill | tip | sex | smoker | day | time | size | |
---|---|---|---|---|---|---|---|
0 | 16.99 | 1.01 | Female | No | Sun | Dinner | 2 |
1 | 10.34 | 1.66 | Male | No | Sun | Dinner | 3 |
2 | 21.01 | 3.50 | Male | No | Sun | Dinner | 3 |
3 | 23.68 | 3.31 | Male | No | Sun | Dinner | 2 |
4 | 24.59 | 3.61 | Female | No | Sun | Dinner | 4 |
df.shape
(244, 7)
df.describe()
total_bill | tip | size | |
---|---|---|---|
count | 244.000000 | 244.000000 | 244.000000 |
mean | 19.785943 | 2.998279 | 2.569672 |
std | 8.902412 | 1.383638 | 0.951100 |
min | 3.070000 | 1.000000 | 1.000000 |
25% | 13.347500 | 2.000000 | 2.000000 |
50% | 17.795000 | 2.900000 | 2.000000 |
75% | 24.127500 | 3.562500 | 3.000000 |
max | 50.810000 | 10.000000 | 6.000000 |
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 244 entries, 0 to 243
Data columns (total 7 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 total_bill 244 non-null float64
1 tip 244 non-null float64
2 sex 244 non-null category
3 smoker 244 non-null category
4 day 244 non-null category
5 time 244 non-null category
6 size 244 non-null int64
dtypes: category(4), float64(2), int64(1)
memory usage: 7.4 KB
๊ฒฐ์ธก๊ฐ์ด ์์ด ๊ฒฐ์ธก๊ฐ ์ฒ๋ฆฌ ํ์๊ฐ ์๋ค.
๋ฐ์ดํฐ ๋ณ์ ์ค sex,smoker, day, time์ ๋ฒ์ฃผํ
tips, total_bill, size๋ ์์นํ / size ๋ฒ์ฃผํ์ผ๋ก (ํ ์ด๋ธ ์ธ์์ ์๋ฏธ)
# Print the value counts of each listed column, each followed by a separator line.
divider = "==========================="
for column in ('sex', 'time', 'smoker', 'day', 'size'):
    print(df[column].value_counts())
    print(divider)
Male 157
Female 87
Name: sex, dtype: int64
===========================
Dinner 176
Lunch 68
Name: time, dtype: int64
===========================
No 151
Yes 93
Name: smoker, dtype: int64
===========================
Sat 87
Sun 76
Thur 62
Fri 19
Name: day, dtype: int64
===========================
2 156
3 38
4 37
5 5
1 4
6 4
Name: size, dtype: int64
===========================
matplotlib์ ๋ฐ์ดํฐ๋ฅผ ์ธ์๋ก ๋ฃ๊ธฐ ์ํด์
pandas ๋ฐ์ดํฐ ๋ฐ๋ก ์ด์ฉ ๋ถ๊ฐ
๋ฐ์ดํฐ๋ฅผ x series ๋๋ list,
y์ list ํํ๋ก ๊ฐ๊ฐ ๋๋ ์ค
# Check the first 5 rows of df.
df.head()
total_bill | tip | sex | smoker | day | time | size | |
---|---|---|---|---|---|---|---|
0 | 16.99 | 1.01 | Female | No | Sun | Dinner | 2 |
1 | 10.34 | 1.66 | Male | No | Sun | Dinner | 3 |
2 | 21.01 | 3.50 | Male | No | Sun | Dinner | 3 |
3 | 23.68 | 3.31 | Male | No | Sun | Dinner | 2 |
4 | 24.59 | 3.61 | Female | No | Sun | Dinner | 4 |
# Express the tip column as an average per sex.
# Use the pandas groupby method.
grouped = df['tip'].groupby(df['sex']) # group the df['tip'] column by sex
# -> per-group information (sum, mean, amount of data, ...) is then available from grouped
# Check the mean and the amount of data
grouped.mean() # average tip per sex
sex
Male 3.089618
Female 2.833448
Name: tip, dtype: float64
grouped.size() # amount of data per sex (number of tips)
sex
Male 157
Female 87
Name: tip, dtype: int64
# To draw the average tip amount per sex as a bar chart:
import numpy as np
sex = dict(grouped.mean())  # convert the mean data into a dictionary
sex
{'Male': 3.0896178343949043, 'Female': 2.833448275862069}
x = list(sex.keys()) # x-axis values as a list
x
['Male', 'Female']
y = list(sex.values()) # y-axis values as a list
y
[3.0896178343949043, 2.833448275862069]
# Bar chart of the average tip per sex, built from the x and y lists.
import matplotlib.pyplot as plt  # pyplot, used here for the bar chart
plt.bar(x = x, height = y)  # bars at x with heights y
plt.ylabel('tip[$]')  # label the y axis as the tip
plt.title('Tip by Sex')  # set the chart title (no x-axis label is set)
Text(0.5, 1.0, 'Tip by Sex')
sns.barplot์ ์ธ์๋ก df๋ฅผ ๋ฃ๊ณ ์ํ๋ ์ปฌ๋ผ์ ์ง์ .
sns.barplot(data=df, x='sex', y='tip')
<AxesSubplot:xlabel='sex', ylabel='tip'>
# Used together with Matplotlib: figsize, title and other graph options
plt.figure(figsize=(10,6)) # set the size of the canvas
sns.barplot(data=df, x='sex', y='tip')
plt.ylim(0, 4) # set the range of the y values
plt.title('Tip by sex') # set the graph title
Text(0.5, 1.0, 'Tip by sex')
# Tips by day of the week
plt.figure(figsize=(10,6))
sns.barplot(data=df, x='day', y='tip')
plt.ylim(0, 4)
plt.title('Tip by day')
Text(0.5, 1.0, 'Tip by day')
# Using subplots; a violin plot is also a good way to show categorical data.
# The palette option sets the colours used.
fig = plt.figure(figsize=(10,7))
ax1 = fig.add_subplot(2,2,1)
sns.barplot(data=df, x='day', y='tip',palette="ch:.25")
ax2 = fig.add_subplot(2,2,2)
sns.barplot(data=df, x='sex', y='tip')
ax3 = fig.add_subplot(2,2,4)
sns.violinplot(data=df, x='sex', y='tip')
ax4 = fig.add_subplot(2,2,3)
sns.violinplot(data=df, x='day', y='tip',palette="ch:.25")
<AxesSubplot:xlabel='day', ylabel='tip'>
# Using catplot
sns.catplot(x="day", y="tip", jitter=False, data=tips)
<seaborn.axisgrid.FacetGrid at 0x7fa90ef12190>
์ฐ์ ๋, ์ ๊ทธ๋ํ ์ฌ์ฉ์ ์ฌ์ฉํ๋ ๊ฒ์ด ์ข๋ค.
์ ์ฒด ๊ฐ๊ฒฉ total_bill์ ๋ฐ๋ฅธ tip ๋ฐ์ดํฐ๋ฅผ ์๊ฐํ
# Scatter plot of tip against total_bill.
# The original note here says: pass 'day' as the hue argument to visualise the
# relationship between tip and total_bill per day of the week.
# NOTE(review): this call passes palette but no hue; the hue='day' variant is the next
# statement in the file. Without hue the palette presumably has no visible effect — confirm.
sns.scatterplot(data=df , x='total_bill', y='tip', palette="ch:r=-.2,d=.3_r")
<AxesSubplot:xlabel='total_bill', ylabel='tip'>
sns.scatterplot(data=df , x='total_bill', y='tip', hue='day')
<AxesSubplot:xlabel='total_bill', ylabel='tip'>
# np.random.randn generates random numbers from the standard normal distribution.
# cumsum() computes the cumulative sum.
plt.plot(np.random.randn(50).cumsum())
[<matplotlib.lines.Line2D at 0x7fa90c127130>]
# Plot sin(x) as markers and cos(x) as a line with matplotlib.
x = np.linspace(0, 10, 100)
plt.plot(x, np.sin(x), 'o')
plt.plot(x, np.cos(x))
plt.show()
# The same sin(x) and cos(x) data drawn with Seaborn.
sns.lineplot(x=x, y=np.sin(x))
sns.lineplot(x=x, y=np.cos(x))
<AxesSubplot:>
히스토그램 개념
• 가로축
계급: 변수의 구간, bin (or bucket)
• 세로축
도수: 빈도수, frequency
전체 총량: n
# x1 follows a normal distribution with mean 100 and standard deviation 15.
# x2 follows a normal distribution with mean 130 and standard deviation 15.
# The values are shown in 50 bins, as counts rather than as a probability density.
# Graph data
mu1, mu2, sigma = 100, 130, 15
x1 = mu1 + sigma*np.random.randn(10000)
x2 = mu2 + sigma*np.random.randn(10000)
# Draw the axes
fig = plt.figure()
ax1 = fig.add_subplot(1,1,1)
# Draw the graph
patches = ax1.hist(x1, bins=50, density=False)  # bins=50 splits the x values into 50 intervals
patches = ax1.hist(x2, bins=50, density=False, alpha=0.5)
ax1.xaxis.set_ticks_position('bottom')  # show the x-axis ticks at the bottom
ax1.yaxis.set_ticks_position('left')  # show the y-axis ticks on the left
# Add the axis labels and the title
plt.xlabel('Bins')
plt.ylabel('Number of Values in Bin')
ax1.set_title('Two Frequency Distributions')
# Show the result
plt.show()
tips 데이터 확인
# Histograms of total_bill and tip from the tips data
sns.histplot(df['total_bill'], label = "total_bill")
sns.histplot(df['tip'], label = "tip").legend()# legend() displays the labels
<matplotlib.legend.Legend at 0x7fa90be91700>
# Ratio of the tip to the total bill
df['tip_pct'] = df['tip'] / df['total_bill']
df['tip_pct'].hist(bins=50)
<AxesSubplot:>
# kind='kde' draws a probability density graph
df['tip_pct'].plot(kind='kde')
<AxesSubplot:ylabel='Density'>
# Load the flights CSV from the user's home directory and display it.
# expanduser("~") is used instead of os.getenv("HOME") + ..., which raises TypeError
# when the HOME environment variable is not set.
csv_path = os.path.join(os.path.expanduser("~"), "aiffel/data_visualization/data/flights.csv")
data = pd.read_csv(csv_path)
flights = pd.DataFrame(data)
flights
year | month | passengers | |
---|---|---|---|
0 | 1949 | January | 112 |
1 | 1949 | February | 118 |
2 | 1949 | March | 132 |
3 | 1949 | April | 129 |
4 | 1949 | May | 121 |
... | ... | ... | ... |
139 | 1960 | August | 606 |
140 | 1960 | September | 508 |
141 | 1960 | October | 461 |
142 | 1960 | November | 390 |
143 | 1960 | December | 432 |
144 rows × 3 columns
sns.barplot(data=flights, x='year', y='passengers')
<AxesSubplot:xlabel='year', ylabel='passengers'>
sns.pointplot(data=flights, x='year', y='passengers')
<AxesSubplot:xlabel='year', ylabel='passengers'>
sns.lineplot(data=flights, x='year', y='passengers')
<AxesSubplot:xlabel='year', ylabel='passengers'>
# Pass 'month' as the hue argument to split the lines by month
sns.lineplot(data=flights, x='year', y='passengers', hue='month', palette='ch:.50')
plt.legend(bbox_to_anchor=(1.03, 1), loc=2) # place the legend outside the graph
<matplotlib.legend.Legend at 0x7fa90bcc53d0>
히스토그램
sns.histplot(flights['passengers'])
<AxesSubplot:xlabel='passengers', ylabel='Count'>
# Pivot the passenger counts of flights (a DataFrame) by year and month
pivot = flights.pivot(index='year', columns='month', values='passengers')
pivot
month | April | August | December | February | January | July | June | March | May | November | October | September |
---|---|---|---|---|---|---|---|---|---|---|---|---|
year | ||||||||||||
1949 | 129 | 148 | 118 | 118 | 112 | 148 | 135 | 132 | 121 | 104 | 119 | 136 |
1950 | 135 | 170 | 140 | 126 | 115 | 170 | 149 | 141 | 125 | 114 | 133 | 158 |
1951 | 163 | 199 | 166 | 150 | 145 | 199 | 178 | 178 | 172 | 146 | 162 | 184 |
1952 | 181 | 242 | 194 | 180 | 171 | 230 | 218 | 193 | 183 | 172 | 191 | 209 |
1953 | 235 | 272 | 201 | 196 | 196 | 264 | 243 | 236 | 229 | 180 | 211 | 237 |
1954 | 227 | 293 | 229 | 188 | 204 | 302 | 264 | 235 | 234 | 203 | 229 | 259 |
1955 | 269 | 347 | 278 | 233 | 242 | 364 | 315 | 267 | 270 | 237 | 274 | 312 |
1956 | 313 | 405 | 306 | 277 | 284 | 413 | 374 | 317 | 318 | 271 | 306 | 355 |
1957 | 348 | 467 | 336 | 301 | 315 | 465 | 422 | 356 | 355 | 305 | 347 | 404 |
1958 | 348 | 505 | 337 | 318 | 340 | 491 | 435 | 362 | 363 | 310 | 359 | 404 |
1959 | 396 | 559 | 405 | 342 | 360 | 548 | 472 | 406 | 420 | 362 | 407 | 463 |
1960 | 461 | 606 | 432 | 391 | 417 | 622 | 535 | 419 | 472 | 390 | 461 | 508 |
sns.heatmap(pivot)
<AxesSubplot:xlabel='month', ylabel='year'>
# Add options here: cell border lines and per-cell value annotations
sns.heatmap(pivot, linewidths=.2, annot=True, fmt="d")
<AxesSubplot:xlabel='month', ylabel='year'>
sns.heatmap(pivot, cmap="YlGnBu")
<AxesSubplot:xlabel='month', ylabel='year'>