<수업 내용>
matplotlib
막대그래프
import matplotlib.pyplot as plt
# Bar chart: supply per fruit kind, each bar colored by the fruit's color.
fruits = ['apple', 'blueberry', 'cherry', 'orange']
counts = [40, 100, 30, 55]
# Per the matplotlib legend docs, a label that starts with an underscore is
# skipped by the legend, so the second red bar ('_red') adds no duplicate entry.
bar_labels = ['red', 'blue', '_red', 'orange']
bar_colors = ['tab:red', 'tab:blue', 'tab:red', 'tab:orange']

fig, ax = plt.subplots()
ax.bar(fruits, counts, color=bar_colors, label=bar_labels)
# Axis label and title set in a single call.
ax.set(ylabel='fruit supply', title='Fruit supply by kind and color')
ax.legend(title='Fruit color')
plt.show()
라인차트
import matplotlib.pyplot as plt
import numpy as np
# Line chart: a shifted sine wave plotted against time.
t = np.arange(0.0, 2.0, 0.01)  # sample points from 0.0 up to (not including) 2.0, step 0.01
s = 1 + np.sin(2 * np.pi * t)  # sine of 2*pi*t, shifted up by 1
fig, ax = plt.subplots()
ax.plot(t, s)
# Fixed: the keyword was misspelled `ylabe` and the comma before `title=` was
# missing, which made this line a SyntaxError.
ax.set(xlabel='time (s)', ylabel='voltage (mV)',
       title='About as simple as it gets, folks')
plt.show()
파이차트
import matplotlib.pyplot as plt
# Pie chart: each wedge's share is its value in `sizes` relative to the total.
labels = ['Frogs', 'Hogs', 'Dogs', 'Logs']
sizes = [15, 30, 45, 10]
fig, ax = plt.subplots()
ax.pie(sizes, labels=labels)
# Added so the figure is actually displayed when run as a script, matching
# the other chart examples in these notes.
plt.show()
히트맵
- X : 범주형
- Y : 범주형
- value : 연속형
import numpy as np
import matplotlib
import matplotlib as mpl
import matplotlib.pyplot as plt
# Heatmap: rows are vegetables, columns are farmers, cell value is the harvest.
vegetables = ["cucumber", "tomato", "lettuce", "asparagus",
              "potato", "wheat", "barley"]
farmers = ["Farmer Joe", "Upland Bros.", "Smith Gardening",
           "Agrifun", "Organiculture", "BioGoods Ltd.", "Cornylee Corp."]

# harvest[i, j] is the value for vegetables[i] grown by farmers[j].
harvest = np.array([[0.8, 2.4, 2.5, 3.9, 0.0, 4.0, 0.0],
                    [2.4, 0.0, 4.0, 1.0, 2.7, 0.0, 0.0],
                    [1.1, 2.4, 0.8, 4.3, 1.9, 4.4, 0.0],
                    [0.6, 0.0, 0.3, 0.0, 3.1, 0.0, 0.0],
                    [0.7, 1.7, 0.6, 2.6, 2.2, 6.2, 0.0],
                    [1.3, 1.2, 0.0, 0.0, 0.0, 3.2, 5.1],
                    [0.1, 2.0, 0.0, 1.4, 0.0, 1.9, 6.3]])

fig, ax = plt.subplots()
im = ax.imshow(harvest)

# One tick per column/row, labelled with the farmer/vegetable names.
ax.set_xticks(np.arange(len(farmers)), labels=farmers)
ax.set_yticks(np.arange(len(vegetables)), labels=vegetables)

# Rotate the x tick labels so the long farmer names do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
         rotation_mode="anchor")

# Write each cell's value at its center. x is the column index (j) and
# y is the row index (i). Fixed: the loop bodies had lost their indentation,
# which made this an IndentationError.
for i in range(len(vegetables)):
    for j in range(len(farmers)):
        text = ax.text(j, i, harvest[i, j],
                       ha="center", va="center", color="w")

ax.set_title("Harvest of local farmers (in tons/year)")
fig.tight_layout()
plt.show()
스캐터차트 (산점도)
- X : 연속형
- Y : 연속형
- 원 크기(size) : 연속형
import numpy as np
import matplotlib.pyplot as plt
# Scatter chart: N random points with random color values and marker sizes.
N = 50
x = np.random.rand(N)
y = np.random.rand(N)
colors = np.random.rand(N)
# Marker area is the square of a random value scaled by 30.
radii = 30 * np.random.rand(N)
area = radii ** 2
plt.scatter(x=x, y=y, s=area, c=colors, alpha=0.5)
plt.show()
히스토그램
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import colors
from matplotlib.ticker import PercentFormatter
# Histogram of standard-normal samples.
rng = np.random.default_rng(19680801)  # seeded generator
N_points = 100000
n_bins = 20
dist1 = rng.standard_normal(N_points)
fig, axs = plt.subplots()
axs.hist(dist1, bins=n_bins)
# Added so the figure is actually displayed when run as a script, matching
# the other chart examples in these notes.
plt.show()
박스플롯, 바이올린플롯
import matplotlib.pyplot as plt
import numpy as np
# Violin plot and box plot of the same data, side by side.
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(9, 4))
violin_ax, box_ax = axs

# Four samples of 100 normal draws each, with standard deviations 6, 7, 8, 9.
all_data = [np.random.normal(0, std, 100) for std in range(6, 10)]

# Left panel: violin plot showing medians but not means.
violin_ax.violinplot(all_data, showmeans=False, showmedians=True)
violin_ax.set_title('Violin plot')

# Right panel: box plot of the same samples.
box_ax.boxplot(all_data)
box_ax.set_title('Box plot')

plt.show()
matplotlib vs seaborn vs plotly
matplotlib
- 가장 먼저 생긴 시각화 라이브러리
- seaborn도 matplotlib 기반으로 만들어짐
- 그래프의 X, Y 값을 리스트(배열) 형태로 넣어줘야 한다.
seaborn
- matplotlib 기반으로 만들어졌다.
- pandas의 데이터프레임(DataFrame) 형태 데이터를 시각화하는 데 유리
import seaborn as sns
# Load the "penguins" example dataset through seaborn.
df = sns.load_dataset("penguins")
df.head()

# Variant 1: aggregate with pandas first (mean body mass per island),
# then plot the pre-aggregated table using its index as the x values.
g1 = df.groupby('island')[['body_mass_g']].mean()
sns.barplot(data=g1, x=g1.index, y="body_mass_g")

# Variant 2: pass the raw rows and let barplot handle the per-island grouping
# (presumably a mean by default -- confirm in the seaborn barplot docs).
sns.barplot(data=df, x="island", y="body_mass_g")

# Variant 3: same as variant 2, additionally split by the "sex" column.
sns.barplot(data=df, x="island", y="body_mass_g", hue="sex")
plotly
import plotly.express as px
# Plotly Express bar chart: 'pop' per 'year' for the Canada rows of gapminder.
gapminder = px.data.gapminder()
data_canada = gapminder.query("country == 'Canada'")
data_canada.head()
fig = px.bar(data_frame=data_canada, x='year', y='pop')
fig.show()
import plotly.express as px
# Plotly Express scatter of the iris dataset: position from the sepal columns,
# color from species, marker size from petal length, petal width on hover.
df = px.data.iris()
df.head()
fig = px.scatter(
    df,
    x="sepal_width",
    y="sepal_length",
    color="species",
    size='petal_length',
    hover_data=['petal_width'],
)
fig.show()
import plotly.graph_objects as go
# graph_objects API: build the bar trace explicitly, then wrap it in a Figure.
animal_bar = go.Bar(x=['giraffes', 'orangutans', 'monkeys'], y=[20, 14, 23])
fig = go.Figure([animal_bar])
fig.show()