[청년취업사관학교 새싹]핀테커스 수업 3주차(9/14 Day-14)

장민정·2023년 9월 15일
0
post-thumbnail

<수업 내용>

matplotlib

막대그래프

  • X : 범주형 데이터
  • Y : 연속형 데이터
import matplotlib.pyplot as plt

# Bar chart: one bar per fruit, with a colour and a legend label per bar.
# Each row is (fruit, supply count, legend label, bar colour).
# The "_red" label starts with an underscore so the cherry bar is folded
# into the already existing "red" legend entry instead of adding a new one.
fruit_rows = [
    ('apple', 40, 'red', 'tab:red'),
    ('blueberry', 100, 'blue', 'tab:blue'),
    ('cherry', 30, '_red', 'tab:red'),
    ('orange', 55, 'orange', 'tab:orange'),
]
# Transpose the rows into the four parallel lists that ax.bar() expects.
fruits, counts, bar_labels, bar_colors = (list(col) for col in zip(*fruit_rows))

fig, ax = plt.subplots()
ax.bar(fruits, counts, label=bar_labels, color=bar_colors)
ax.set_title('Fruit supply by kind and color')
ax.set_ylabel('fruit supply')
ax.legend(title='Fruit color')

plt.show()

라인차트

  • X : 연속형(시계열)
  • Y : 연속형
import matplotlib.pyplot as plt
import numpy as np

# Line chart: a sine wave sampled every 0.01 s over the interval [0, 2).
t = np.arange(0.0, 2.0, 0.01)
s = 1 + np.sin(2 * np.pi * t)  # sine shifted up by 1

fig, ax = plt.subplots()
ax.plot(t, s)

# Axes.set() applies several properties in one call; every keyword has to
# match a property name exactly (`ylabel`) and be separated by commas.
ax.set(xlabel='time (s)', ylabel='voltage (mV)',
       title='About as simple as it gets, folks')
plt.show()

파이차트

  • X : 연속형
import matplotlib.pyplot as plt
# Pie chart: one wedge per label, sized by the matching entry in `sizes`.
labels = ['Frogs', 'Hogs', 'Dogs', 'Logs']
sizes = [15, 30, 45, 10]

fig, ax = plt.subplots()
ax.pie(sizes, labels=labels)

# Render the figure explicitly, as the bar/line/heatmap examples do; without
# this call the chart only appears in environments that display figures
# automatically (e.g. a notebook cell).
plt.show()

히트맵

  • X : 범주형
  • Y : 범주형
  • value : 연속형
import numpy as np
import matplotlib
import matplotlib as mpl
import matplotlib.pyplot as plt

# Heatmap: rows are vegetables, columns are farmers, and each cell holds the
# harvest value for that (vegetable, farmer) pair.
vegetables = ["cucumber", "tomato", "lettuce", "asparagus",
              "potato", "wheat", "barley"]
farmers = ["Farmer Joe", "Upland Bros.", "Smith Gardening",
           "Agrifun", "Organiculture", "BioGoods Ltd.", "Cornylee Corp."]

harvest = np.array([[0.8, 2.4, 2.5, 3.9, 0.0, 4.0, 0.0],
                    [2.4, 0.0, 4.0, 1.0, 2.7, 0.0, 0.0],
                    [1.1, 2.4, 0.8, 4.3, 1.9, 4.4, 0.0],
                    [0.6, 0.0, 0.3, 0.0, 3.1, 0.0, 0.0],
                    [0.7, 1.7, 0.6, 2.6, 2.2, 6.2, 0.0],
                    [1.3, 1.2, 0.0, 0.0, 0.0, 3.2, 5.1],
                    [0.1, 2.0, 0.0, 1.4, 0.0, 1.9, 6.3]])

fig, ax = plt.subplots()
im = ax.imshow(harvest)

# Show all ticks and label them with the respective list entries
ax.set_xticks(np.arange(len(farmers)), labels=farmers)
ax.set_yticks(np.arange(len(vegetables)), labels=vegetables)

# Rotate the tick labels and set their alignment.
plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
         rotation_mode="anchor")

# Annotate every cell with its value. The column index goes on the x-axis
# and the row index on the y-axis, hence text(col, row, ...).
for (row, col), value in np.ndenumerate(harvest):
    ax.text(col, row, value, ha="center", va="center", color="w")

ax.set_title("Harvest of local farmers (in tons/year)")
fig.tight_layout()  # re-adjust the layout so the labels fit in the figure
plt.show()

스캐터차트(산점도)

  • X : 연속형
  • Y : 연속형
  • 원사이즈 : 연속형
import numpy as np
import matplotlib.pyplot as plt

# Scatter (bubble) chart of N random points with random colour and size.
N = 50

# Four consecutive draws of N random values each: x position, y position,
# colour value, and the seed for the marker size.
x, y, colors, size_seed = (np.random.rand(N) for _ in range(4))
area = (30 * size_seed) ** 2  # 0 to 15 point radii

# s: marker size, c: colour values, alpha: transparency
marker_style = dict(s=area, c=colors, alpha=0.5)
plt.scatter(x, y, **marker_style)
plt.show()

히스토그램

  • X, Y : 연속형
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import colors
from matplotlib.ticker import PercentFormatter

# Create a random number generator with a fixed seed for reproducibility
rng = np.random.default_rng(19680801)

N_points = 100000
n_bins = 20

# Draw N_points samples from the standard normal distribution
dist1 = rng.standard_normal(N_points)
fig, axs = plt.subplots()

# We can set the number of bins with the *bins* keyword argument.
axs.hist(dist1, bins=n_bins)

# Render the figure explicitly, as the bar/line/heatmap examples do; without
# this call the histogram only appears in environments that display figures
# automatically (e.g. a notebook cell).
plt.show()

박스플롯, 바이올린플롯

  • X, Y : 연속형
import matplotlib.pyplot as plt
import numpy as np

# Violin plot and box plot of the same data, drawn side by side.
fig, axs = plt.subplots(1, 2, figsize=(9, 4))
violin_ax, box_ax = axs

# Four data groups of 100 normal samples each (mean 0), with standard
# deviations 6, 7, 8 and 9.
all_data = [np.random.normal(loc=0, scale=std, size=100) for std in range(6, 10)]

# Left panel: violin plot showing medians but not means.
violin_ax.set_title('Violin plot')
violin_ax.violinplot(all_data, showmedians=True, showmeans=False)

# Right panel: box plot.
box_ax.set_title('Box plot')
box_ax.boxplot(all_data)

plt.show()

matplotlib vs seaborn vs plotly

matplotlib

  • 가장 먼저 생긴 시각화 라이브러리
  • seaborn도 matplotlib 기반으로 만들어짐
  • 그래프의 X, Y 값은 리스트(배열) 형태로 넣어줘야 한다.

seaborn

  • matplotlib 기반으로 만들어졌다.
  • pandas에서 활용하는 데이터프레임 데이터타입의 데이터를 시각화하는데 유리
import seaborn as sns
# Load the "penguins" example dataset and preview its first rows.
df = sns.load_dataset("penguins")
df.head()

# Variant 1: aggregate first (mean body mass per island), then plot the
# aggregated table using its index as the x values.
island_mass = df[['island', 'body_mass_g']]
g1 = island_mass.groupby('island').mean()
sns.barplot(x=g1.index, y="body_mass_g", data=g1)

# Variant 2: pass the raw rows and column names directly to barplot.
sns.barplot(x="island", y="body_mass_g", data=df)

# Variant 3: additionally split the bars by the "sex" column.
sns.barplot(x="island", y="body_mass_g", hue="sex", data=df)

plotly

  • 인터랙티브하다.
  • 배열, 데이터프레임 가능
import plotly.express as px
# Keep only the Canada rows of the gapminder example dataset and preview them.
gapminder = px.data.gapminder()
data_canada = gapminder.query("country == 'Canada'")
data_canada.head()

# Bar chart of the 'pop' column against 'year'.
fig = px.bar(data_frame=data_canada, x='year', y='pop')
fig.show()

import plotly.express as px
# Load the iris example dataset and preview its first rows.
df = px.data.iris()
df.head()

# Scatter plot: position from the sepal columns, colour from species,
# marker size from petal length, and petal width added to the hover data.
fig = px.scatter(
    data_frame=df,
    x="sepal_width",
    y="sepal_length",
    color="species",
    size='petal_length',
    hover_data=['petal_width'],
)
fig.show()

import plotly.graph_objects as go
# Build the bar trace from plain lists, then wrap it in a Figure.
animals = ['giraffes', 'orangutans', 'monkeys']
animal_counts = [20, 14, 23]
bar_trace = go.Bar(x=animals, y=animal_counts)

fig = go.Figure(data=[bar_trace])
fig.show()

0개의 댓글