출처: https://data101.oopy.io/plolty-tutorial-guide-in-korean
fig = go.Figure() : go를 통해 그래프를 하나하나 설정하며 제작
fig = px.scatter() : px를 통해 템플릿으로 그래프를 빠르게 제작
pip install plotly # 설치
pip install plotly --upgrade # 업데이트
- Bar graph
- Scatter Plot
a. Dot Plot
b. Bubble Chart
- Line chart
- Pie graph
a. Treemap
b. Sunburst Chart
- Statistical Charts
a. Box Plot
b. Strip Plot
c. Violin Plot
d. Histogram
Chart Studio: plotly로 시각화한 차트를 Chart Studio에 upload
pip install chart_studio로 설치!
# Upload a Plotly figure to Chart Studio.
import chart_studio
username = '' # your Chart Studio username (a Plotly account is required)
api_key = '' # your API key (Settings > Regenerate Key)
# Hand the credentials to chart_studio's credentials-file helper.
chart_studio.tools.set_credentials_file(username=username, api_key=api_key)
# NOTE(review): `fig` is not assigned anywhere above this line in this file;
# run this only after a chart has been stored in `fig`.
chart_studio.plotly.plot(fig, filename = '파일이름', auto_open=True) # fig: variable holding the chart to upload
## Running the code above opens the chart's link in a new window and also returns the link below the notebook cell
import plotly.express as px

# Tips sample dataset that ships with Plotly Express.
bills_df = px.data.tips()
bills_df.head()

# Mean total bill for every (day, sex) pair; both "average" charts plot this.
mean_bill_df = bills_df.groupby(['day', 'sex'])[['total_bill']].mean().reset_index()

# Bar graph 1: average bill per day, coloured by sex.
fig = px.bar(
    mean_bill_df,
    x='day',
    y='total_bill',
    color='sex',
    title='Average Bills per Day',
    category_orders={'day': ['Thur', 'Fri', 'Sat', 'Sun']},
    color_discrete_sequence=px.colors.qualitative.Pastel,
)
fig.show()

# Bar graph 2: same data, but bars sit side by side (barmode='group').
# Without barmode, px.bar uses its documented default 'relative', which
# stacks the bars instead.
fig = px.bar(
    mean_bill_df,
    x='day',
    y='total_bill',
    color='sex',
    height=400,
    title='Average Bills per Day',
    category_orders={'day': ['Thur', 'Fri', 'Sat', 'Sun']},
    color_discrete_sequence=px.colors.qualitative.Pastel,
    barmode='group',
)
fig.show()

# Bar graph 3: the raw rows are passed without any aggregation,
# so each day's bar is built from the individual bills.
fig = px.bar(
    bills_df,
    x='day',
    y='total_bill',
    color='sex',
    title='Total Bills per Day',
    category_orders={'day': ['Thur', 'Fri', 'Sat', 'Sun']},
    color_discrete_sequence=px.colors.qualitative.Pastel,
)
fig.show()

# Bar graph 4: one facet column per sex, grouped bars coloured by smoker.
fig = px.bar(
    bills_df,
    x='day',
    y='total_bill',
    color='smoker',
    barmode='group',
    facet_col='sex',
    category_orders={'day': ['Thur', 'Fri', 'Sat', 'Sun']},
    color_discrete_sequence=px.colors.qualitative.Safe,
    template='plotly',
)
fig.show()
# Iris sample dataset that ships with Plotly Express.
iris_df = px.data.iris()
iris_df.head()

# Scatter plot: sepal width against sepal length, one colour per species.
iris_scatter_options = dict(
    x='sepal_width',
    y='sepal_length',
    color='species',
    color_discrete_sequence=px.colors.qualitative.Safe,
)
fig = px.scatter(iris_df, **iris_scatter_options)
fig.show()

# Scatter plot of tip against total bill with an OLS trendline drawn in gold.
# NOTE(review): Plotly's 'ols' trendline is documented as requiring the
# statsmodels package -- confirm it is installed in your environment.
tip_scatter_options = dict(
    x='total_bill',
    y='tip',
    trendline='ols',
    color_discrete_sequence=px.colors.qualitative.Pastel1,
    trendline_color_override='gold',
)
fig = px.scatter(bills_df, **tip_scatter_options)
fig.show()
데이터 가져오기
# Gapminder sample dataset that ships with Plotly Express.
gapminder_df = px.data.gapminder()
gapminder_df.head()

# Dot plot: one row per country in the Americas, life expectancy on the
# x axis, with the year mapped onto a continuous colour scale.
americas_df = gapminder_df.query("continent=='Americas'")
fig = px.scatter(
    americas_df,
    x='lifeExp',
    y='country',
    color='year',
    color_continuous_scale='Burgyl',
)
fig.show()

# Bubble chart for 2007: GDP per capita against life expectancy, with the
# marker size taken from 'pop' and the colour from the continent.
year_2007_df = gapminder_df.query("year == 2007")
fig = px.scatter(
    year_2007_df,
    x='gdpPercap',
    y='lifeExp',
    size='pop',
    color='continent',
    hover_name='country',
)
fig.show()
# Line chart: life expectancy over the years for the Oceania rows,
# one line (and one marker symbol) per country.
oceania_df = gapminder_df.query("continent == 'Oceania'")
fig = px.line(
    oceania_df,
    x='year',
    y='lifeExp',
    color='country',
    symbol='country',
    color_discrete_sequence=px.colors.qualitative.Pastel1,
)
fig.show()
# Pie chart: each Asian country's share of population in the 2007 gapminder rows.
# Build an explicit, already-sorted copy so the relabelling below modifies
# only this frame and never triggers pandas' chained-assignment warning on a
# slice of gapminder_df.
temp_df = (
    gapminder_df.query("year == 2007")
    .query("continent == 'Asia'")
    .sort_values(by='pop', ascending=False)
    .copy()
)
# Keep the 15 most populous countries and relabel every remaining row as
# 'Other countries'. The column is located by name rather than assuming
# that 'country' is the first column.
temp_df.iloc[15:, temp_df.columns.get_loc('country')] = 'Other countries'
fig = px.pie(
    temp_df,
    values='pop',
    names='country',
    color_discrete_sequence=px.colors.qualitative.Antique,
)
fig.show()
# numpy is needed for the weighted average below; nothing earlier in this
# file imports it, so bring it into scope here.
import numpy as np

# Treemap of the 2007 rows: world > continent > country, tile size taken
# from 'pop' and tile colour from 'lifeExp'.
temp_df = gapminder_df.query("year == 2007")
fig = px.treemap(
    temp_df,
    path=[px.Constant('world'), 'continent', 'country'],
    values='pop',
    color='lifeExp',
    color_continuous_scale='RdBu',
    # Centre the diverging colour scale on the 'pop'-weighted mean of 'lifeExp'.
    color_continuous_midpoint=np.average(temp_df['lifeExp'], weights=temp_df['pop']),
)
fig.update_layout(margin=dict(t=50, l=25, r=25, b=25))
fig.show()
# Sunburst chart: rings follow day > time > sex, sector size comes from
# 'tip' and the colour from 'time'.
sunburst_options = dict(
    path=['day', 'time', 'sex'],
    values='tip',
    color='time',
    color_discrete_sequence=px.colors.qualitative.Pastel,
)
fig = px.sunburst(bills_df, **sunburst_options)
fig.show()
# Options shared by both box plots: total bill by sex, coloured by smoker.
box_options = dict(
    x='sex',
    y='total_bill',
    color='smoker',
    color_discrete_sequence=px.colors.qualitative.Pastel,
)

# Box plot 1: px.box's default point display.
fig = px.box(bills_df, **box_options)
fig.show()

# Box plot 2: additionally draw every observation (points='all').
fig = px.box(bills_df, points='all', **box_options)
fig.show()
# Strip plot 1: total bill by sex, coloured by smoker.
fig = px.strip(
    bills_df,
    x='sex',
    y='total_bill',
    color='smoker',
    color_discrete_sequence=px.colors.qualitative.Pastel,
)
fig.show()

# Strip plot 2: total bill along x, meal time along y, coloured by sex,
# with one facet column per day in Thur-to-Sun order.
fig = px.strip(
    bills_df,
    x='total_bill',
    y='time',
    color='sex',
    facet_col='day',
    category_orders={'day': ['Thur', 'Fri', 'Sat', 'Sun']},
    color_discrete_sequence=px.colors.qualitative.Safe,
    template='plotly',
)
fig.show()
# Violin plot 1: total bill by sex, coloured by smoker.
fig = px.violin(
    bills_df,
    x='sex',
    y='total_bill',
    color='smoker',
    color_discrete_sequence=px.colors.qualitative.Pastel,
)
fig.show()

# Violin plot 2: total bill coloured by sex, with an inner box plot
# (box=True) and every observation drawn (points='all').
fig = px.violin(
    bills_df,
    y='total_bill',
    color='sex',
    box=True,
    points='all',
    color_discrete_sequence=px.colors.qualitative.Pastel,
)
fig.show()
# Histogram 1: total bill with nbins=10.
fig = px.histogram(bills_df, x='total_bill', nbins=10)
fig.show()

# Histogram 2: categorical x axis (day), shown in Thur-to-Sun order.
fig = px.histogram(
    bills_df,
    x='day',
    category_orders={'day': ['Thur', 'Fri', 'Sat', 'Sun']},
)
fig.show()

# Histogram 3: total bill on x with 'tip' supplied as the y values,
# coloured by sex, plus a box-plot marginal.
fig = px.histogram(
    bills_df,
    x='total_bill',
    y='tip',
    color='sex',
    marginal='box',
    color_discrete_sequence=px.colors.qualitative.Pastel,
)
fig.show()