boot camp day6 and 7

HIROYOSHI·2022년 2월 8일

2022/2/7 & 2022/2/8

matplotlib.pyplot

matplotlib은 데이터를 시각화하기 위한 도구로서, numpy와 pandas를 응용하여 만든 데이터를 시각화하는 데 사용한다.

import 하는 방법으로는

import matplotlib as mpl
import matplotlib.pyplot as plt

# Plot a single list of y values; x defaults to the indices 0, 1, 2, 3.
y_values = [1, 2, 3, 4]
plt.plot(y_values)

# Name both axes, then display the figure.
plt.xlabel('x_label')
plt.ylabel('y_label')
plt.show()

위의 코드는 직선 그래프(y=x+1)를 그리는 코드로,
plt.plot() 안에 있는 리스트의 숫자들이 y값으로 들어가고 인덱스인 [0,1,2,3]이 x값으로 들어가는 형식으로 mapping이 된다. label은 x축과 y축을 보기 좋게 하려고 이름을 달아놓은 것이다.

y=x^2은

# Plot y = x**2 for the integers 0..9; with one argument, plot() uses the index as x.
x = np.arange(10)
squares = x * x
plt.plot(squares)

과 같이 나타낸다.
numpy를 이용하여 [0, 1, ..., 9] 배열을 만들고, 그 배열의 각 원소를 제곱한 값을 y값으로 하여 함수 그래프를 그리는 것이다.

plt.axis()

plt.axis() 함수는 x와 y의 범위를 지정해주는 함수입니다. 괄호 안에 [xmin, xmax, ymin, ymax] 순서로, 예를 들어 [0,100,0,100] 과 같은 식으로 넣을 수 있습니다.

style

# Format string "g*:" = green color, star markers, dotted line.
plt.plot(x,y1,"g*:")
과 같이 뒤에 문자열을 넣어주는 경우가 있습니다. 이때 문자열의 첫 문자는 색을 지정하는 역할이고, 두 번째 문자는 데이터 지점에 찍히는 마커(point)의 모양을 지정하는 역할이며, 마지막 문자는 선의 종류를 정하는 역할입니다.

figure(), savefig()

위 함수들은 그래프(아래 예시에서는 삼각함수 그래프)를 그릴 figure를 만들고, 그 결과를 이미지 파일로 저장하는 함수들입니다.

# Plot sin and cos over [0, 2*pi] on one figure and save it as an image file.
x = np.linspace(0, np.pi * 2, 100)  # 100 evenly spaced sample points
fig = plt.figure()

plt.plot(x, np.sin(x), 'r-', label='sin_curve')  # solid red line
plt.plot(x, np.cos(x), 'b:', label='cos_curve')  # dotted blue line

# The label= arguments are only rendered when a legend is requested.
plt.legend(loc='upper right')

# The output format is chosen from the file extension.
fig.savefig('sin_cos_fig.png')

위와같이 fig = plt.figure()를 해주고 그래프를 그린 뒤 fig.savefig('sin_cos_fig.png')를 하면 png,jpg 등의 지정한 파일들로 그림이 저장됩니다.

저장할 수 있는 파일들로는
{'eps': 'Encapsulated Postscript',
'jpeg': 'Joint Photographic Experts Group',
'jpg': 'Joint Photographic Experts Group',
'pdf': 'Portable Document Format',
'pgf': 'PGF code for LaTeX',
'png': 'Portable Network Graphics',
'ps': 'Postscript',
'raw': 'Raw RGBA bitmap',
'rgba': 'Raw RGBA bitmap',
'svg': 'Scalable Vector Graphics',
'svgz': 'Scalable Vector Graphics',
'tif': 'Tagged Image File Format',
'tiff': 'Tagged Image File Format'}
이 있습니다.

style-theme

import numpy as np

# Select the plot theme before any figure is created.
# Other themes tried here: 'dark_background', 'seaborn-whitegrid'
# (NOTE(review): the 'seaborn-*' names were renamed to 'seaborn-v0_8-*' in
# newer matplotlib releases — confirm against the installed version).
plt.style.use('Solarize_Light2')

fig = plt.figure()
x = np.linspace(0, np.pi * 2, 100)

# Draw both curves: sin as a solid red line, cos as a dotted blue line.
plt.plot(x, np.sin(x), 'r-', label='sin 그래프')
plt.plot(x, np.cos(x), 'b:', label='cos 그래프')

# Decorate the axes: title, axis names, and a legend built from the labels.
plt.title('Sin Cos 그래프')
plt.xlabel("x value")
plt.ylabel('y value')
plt.legend(loc='upper right')

plt.style.use() 괄호안에 아래에 있는 명령어들을 입력하면 theme를 변경할 수 있습니다.

['Solarize_Light2',
'_classic_test_patch',
'bmh',
'classic',
'dark_background',
'fast',
'fivethirtyeight',
'ggplot',
'grayscale',
'seaborn',
'seaborn-bright',
'seaborn-colorblind',
'seaborn-dark',
'seaborn-dark-palette',
'seaborn-darkgrid',
'seaborn-deep',
'seaborn-muted',
'seaborn-notebook',
'seaborn-paper',
'seaborn-pastel',
'seaborn-poster',
'seaborn-talk',
'seaborn-ticks',
'seaborn-white',
'seaborn-whitegrid',
'tableau-colorblind10']

subplot

# subplot demo: four different chart types on a 2x2 grid of axes.
plt.style.use("default")
fig, ax = plt.subplots(nrows=2, ncols=2)
(scatter_ax, bar_ax), (line_ax, image_ax) = ax

# Top-left: scatter plot of two normally distributed samples.
scatter_ax.scatter(np.random.randn(100), np.random.randn(100))

# Top-right: bar chart of ten uniformly distributed heights in [1, 10).
bar_ax.bar(np.arange(10), np.random.uniform(1, 10, 10))

# Bottom-left: cosine curve over [0, 10].
x = np.linspace(0, 10, 100)
y = np.cos(x)
line_ax.plot(x, y)

# Bottom-right: a 5x5 array of uniform values rendered as a 2-D image.
z = np.random.uniform(0, 1, (5, 5))
image_ax.imshow(z)

subplot은 여러개의 그래프를 한번에 그리는 함수입니다.

# Challenge problem 4.1: five panels of different sizes laid out on a 3x3 grid.
fig = plt.figure()
spec = fig.add_gridspec(nrows=3, ncols=3)

# Assign each axes object the grid cells it should span.
ax1 = fig.add_subplot(spec[0, :])    # entire first row
ax2 = fig.add_subplot(spec[1, :2])   # second row, first two columns
ax3 = fig.add_subplot(spec[1:, 2])   # last column, second and third rows
ax4 = fig.add_subplot(spec[2, 0])    # third row, first column
ax5 = fig.add_subplot(spec[2, 1])    # third row, second column

# ax1: scatter of two normal samples, with x stretched by a factor of 2.
x = np.random.randn(100)
y = np.random.randn(100)
ax1.scatter(x * 2, y)

# ax2: the straight line through (0, 0) .. (3, 3).
ramp = np.arange(4)
ax2.plot(ramp, ramp)

# ax3: 5x5 array of uniform values shown as an image.
z = np.random.uniform(0, 1, (5, 5))
ax3.imshow(z)

# ax4: cosine curve over [0, 10].
t = np.linspace(0, 10, 100)
ax4.plot(t, np.cos(t))

# ax5: bar chart of ten uniform heights in [1, 10).
x = np.arange(10)
y = np.random.uniform(1, 10, 10)
ax5.bar(x, y)

이 코드는 위에 있는 subplots 예제와는 다르게, fig.add_gridspec(n,n)으로 figure에 그리드를 만들고
axn = fig.add_subplot(spec[0, :]) 과 같이 axn이 그리드의 어느 범위를 차지할지를 정해준 뒤
axn.(그래프)를 해주면 그 할당된 범위 만큼의 공간에 그래프를 그립니다.

csv.reader() and draw graph

import csv

class human:
    """A person whose weight changes with meals and exercise.

    The constructor arguments are stored on the instance as ``Name``,
    ``Age`` and ``Weight``.
    """

    def __init__(self, name, age, weight):
        """Store the given name, age and starting weight."""
        self.Name, self.Age, self.Weight = name, age, weight

    def eat(self, meal):
        """Increase ``Weight`` by ``meal * 0.01``."""
        gained = meal * 0.01
        self.Weight += gained

    def gym(self, kaloli):
        """Decrease ``Weight`` by ``kaloli * 0.002``."""
        burned = kaloli * 0.002
        self.Weight -= burned

    def write(self):
        """Print a one-line summary of the person to standard output."""
        summary = f"{self.Name} is {self.Age} years old and weights {self.Weight}Kg"
        print(summary)

# Main script: replay the eat/gym log from weight.csv and plot the weight after each entry.
minsu = human("Min-su", 16, 55.0)
weight = {}  # maps row[0] -> weight right after that row was applied

# A context manager closes the file even if a row fails to parse;
# newline="" is what the csv module expects for files handed to csv.reader.
with open("weight.csv", newline="") as f:
    data = csv.reader(f)
    for row in data:
        # row[2] names the action and row[3] holds its integer amount.
        # NOTE(review): row[0] is presumably a date label — confirm against the CSV.
        if row[2] == "eat":
            minsu.eat(int(row[3]))
        elif row[2] == "gym":
            minsu.gym(int(row[3]))
        else:
            continue  # rows that are neither "eat" nor "gym" leave the weight unchanged
        weight[row[0]] = minsu.Weight

# Materialize the dict views once so scatter() and plot() receive the same sequences.
labels = list(weight.keys())
values = list(weight.values())

plt.title("Weight Progress state")
plt.xticks(rotation=45)  # rotate the x tick labels 45 degrees
plt.ylabel("Weight", fontsize=12)
plt.scatter(labels, values)  # one dot per log entry
plt.plot(labels, values)     # line connecting the dots
# plt.show() is left disabled as in the original; call it when running as a plain script.

class를 생성하고 메소드들을 정의한 뒤 클래스 인스턴스를 생성하고, csv 데이터를 읽어들여 for문으로 minsu의 weight를 딕셔너리로 정리한 다음, plt.scatter와 plt.plot을 이용하여 점을 선으로 이은 모양의 그래프를 그립니다.

정규분포

정규분포가 돼있는 data를 만들어서 histogram으로 그릴 수 있습니다.

# Draw two normally distributed samples and overlay their histograms.
# NOTE: `scale` is the standard deviation of the distribution, not the variance.
f1 = np.random.normal(loc=0, scale=1, size=10000)   # mean 0, standard deviation 1
f2 = np.random.normal(loc=3, scale=.5, size=10000)  # mean 3, standard deviation 0.5

# Semi-transparent bars (alpha < 1) keep both histograms visible where they overlap.
plt.hist(f1, bins=200, color='red', alpha=.7, label='loc=0,scale=1')
plt.hist(f2, bins=200, color='blue', alpha=.5, label='loc=3,scale=0.5')

# The label= arguments are only rendered when a legend is requested.
plt.legend()

그래프를 그릴 수 있습니다.

random scatter

# Scatter 10,000 points whose x and y coordinates are independent standard-normal draws.
sample_count = 10000
xData = np.random.randn(sample_count)
yData = np.random.randn(sample_count)

# With a very low alpha each point is nearly transparent, so darker areas mean more overlap.
plt.scatter(x=xData, y=yData, alpha=0.01)

이렇듯 정규분포를 만들어서 산점도를 그릴 수 있습니다.

seaborn

seaborn은 matplotlib 기반의 시각화 라이브러리입니다. 유익한 통계 그래프를 그리기 위해 인터페이스를 제공합니다.
import와 데이터가져오는 방법의 예시로는

import seaborn as sns
# Apply seaborn's "darkgrid" theme to the plots that follow.
sns.set_theme(style="darkgrid")
# Load the "tips" example dataset.
# NOTE(review): presumably fetched from seaborn's online data repository — confirm network access.
tips = sns.load_dataset("tips")

tips 데이터셋을 불러와서 변수 tips에 저장합니다. tips의 데이터 형태는 DataFrame입니다.

# Pairwise-relationship grids for `tips`, in three renderings.
# Default kind: scatter plots.
sns.pairplot(tips)
# kind='hist': histograms.
sns.pairplot(tips, kind='hist')
# kind='kde': kernel density estimates.
sns.pairplot(tips, kind='kde')

1번째는 산점도를 그리고 2번째는 히스토그램을 3번째는 커널밀도추정 을 해주는데
data에 있는 내용들의 상관관계를 한눈에 보기 쉽게 해주는 역할을 합니다.

다음으로는 타이타닉의 데이터를 가져와서 시각화 해볼 것 입니다.
데이터를 가져오는 것은 아까와 동일하게

# Load the "titanic" example dataset the same way as "tips".
titanic = sns.load_dataset("titanic")

와같이 데이터를 가져오고

# Count rows per value of "class" on the x axis, split into colored bars by the "who" column.
sns.countplot(x="class", hue="who", data=titanic)

x축에는 타이타닉호의 급을넣고 hue='who'는 색별로 who를 나눠서 그래프를 그립니다.

내가 좋아하는 맛집 찾아보기

import folium
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Read the restaurant list and place one marker per row on an interactive folium map.
foodplace_df = pd.read_csv('foodplace_df.csv', encoding='utf-8')

# Pull each column out once instead of indexing the DataFrame row by row.
x = foodplace_df['위도'].tolist()       # latitude
y = foodplace_df['경도'].tolist()       # longitude
name = foodplace_df['상호명'].tolist()  # business name
popup = foodplace_df['링크'].tolist()   # link embedded in the popup

# Center the map on the first entry of the list.
foodplace_map = folium.Map(location=[x[0], y[0]], zoom_start=14)

for lat, lon, label, link in zip(x, y, name, popup):
    # Quote the src attribute and use a well-formed closing tag so the iframe parses reliably.
    iframe_html = '<iframe width="1000" height="315" src="{}"></iframe>'.format(link)
    folium.Marker(location=[lat, lon], tooltip=label, popup=iframe_html).add_to(foodplace_map)

foodplace_map.save('knu_foodplace.html')
# Bare expression on the last line: a notebook displays the map inline.
foodplace_map