import matplotlib.pyplot as plt

# Only one sequence is passed, so it is used as the y values and the
# x values are filled in automatically.
plt.plot([1, 2, 3, 4])
plt.ylabel('y-label')
plt.show()
import matplotlib.pyplot as plt

# Supply both coordinates explicitly: x first, then y.
x_values = [1, 2, 3, 4]
y_values = [1, 4, 9, 16]
plt.plot(x_values, y_values)
plt.show()
import matplotlib.pyplot as plt

x_values = [1, 2, 3, 4]
y_values = [1, 4, 9, 16]

# Draw the line, then name both axes before showing the figure.
plt.plot(x_values, y_values)
plt.xlabel('X-label')
plt.ylabel('Y-label')
plt.show()
import matplotlib.pyplot as plt

# Explicit x and y coordinates.
x_values = [1, 2, 3, 4]
y_values = [1, 4, 9, 16]
plt.plot(x_values, y_values)

plt.xlabel('X-label')
plt.ylabel('Y-label')

# axis() takes the limits in the order [xmin, xmax, ymin, ymax].
axis_limits = [0, 5, 0, 20]
plt.axis(axis_limits)
plt.show()
import matplotlib.pyplot as plt

x_values = [1, 2, 3, 4]
y_values = [1, 4, 9, 16]

# The third positional argument is a format string: 'b' = blue,
# 'o' = circle markers.
marker_format = 'bo'
plt.plot(x_values, y_values, marker_format)

plt.xlabel('x-label')
plt.ylabel('y-label')
plt.axis([0, 5, 0, 20])  # [xmin, xmax, ymin, ymax]
plt.show()
import matplotlib.pyplot as plt

x_values = [1, 2, 3, 4]

# Two lines that share the same x values; the third argument picks the colour.
plt.plot(x_values, [1, 4, 9, 16], 'r')
plt.plot(x_values, [1, 3, 5, 7], 'violet')

plt.xlabel('X-label')
plt.ylabel('Y-label')
plt.axis([0, 5, 0, 20])  # [xmin, xmax, ymin, ymax]
plt.show()
x, y 값 인자에 대해 선의 색상과 형태를 지정하는 포맷 문자열을 세 번째 인자에 입력.
import matplotlib.pyplot as plt

x_values = [1, 2, 3, 4]
y_values = [1, 4, 9, 16]

# Format string 'ro': red ('r') circle markers ('o').
plt.plot(x_values, y_values, 'ro')
plt.axis([0, 6, 0, 20])  # [xmin, xmax, ymin, ymax]
plt.show()
matplotlib에서는 일반적으로 NumPy 어레이를 이용함.
NumPy 어레이를 사용하지 않더라도 모든 시퀀스는 내부적으로 NumPy 어레이로 변환됨.
# Fixed: the original "import matplotlib,pyplot as plt" used a comma instead
# of a dot, which tries to import a non-existent top-level module "pyplot".
import matplotlib.pyplot as plt
import numpy as np

# Sample points from 0.0 up to (but not including) 5.0 in steps of 0.2.
t = np.arange(0., 5., 0.2)

# One plot() call can take several (x, y, format) triples:
# red dashed line for t, blue squares for t**2, green triangles for t**3.
plt.plot(t, t, 'r--', t, t**2, 'bs', t, t**3, 'g^')
plt.show()
그래프의 특정 영역을 색상으로 채워서 강조.
- fill_between()
- fill_betweenx()
- fill()
import matplotlib.pyplot as plt

x = [1, 2, 3, 4]
y = [1, 4, 9, 16]

plt.plot(x, y)
plt.xlabel('X-label')
plt.ylabel('Y-label')

# Shade the area under the curve for the points at index 1 and 2 only.
# Only one set of y values is given, so fill_between() uses its default
# second boundary.
shaded = slice(1, 3)
plt.fill_between(x[shaded], y[shaded], alpha=0.5)
plt.show()
그래프의 특정 영역을 색상으로 채워서 강조.
- fill_between()
- fill_betweenx()
- fill()
import matplotlib.pyplot as plt

x = [1, 2, 3, 4]
y = [1, 4, 9, 16]

plt.plot(x, y)
plt.xlabel('X-label')
plt.ylabel('Y-label')

# fill_betweenx() is the horizontal counterpart of fill_between(): it takes
# the y values first and shades sideways. Here the points at index 2 and 3
# are used.
shaded = slice(2, 4)
plt.fill_betweenx(y[shaded], x[shaded], alpha=0.5)
plt.show()
import matplotlib.pyplot as plt

x = [1, 2, 3, 4]
y1 = [1, 4, 9, 16]
y2 = [1, 2, 4, 8]

# Draw both curves on the same axes.
for series in (y1, y2):
    plt.plot(x, series)

plt.xlabel('X-label')
plt.ylabel('Y-label')

# Shade the region enclosed by the two curves for the points at index 1 and 2.
shaded = slice(1, 3)
plt.fill_between(x[shaded], y1[shaded], y2[shaded], color="lightgray", alpha=0.5)
plt.show()
# Filling graph regions in matplotlib - filling an arbitrary region.
# Fixed: the heading sentence was fused onto the import line, which made the
# import a syntax error; it is now a comment on its own line.
import matplotlib.pyplot as plt

x = [1, 2, 3, 4]
y1 = [1, 4, 9, 16]
y2 = [1, 2, 4, 8]

plt.plot(x, y1)
plt.plot(x, y2)
plt.xlabel('X-label')
plt.ylabel('Y-label')

# fill() shades the polygon whose corners are given as a list of x
# coordinates followed by the matching list of y coordinates.
plt.fill([1.9, 1.9, 3.1, 3.1], [2, 5, 11, 8],
         color='lightgray', alpha=0.5)
plt.show()
import matplotlib.pyplot as plt
import numpy as np

# Sample points from 0 up to (but not including) 2 in steps of 0.2.
a = np.arange(0, 2, 0.2)

plt.plot(a, a, 'bo')
# Fixed: a comma was missing between the color and marker keyword arguments
# in the next two calls, which made them syntax errors.
plt.plot(a, a**2, color='red', marker='*', linewidth=2)
plt.plot(a, a**3, color='springgreen', marker='^', markersize=9)

# Place ticks explicitly: 0, 1, 2 on the x-axis and 1..5 on the y-axis.
plt.xticks([0, 1, 2])
plt.yticks(np.arange(1, 6))
plt.show()
import matplotlib.pyplot as plt
import numpy as np

# Fixed: "np.arnage" was a typo for np.arange.
a = np.arange(0, 2, 0.2)

plt.plot(a, a, 'bo')
plt.plot(a, a**2, color='red', marker='*', linewidth=2)
# Fixed: a comma was missing between the color and marker keyword arguments.
plt.plot(a, a**3, color='springgreen', marker='^', markersize=9)

plt.xticks([0, 1, 2])
# Fixed: "np,arange" used a comma instead of a dot.
plt.yticks(np.arange(1, 6))

plt.title('Title test')
plt.show()
import matplotlib.pyplot as plt
import numpy as np

# One bar position per year: 0, 1, 2.
x = np.arange(3)
years = ['2017', '2018', '2019']
values = [100, 400, 900]

# Fixed: "plt.bar(x.values)" used a dot instead of a comma; an ndarray has no
# "values" attribute, and bar() needs the positions and the heights.
plt.bar(x, values)

# Label each bar position with its year.
plt.xticks(x, years)
plt.show()
# Bar chart with its appearance options listed one per line.
# Uses x, values and years, which this snippet does not define itself.
plt.bar(
    x,
    values,
    width=0.6,
    align='edge',
    color='springgreen',
    edgecolor='gray',
    linewidth=3,
    tick_label=years,
    log=True,
)
width는 막대의 너비. 디폴트 값은 0.8.
align은 틱과 막대의 위치를 조절. 디폴트 값은 ‘center’이며, ‘edge’로 설정하면 막대의 왼쪽 끝에 x 틱이 표시됨.
color는 막대의 색을 지정.
edgecolor는 막대의 테두리 색을 지정.
linewidth는 테두리의 두께를 지정.
tick_label을 어레이 형태로 지정하면, 틱에 어레이의 문자열을 순서대로 나타낼 수 있음.
log = True로 설정하면, y축이 로그 스케일로 표시됨.
# Fixed: the original imported the top-level "matplotlib" package as plt,
# which has no barh()/show(); the plotting API lives in matplotlib.pyplot.
import matplotlib.pyplot as plt
import numpy as np

# Fixed: "np.arnage" was a typo for np.arange.
y = np.arange(3)
years = ['2017', '2018', '2019']
values = [100, 400, 900]

# Horizontal bars: the thickness is set with "height" rather than "width".
# Fixed: "ticl_label" was a typo for the tick_label keyword.
plt.barh(y, values, height=0.6, align='edge', color='springgreen',
         edgecolor='gray', linewidth=3, tick_label=years, log=False)
plt.show()
import matplotlib.pyplot as plt
import numpy as np

# Seed the generator so the same random figure is produced on every run.
np.random.seed(19680801)

N = 50
x = np.random.rand(N)
# Fixed: "np.rnadom" was a typo for np.random.
y = np.random.rand(N)
# Fixed: the original line "np.random.rand(N))**2" had an unbalanced
# parenthesis (a stray copy of the tail of the "area" line below).
colors = np.random.rand(N)
# Marker areas between 0 and 30**2.
area = (30 * np.random.rand(N)) ** 2

# s sets the marker size, c the marker colour, alpha the transparency.
plt.scatter(x, y, s=area, c=colors, alpha=0.5)
plt.show()
import matplotlib.pyplot as plt

# Sample of 22 weight measurements.
weight = [
    68, 81, 64, 56, 78, 74, 61, 77, 66, 68, 59,
    71, 80, 59, 67, 81, 69, 73, 69, 74, 70, 65,
]

# Histogram with the default binning.
plt.hist(weight)
plt.show()
import matplotlib.pyplot as plt
import numpy as np

# Three samples with different distributions.
# Fixed: "rnadn" -> randn and "np. rnadom" -> np.random were typos.
a = 2.0 * np.random.randn(10000) + 1.0
b = np.random.standard_normal(10000)
# Fixed: "= 10.0" was a second assignment (a syntax error); the intended
# expression subtracts 10.0, giving values spread over [-10, 10).
c = 20.0 * np.random.rand(5000) - 10.0

# density=True normalises each histogram so the three can share one axis;
# histtype selects an outline ('step') or a filled outline ('stepfilled').
plt.hist(a, bins=100, density=True, alpha=0.7, histtype='step')
plt.hist(b, bins=50, density=True, alpha=0.5, histtype='stepfilled')
plt.hist(c, bins=100, density=True, alpha=0.9, histtype='step')
plt.show()
import matplotlib.pyplot as plt

x = [1, 2, 3, 4]
y = [1, 4, 9, 16]

# One error value per data point, passed as the yerr keyword.
yerr = [2.3, 3.1, 1.7, 2.5]

plt.errorbar(x, y, yerr=yerr)
plt.show()
import matplotlib.pyplot as plt

x = [1, 2, 3, 4]
y = [1, 4, 9, 16]

# Two rows of error values, one entry per data point in each row
# (presumably lower errors first, then upper - confirm against the
# errorbar documentation).
yerr = [
    (2.3, 3.1, 1.7, 2.5),
    (1.1, 2.5, 0.9, 3.9),
]

plt.errorbar(x, y, yerr=yerr)
plt.show()
import matplotlib.pyplot as plt

ratio = [34, 32, 16, 18]
labels = ['Apple', 'Banana', 'Melon', 'Grapes']

# autopct is a format string applied to each wedge's percentage.
# Fixed: '%1.f%%' had the width and precision transposed (it prints no
# decimals); '%.1f%%' shows one decimal place and matches the other pie
# charts in this file.
plt.pie(ratio, labels=labels, autopct='%.1f%%')
plt.show()
# Pie chart with a start angle of 260 and counterclock switched off.
# Uses ratio and labels, which this snippet does not define itself.
plt.pie(
    ratio,
    labels=labels,
    autopct='%.1f%%',
    startangle=260,
    counterclock=False,
)
# Fixed: "matpotlib" was a typo for matplotlib.
import matplotlib.pyplot as plt

ratio = [34, 32, 16, 18]
labels = ['Apple', 'Banana', 'Melon', 'Grapes']
# One offset per wedge; every wedge is pulled out by the same amount.
explode = [0.05, 0.05, 0.05, 0.05]
# One colour per wedge, in the same order as ratio and labels.
colors = ['silver', 'gold', 'whitesmoke', 'lightgray']

plt.pie(ratio, labels=labels, autopct='%.1f%%',
        startangle=260, counterclock=False, explode=explode,
        shadow=True, colors=colors)
plt.show()
from bs4 import BeautifulSoup

# Read the whole XML file into a string; the file is closed on leaving the
# with block.
# Fixed: the with body and the loop body had lost their indentation, and a
# stray ")" followed the loop; both were syntax errors.
with open("../Users/chohyunjun/Desktop/data/books (1).xml", "r", encoding="utf8") as books_file:
    books_xml = books_file.read()

soup = BeautifulSoup(books_xml, "lxml")

# Extract every <author> element: print the tag itself, then only its text.
for book_info in soup.find_all("author"):
    print(book_info)
    print(book_info.get_text())
import urllib.request
from bs4 import BeautifulSoup

# Read the patent XML file into a string.
# Fixed: the body of the with statement had lost its indentation, which is
# an IndentationError.
with open("../Users/chohyunjun/Desktop/data/US08621662-20140107 (1).xml", "r", encoding="UTF8") as patent_xml:
    xml = patent_xml.read()

soup = BeautifulSoup(xml, "lxml")

# find() returns the first <invention-title> element; print its text.
invention_title_tag = soup.find("invention-title")
print(invention_title_tag.get_text())
import urllib.request
from bs4 import BeautifulSoup

# Read the patent XML file into a string.
# Fixed: the body of the with statement had lost its indentation, which is
# an IndentationError.
with open("../Users/chohyunjun/Desktop/data/US08621662-20140107 (1).xml", "r", encoding='utf8') as patent_xml:
    xml = patent_xml.read()

soup = BeautifulSoup(xml, "lxml")

# Title of the invention.
invention_title_tag = soup.find("invention-title")
print(invention_title_tag.get_text())

# Narrow the search step by step:
# <publication-reference> -> <document-id> -> individual fields.
publication_reference_tag = soup.find("publication-reference")
p_document_id_tag = publication_reference_tag.find("document-id")

# Text of each field of the publication's document id.
p_country = p_document_id_tag.find("country").get_text()
p_doc_number = p_document_id_tag.find("doc-number").get_text()
p_kind = p_document_id_tag.find("kind").get_text()
p_date = p_document_id_tag.find("date").get_text()
Sample data(json_example.json)
import json

with open("json_example.json", "r", encoding="utf8") as f:  # fixed: the with body had lost its indentation
    contents = f.read()  # whole file as one string
    json_data = json.loads(contents)  # parse the JSON text into Python objects
    print(json_data["employees"])  # look up the "employees" key as a check
1행에서 json 모듈을 불러옴.
3행에서 open() 함수를 사용하여 파일을 엶.
4행에서 read() 함수로 파일 내용을 문자열형으로 읽어 옴.
5행에서는 loads() 함수를 사용하여 해당 문자열을 딕셔너리형으로 변환.
6행에서 테스트로 json_data["employees"]를 출력.
import requests

# Template: replace the placeholder below with the address of a JSON resource.
url = '웹사이트 json 파일'

# Fetch the resource, then decode the response body as JSON.
response = requests.get(url)
data = response.json()
<예제>
import requests

# Placeholder address; fill in a real JSON endpoint before running.
url = '.....'

# Fetch the resource, decode the response body as JSON, and show the result.
response = requests.get(url)
data = response.json()
print(data)
딕셔너리형으로 구성된 데이터를 json형태의 파일로 변환하는 과정에 대해 알아보자.
import json

dict_data = {'Name': 'Zara', 'Age': 7, 'Class': 'First'}  # data to save

with open("data.json", "w", encoding="utf8") as f:  # fixed: the with body had lost its indentation
    json.dump(dict_data, f)  # arguments: the dictionary, then the file object
→ JSON 파일 작성.
3행처럼 데이터를 저장한 딕셔너리형 생성.
6행에서 json.dump() 함수를 사용하여 데이터 저장. 이때 인수는 딕셔너리형 자료와 파일 객체.
실행 결과, 작업 폴더에 ‘data.json’ 파일이 생성됨.