🖥️ 크롤링 🖥️

parkeu·2022년 8월 26일
1

ABC부트캠프

목록 보기
14/55

😏 CSS

  • HTML 문서에서 스타일에 관련된 부분을 별도로 분리
  • 사용 유형
    1) 태그 내부 삽입형 : HTML 태그 내에 style 속성 이용
    2) 내부 스타일 시트 : head태그 속 style태그 내부에 속성 작성
    3) 외부 스타일 시트 : css파일 외부에 저장
  • 클래스 선택자 : 동일한 태그더라도 다른 스타일을 적용하고 싶을 때 선택자에 class속성 추가
  • id 선택자 : id 속성이 있는 특정태그의 내용에만 해당 스타일 지정, 한 문서에 한번만 사용가능

😵‍💫 css 기초(class 이용)

<!DOCTYPE html>
<!-- Demo: styling paragraphs through class selectors in an internal style sheet. -->
<html>
	<head>
		<!-- Declare the encoding explicitly so the non-ASCII title, font names and
		     paragraph text are decoded the same way in every browser. -->
		<meta charset="utf-8">
		<title>css μ μš©ν•œ λ¬Έμž₯ 포맷</title>
		<style type="text/css">
			/* Type + class selector: applies only to <p> elements carrying class "content1". */
			P.content1 {font-family:κΆμ„œ; color:red;}
			/* Class-only selector: applies to any element carrying class "content2". */
			.content2 {font-family:κ΅΄λ¦Ό; background:yellow;}
		</style>
	</head>

	<body>
		<p class="content1">μ²«λ²ˆμ§Έλ¬Έλ‹¨</p>
		<p class="content2">λ‘λ²ˆμ§Έλ¬Έλ‹¨</p>
		<!-- No class: this paragraph keeps the browser's default style. -->
		<p>μ„Έλ²ˆμ§Έλ¬Έλ‹¨</p>
	</body>
</html>


😵‍💫 css 기초(id 이용)

<!DOCTYPE html>
<!-- Demo: styling paragraphs through id selectors in an internal style sheet. -->
<html>
	<head>
		<!-- Declare the encoding explicitly so the non-ASCII title, font names and
		     paragraph text are decoded the same way in every browser. -->
		<meta charset="utf-8">
		<title>css μ μš©ν•œ λ¬Έμž₯ 포맷</title>
		<style type="text/css">
			/* Type + id selector: applies only to the <p> element whose id is "content1". */
			P#content1 {font-family:κΆμ„œ; color:red;}
			/* Id-only selector: applies to whichever element has id "content2". */
			#content2 {font-family:κ΅΄λ¦Ό; background:yellow;}
		</style>
	</head>

	<body>
		<!-- Each id value is used once; ids must be unique within a document. -->
		<p id="content1">μ²«λ²ˆμ§Έλ¬Έλ‹¨</p>
		<p id="content2">λ‘λ²ˆμ§Έλ¬Έλ‹¨</p>
		<!-- No id: this paragraph keeps the browser's default style. -->
		<p>μ„Έλ²ˆμ§Έλ¬Έλ‹¨</p>
	</body>
</html>

👀 결과는 class를 이용한 것과 같다.


🏍️ 타슈 크롤링

라이브러리 임포트

from urllib.request import urlopen
from bs4 import BeautifulSoup
import time
import re

import pandas as pd

크롤링

# 1) λ°μ΄ν„°ν”„λ ˆμž„ 생성
data = pd.DataFrame(columns=['μŠ€ν…Œμ΄μ…˜λͺ…','μœ„μΉ˜','μƒνƒœμ •λ³΄','μœ„λ„','경도'])

# 2) νƒ€μŠˆ μ‚¬μ΄νŠΈ 접속 μ£Όμ†Œ : https://new.tashu.or.kr/stationList.do
url = 'https://new.tashu.or.kr/stationList.do'

# 3) url μ ‘μ†ν•˜μ—¬ HTMLκ°€μ Έμ˜€κΈ°
html = urlopen(url)

# 4) HTML νƒœκ·Έ νŒŒμ‹±(parsing)ν•˜μ—¬ λ³€ν™˜
bsObject = BeautifulSoup(html, 'html.parser', from_encoding='UTF-8')

# 5) νƒ€μŠˆ μ •λ₯˜μž₯ 정보가 μžˆλŠ” table만 κ°€μ Έμ˜€κΈ°
table = bsObject.find_all('table', {'class','board-tp-01 stationtable'})

# 6) 상세정보 μΆ”μΆœν•˜κΈ°
# 6-1) table내에 trλ₯Ό μ°ΎκΈ°
tr = table[0].find_all('tr')

# 6-2) 첫 tr(ν…Œμ΄λΈ”μ˜ μ»¬λŸΌμ •λ³΄)은 μ œμ™Έ
tr = tr[1:len(tr)]

# 6-3) νƒ€μŠˆ μ •λ₯˜μž₯ 상세 정보 μΆ”μΆœ
for index_tr in range(0, len(tr)):
  td = tr[index_tr].find_all('td')

  # μž₯μ†Œ
  # ex) 1.λ¬΄μ—­μ „μ‹œκ΄€μž…κ΅¬(νƒμ‹œμŠΉκ°•μž₯) -> λ¬΄μ—­μ „μ‹œκ΄€μž…κ΅¬(νƒμ‹œμŠΉκ°•μž₯) 
  station = td[0].text.split('.')[1] 

  # μœ„μΉ˜
  location = td[1].text

  # μƒνƒœ
  condition = td[2].text

  # μ’Œν‘œ μΆ”μΆœ
  # μœ„λ„ Latitude
  lat = td[3].button.attrs['data-lat']
  # 경도 Longitude
  lon = td[3].button.attrs['data-ltd']

  # 7) νƒ€μŠˆ μ •λ₯˜μž₯ 상세 정보 Dataframe에 λ‹΄κΈ°(append)
  data = data.append({'μŠ€ν…Œμ΄μ…˜λͺ…': station,
                      'μœ„μΉ˜':location,
                      'μƒνƒœμ •λ³΄':condition,
                      'μœ„λ„':lat,
                      '경도':lon},ignore_index=True)
  print('Complets of ' + station)

print('----------------------------------------------------------------')
print(data)

ν¬λ‘€λ§ν•œ 데이터 μ €μž₯

# Write the scraped station table to TASHU.csv in the current working
# directory. The 'utf-8-sig' codec prepends a byte-order mark to the UTF-8 output.
data.to_csv(path_or_buf='TASHU.csv', encoding='utf-8-sig')

📖 크롤링 데이터를 이용한 지도 시각화

import folium

# Load the saved station table. The relative path is the same one the scraper
# writes to, so this works from any working directory (the previous
# '/content/...' path only existed when the working directory was /content).
df = pd.read_csv('TASHU.csv')
df.head()

# 1) Centre the map on the mean latitude / longitude of all stations.
t_map = folium.Map(location=[df['μœ„λ„'].mean(), df['경도'].mean()], zoom_start=14)

# 2) Status column that decides the marker colour. Bracket access is used
#    because it works for any column name, unlike attribute access.
condition = df['μƒνƒœμ •λ³΄']

# Status text -> marker colour: the "normal" status is blue, the network-error
# status is red. Any other (or missing) status falls back to black so that no
# station silently disappears from the map.
status_colors = {'정상': 'blue', 'NETWORK μ—λŸ¬': 'red'}

# 3) One marker per station; the popup shows the station name.
for lat, lon, name, status in zip(df['μœ„λ„'], df['경도'], df['μŠ€ν…Œμ΄μ…˜λͺ…'], condition):
    folium.Marker(
        [lat, lon],
        popup='<pre>' + name + '</pre>',
        icon=folium.Icon(color=status_colors.get(status, 'black'), icon='info-sign'),
    ).add_to(t_map)
t_map


➕ tiles='Stamen Terrain'추가

# Rebuild the map with the same centre (mean latitude / longitude of the
# stations) and zoom, but rendered with the 'Stamen Terrain' tile set.
# NOTE(review): newer folium releases appear to no longer bundle the Stamen
# tile sets - confirm against the installed folium version.
t_map = folium.Map(
    location=[df['μœ„λ„'].mean(), df['경도'].mean()],
    zoom_start=14,
    tiles='Stamen Terrain',
)


➕ tiles='Stamen Toner'추가


➕ tiles='Stamen Watercolor'추가

👀 다른 다양한 tiles -> https://deparkes.co.uk/2016/06/10/folium-map-tiles/ 참고


μ•„μ΄μ½˜ λ³€κ²½

popup='<pre>'+df.loc[index_draw, 'μŠ€ν…Œμ΄μ…˜λͺ…']+'</pre>', icon=folium.Icon(color='red', icon='fa-bicycle', prefix='fa')).add_to(t_map)

해당부분 icon을 bicycle로 변경 https://fontawesome.com/v4/icon/bicycle


🚲세종어울링 크롤링

from urllib.request import urlopen
from bs4 import BeautifulSoup
import time
import re

import pandas as pd

import folium

# 1) λ°μ΄ν„°ν”„λ ˆμž„ 생성
data = pd.DataFrame(columns=['μŠ€ν…Œμ΄μ…˜λͺ…','μœ„μΉ˜','μƒνƒœμ •λ³΄','μœ„λ„','경도'])

# 2) μ–΄μšΈλ§ μ‚¬μ΄νŠΈ 접속 μ£Όμ†Œ : https://www.sejongbike.kr/userStationAction.do?process=stationTotalList&menu=21
url = 'https://www.sejongbike.kr/userStationAction.do?process=stationTotalList&menu=21'

# 3) url μ ‘μ†ν•˜μ—¬ HTMLκ°€μ Έμ˜€κΈ°
html = urlopen(url)

# 4) HTML νƒœκ·Έ νŒŒμ‹±(parsing)ν•˜μ—¬ λ³€ν™˜
bsObject = BeautifulSoup(html, 'html.parser', from_encoding='UTF-8')

# 5) μ •λ₯˜μž₯ 정보가 μžˆλŠ” table만 κ°€μ Έμ˜€κΈ°
table = bsObject.find_all('table', {'class','content_table'})

# 6) 상세정보 μΆ”μΆœν•˜κΈ°
# 6-1) table내에 trλ₯Ό μ°ΎκΈ°
tr = table[0].find_all('tr')

# 6-2) 첫 tr(ν…Œμ΄λΈ”μ˜ μ»¬λŸΌμ •λ³΄)은 μ œμ™Έ
tr = tr[1:len(tr)]

# 6-3) μ •λ₯˜μž₯ 상세 정보 μΆ”μΆœ
for index_tr in range(0, len(tr)):
  td = tr[index_tr].find_all('td')
  # μž₯μ†Œ
  station = td[0].text.split('.')[1] 
  # μœ„μΉ˜
  location = td[1].text
  # μƒνƒœ
  condition = td[2].text
  # μ’Œν‘œ μΆ”μΆœ
  # μœ„λ„ Latitude
  lat = td[3].find('a').attrs['onclick'].split('\'')[3]
  # 경도 Longitude
  lon = td[3].find('a').attrs['onclick'].split('\'')[5]
  # 7) μ •λ₯˜μž₯ 상세 정보 Dataframe에 λ‹΄κΈ°(append)
  data = data.append({'μŠ€ν…Œμ΄μ…˜λͺ…': station,
                      'μœ„μΉ˜':location,
                      'μƒνƒœμ •λ³΄':condition,
                      'μœ„λ„':lat,
                      '경도':lon},ignore_index=True)
  
data.to_csv('sejongbike.csv', encoding='utf-8-sig')

# Load the saved station table. The relative path is the same one the scraper
# writes to, so this works from any working directory (the previous
# '/content/...' path only existed when the working directory was /content).
df = pd.read_csv('sejongbike.csv')

# 1) Centre the map on the mean latitude / longitude of all stations.
s_map = folium.Map(location=[df['μœ„λ„'].mean(), df['경도'].mean()], zoom_start=14)

# 2) Status column that decides the marker colour. Bracket access is used
#    because it works for any column name, unlike attribute access.
#    (`condition.unique()` lists the distinct status values.)
condition = df['μƒνƒœμ •λ³΄']

# Status text -> marker colour: the "normal" status is blue, the network-error
# status is red. Any other (or missing) status falls back to black, so
# stations without a recognised status are still drawn.
status_colors = {'정상': 'blue', 'Network μ—λŸ¬': 'red'}

# 3) One marker per station; the popup shows the station name.
for lat, lon, name, status in zip(df['μœ„λ„'], df['경도'], df['μŠ€ν…Œμ΄μ…˜λͺ…'], condition):
    folium.Marker(
        [lat, lon],
        popup='<pre>' + name + '</pre>',
        icon=folium.Icon(color=status_colors.get(status, 'black'), icon='info-sign'),
    ).add_to(s_map)

# Write the finished map to a standalone HTML file.
s_map.save('sejong bike.html')


➕ 경도 위도 구하는 다른 방법들

  • 위도 (lat)

    1 ) lat = td[3].a.attrs['onclick'].split('\'')[3]
    2 ) 교수님코드
    onclick = td[3].a.attrs['onclick']
    lat = onclick.split(',')[1]
    lat = lat.replace('\'','').strip()

  • 경도 (lon)

    1 ) lon = td[3].a.attrs['onclick'].split('\'')[5]
    2 ) 교수님코드
    lon = onclick.split(',')[2]
    lon = lon.replace(');','').replace('\'','').strip()

https://colab.research.google.com/drive/1DbjiXZEBagugta0MVgQz5oqM98gbQ-rY?usp=sharing


🚩 소감 🚩

세종 어울링 크롤링을 해볼때 처음에 위도와 경도를 어떻게 받아와야 하나 막막했다. 그래서 열심히 서치했다 ..^^... 타슈 위도경도 어떻게 받았는지 완벽히 이해하지 못한 상태여서 검색어를 대체 뭐라고 쳐야되나 고민했는데 이것저것 검색하다보니까 원하는 정보를 찾았고 성공했다.ㅎ ㅎ ㅎ 내가 찾은 방법이 솔직히 완벽한 정답이라고 생각했는데, 다른 데이터를 크롤링할 때 정확히 지정을 안 해주면 잘 안 될 수도 있다고 해서 lat = td[3].find('a')['onclick'].split('\'')[3] 말고 onclick이라는 속성을 찾을것이라는 의미로 lat = td[3].find('a').attrs['onclick'].split('\'')[3]으로 고쳐주었다. 어렵다 ㅎㅎ..... 그래도 서치해서 찾고 코드를 돌려봤는데 오류 안났을때 너무좋았다 ~~🤡 이래서 코딩 좋아하나보다,, 아직은 코딩을 좋아하진않지만 노력해봐야지.... 코딩이 날 좋아해줬으면 마지못해 좋아해줄텐데 날 좋아할 생각이 없어보여서 내가 노력해야되는게짜증나ㅜ^ㅜ..

profile
배고파용.

1개의 댓글

comment-user-thumbnail
2022년 8월 29일

코딩... 널 갖고 말겠어 ㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋ 파이썬을 자바먹는 그날까지 화이팅

답글 달기