NLP3 - 추천 시스템

ganadara·2023년 1월 10일
0

복습

목록 보기
35/49

cos_sim : 추천 시스템

import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
data = pd.read_csv("datasets/movies_metadata.csv",low_memory=False)
data
adult belongs_to_collection budget genres homepage id imdb_id original_language original_title overview ... release_date revenue runtime spoken_languages status tagline title video vote_average vote_count
0 False {'id': 10194, 'name': 'Toy Story Collection', ... 30000000 [{'id': 16, 'name': 'Animation'}, {'id': 35, '... http://toystory.disney.com/toy-story 862 tt0114709 en Toy Story Led by Woody, Andy's toys live happily in his ... ... 1995-10-30 373554033.0 81.0 [{'iso_639_1': 'en', 'name': 'English'}] Released NaN Toy Story False 7.7 5415.0
1 False NaN 65000000 [{'id': 12, 'name': 'Adventure'}, {'id': 14, '... NaN 8844 tt0113497 en Jumanji When siblings Judy and Peter discover an encha... ... 1995-12-15 262797249.0 104.0 [{'iso_639_1': 'en', 'name': 'English'}, {'iso... Released Roll the dice and unleash the excitement! Jumanji False 6.9 2413.0
2 False {'id': 119050, 'name': 'Grumpy Old Men Collect... 0 [{'id': 10749, 'name': 'Romance'}, {'id': 35, ... NaN 15602 tt0113228 en Grumpier Old Men A family wedding reignites the ancient feud be... ... 1995-12-22 0.0 101.0 [{'iso_639_1': 'en', 'name': 'English'}] Released Still Yelling. Still Fighting. Still Ready for... Grumpier Old Men False 6.5 92.0
3 False NaN 16000000 [{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam... NaN 31357 tt0114885 en Waiting to Exhale Cheated on, mistreated and stepped on, the wom... ... 1995-12-22 81452156.0 127.0 [{'iso_639_1': 'en', 'name': 'English'}] Released Friends are the people who let you be yourself... Waiting to Exhale False 6.1 34.0
4 False {'id': 96871, 'name': 'Father of the Bride Col... 0 [{'id': 35, 'name': 'Comedy'}] NaN 11862 tt0113041 en Father of the Bride Part II Just when George Banks has recovered from his ... ... 1995-02-10 76578911.0 106.0 [{'iso_639_1': 'en', 'name': 'English'}] Released Just When His World Is Back To Normal... He's ... Father of the Bride Part II False 5.7 173.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
45461 False NaN 0 [{'id': 18, 'name': 'Drama'}, {'id': 10751, 'n... http://www.imdb.com/title/tt6209470/ 439050 tt6209470 fa رگ خواب Rising and falling between a man and woman. ... NaN 0.0 90.0 [{'iso_639_1': 'fa', 'name': 'فارسی'}] Released Rising and falling between a man and woman Subdue False 4.0 1.0
45462 False NaN 0 [{'id': 18, 'name': 'Drama'}] NaN 111109 tt2028550 tl Siglo ng Pagluluwal An artist struggles to finish his work while a... ... 2011-11-17 0.0 360.0 [{'iso_639_1': 'tl', 'name': ''}] Released NaN Century of Birthing False 9.0 3.0
45463 False NaN 0 [{'id': 28, 'name': 'Action'}, {'id': 18, 'nam... NaN 67758 tt0303758 en Betrayal When one of her hits goes wrong, a professiona... ... 2003-08-01 0.0 90.0 [{'iso_639_1': 'en', 'name': 'English'}] Released A deadly game of wits. Betrayal False 3.8 6.0
45464 False NaN 0 [] NaN 227506 tt0008536 en Satana likuyushchiy In a small town live two brothers, one a minis... ... 1917-10-21 0.0 87.0 [] Released NaN Satan Triumphant False 0.0 0.0
45465 False NaN 0 [] NaN 461257 tt6980792 en Queerama 50 years after decriminalisation of homosexual... ... 2017-06-09 0.0 75.0 [{'iso_639_1': 'en', 'name': 'English'}] Released NaN Queerama False 0.0 0.0

45466 rows × 24 columns

data['overview']
0        Led by Woody, Andy's toys live happily in his ...
1        When siblings Judy and Peter discover an encha...
2        A family wedding reignites the ancient feud be...
3        Cheated on, mistreated and stepped on, the wom...
4        Just when George Banks has recovered from his ...
                               ...                        
45461          Rising and falling between a man and woman.
45462    An artist struggles to finish his work while a...
45463    When one of her hits goes wrong, a professiona...
45464    In a small town live two brothers, one a minis...
45465    50 years after decriminalisation of homosexual...
Name: overview, Length: 45466, dtype: object
data = data[['title','overview']]
data[:2]
title overview
0 Toy Story Led by Woody, Andy's toys live happily in his ...
1 Jumanji When siblings Judy and Peter discover an encha...
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45466 entries, 0 to 45465
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   title     45460 non-null  object
 1   overview  44512 non-null  object
dtypes: object(2)
memory usage: 710.5+ KB
45460-44512 
948
#결측치 확인
data['overview'].isnull().sum()
954
data = data.dropna()
data.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 44506 entries, 0 to 45465
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   title     44506 non-null  object
 1   overview  44506 non-null  object
dtypes: object(2)
memory usage: 1.0+ MB
#희소행렬 만들기
tfidf_vec = TfidfVectorizer(stop_words="english")
tfidf_dtm = tfidf_vec.fit_transform(data['overview']) 
tfidf_dtm
<44506x75827 sparse matrix of type '<class 'numpy.float64'>'
	with 1210839 stored elements in Compressed Sparse Row format>
# Compute the cosine similarity between every pair of rows in tfidf_dtm
# (one row per movie overview, 44506 rows in total).
# NOTE(review): with default settings the result is presumably a dense
# 44506 x 44506 float64 array (roughly 15.8 GB) - confirm that the machine
# has enough memory before running this cell.
cos_sim_res = cosine_similarity(tfidf_dtm, tfidf_dtm)
---------------------------------------------------------------------------

출력생략

# Map each movie title to its index label in `data`. When a title occurs more
# than once, the later row overwrites the earlier one.
# NOTE: `data` went through dropna(), so its index runs from 0 to 45465 while
# holding only 44506 rows; these values are index labels and are not
# guaranteed to equal row positions.
title_to_index = dict(zip(data["title"], data.index))
title_to_index
{'Toy Story': 0,
 'Jumanji': 1,
 'Grumpier Old Men': 2,
 'Waiting to Exhale': 3,
 'Father of the Bride Part II': 4,
 'Heat': 29042,
 'Sabrina': 888,
 'Tom and Huck': 7,생략
 ...}
title_to_index['Toy Story'], title_to_index['Jumanji']
(0, 1)
#추천(score가 높은 순으로 추천하려면 정렬해야 한다.)
def get_recommendation(title, n):
    """Return the titles of the ``n`` movies most similar to ``title``.

    Relies on the module-level ``data`` (DataFrame with a ``title`` column),
    ``title_to_index`` (title -> index label of ``data``) and ``cos_sim_res``
    (pairwise similarity matrix whose rows/columns follow the row order of
    ``data``).

    Args:
        title: Movie title to look up; must be a key of ``title_to_index``.
        n: Number of recommendations to return. Values below 1 give an
            empty result.

    Returns:
        A pandas Series of titles ordered from most to least similar.

    Raises:
        KeyError: If ``title`` is not present in ``title_to_index``.
    """
    # title_to_index stores index *labels*, but cos_sim_res and .iloc are
    # positional. Rows were removed by dropna(), so labels and positions
    # differ for every movie after the first dropped row; translate first.
    pos = data.index.get_loc(title_to_index[title])

    # Rank every movie by its similarity to the query movie, best first.
    ranked = sorted(
        enumerate(cos_sim_res[pos]),
        key=lambda pair: pair[1],
        reverse=True,
    )

    # Drop the query movie explicitly rather than assuming it sorts first
    # (another movie with an identical overview would tie with it).
    movie_idx = [i for i, _ in ranked if i != pos][:max(n, 0)]
    return data["title"].iloc[movie_idx]
get_recommendation("Toy Story",5)
15348                    Toy Story 3
2997                     Toy Story 2
10301         The 40 Year Old Virgin
24523                      Small Fry
23843    Andy Hardy's Blonde Trouble
Name: title, dtype: object
get_recommendation("Batman",5)
8681                 Scars of Dracula
18562    Johnny Cash at Folsom Prison
6600        The Prince and the Pauper
5355            Nosferatu the Vampyre
37915                          Vaesen
Name: title, dtype: object
get_recommendation("The Dark Knight Rises",5)
31143             Deadly Daycare
19286            The One Percent
44918                  Once More
45106    Nicostratos the Pelican
33008       White Cannibal Queen
Name: title, dtype: object

word2vec, 임베딩

import pandas as pd
train_data = pd.read_table('datasets/ratings.txt')
train_data 
id document label
0 8112052 어릴때보고 지금다시봐도 재밌어요ㅋㅋ 1
1 8132799 디자인을 배우는 학생으로, 외국디자이너와 그들이 일군 전통을 통해 발전해가는 문화산... 1
2 4655635 폴리스스토리 시리즈는 1부터 뉴까지 버릴께 하나도 없음.. 최고. 1
3 9251303 와.. 연기가 진짜 개쩔구나.. 지루할거라고 생각했는데 몰입해서 봤다.. 그래 이런... 1
4 10067386 안개 자욱한 밤하늘에 떠 있는 초승달 같은 영화. 1
... ... ... ...
199995 8963373 포켓 몬스터 짜가 ㅡㅡ;; 0
199996 3302770 쓰.레.기 0
199997 5458175 완전 사이코영화. 마지막은 더욱더 이 영화의질을 떨어트린다. 0
199998 6908648 왜난 재미없었지 ㅠㅠ 라따뚜이 보고나서 스머프 봐서 그런가 ㅋㅋ 0
199999 8548411 포풍저그가나가신다영차영차영차 0

200000 rows × 3 columns

train_data = train_data.dropna()
train_data.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 199992 entries, 0 to 199999
Data columns (total 3 columns):
 #   Column    Non-Null Count   Dtype 
---  ------    --------------   ----- 
 0   id        199992 non-null  int64 
 1   document  199992 non-null  object
 2   label     199992 non-null  int64 
dtypes: int64(2), object(1)
memory usage: 6.1+ MB
# Remove every character that is not Hangul (jamo or complete syllables).
# regex=True is passed explicitly: newer pandas versions default to
# regex=False, which would treat the pattern as a literal string and remove
# nothing.
# NOTE: the character class does not contain a space, so whitespace between
# words is removed as well.
# assign() builds a new DataFrame instead of writing into the result of
# dropna(), which avoids the SettingWithCopyWarning.
train_data = train_data.assign(
    document=train_data['document'].str.replace(r"[^ㄱ-ㅎㅏ-ㅣ가-힣]", "", regex=True)
)
C:\Users\user\AppData\Local\Temp\ipykernel_14536\3143078289.py:2: FutureWarning: The default value of regex will change from True to False in a future version.
  train_data['document'] = train_data['document'].str.replace("[^ㄱ-ㅎㅏ-ㅣ가-힣]","")
C:\Users\user\AppData\Local\Temp\ipykernel_14536\3143078289.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_data['document'] = train_data['document'].str.replace("[^ㄱ-ㅎㅏ-ㅣ가-힣]","")
train_data
id document label
0 8112052 어릴때보고지금다시봐도재밌어요ㅋㅋ 1
1 8132799 디자인을배우는학생으로외국디자이너와그들이일군전통을통해발전해가는문화산업이부러웠는데사실우... 1
2 4655635 폴리스스토리시리즈는부터뉴까지버릴께하나도없음최고 1
3 9251303 와연기가진짜개쩔구나지루할거라고생각했는데몰입해서봤다그래이런게진짜영화지 1
4 10067386 안개자욱한밤하늘에떠있는초승달같은영화 1
... ... ... ...
199995 8963373 포켓몬스터짜가ㅡㅡ 0
199996 3302770 쓰레기 0
199997 5458175 완전사이코영화마지막은더욱더이영화의질을떨어트린다 0
199998 6908648 왜난재미없었지ㅠㅠ라따뚜이보고나서스머프봐서그런가ㅋㅋ 0
199999 8548411 포풍저그가나가신다영차영차영차 0

199992 rows × 3 columns

from konlpy.tag import Okt
okt = Okt()
stopwords=["의","가","이","은","는",
          "들","좀","잘","강","과",
          "도","를","으로","자","에",
         "와","한","하다"]
# Tokenize every review into stemmed morphemes with Okt and drop stopwords.
# A dot is printed every 100 reviews as a minimal progress indicator.
toked_data = []
for i, sen in enumerate(train_data["document"]):
    if i % 100 == 0:
        print(".", end="")
    toked_sen = okt.morphs(sen, stem=True)
    toked_data.append([word for word in toked_sen if word not in stopwords])
from tqdm.notebook import tqdm
# Tokenize every review into stemmed morphemes with Okt, dropping stopwords;
# tqdm renders a progress bar while iterating over the reviews.
toked_data = []
for review in tqdm(train_data['document']):
    kept = [tok for tok in okt.morphs(review, stem=True) if tok not in stopwords]
    toked_data.append(kept)
  0%|          | 0/199992 [00:00<?, ?it/s]
toked_data
[['어리다', '때', '보고', '지금', '다시', '보다', '재밌다', 'ㅋㅋ'],
 ['디자인',
  '을',
  '배우다',
  '학생',
  '외국',
  생략
max(len(review) for review in toked_data)
68
min(len(review) for review in toked_data)
0
sum((len(review) for review in toked_data))/len(toked_data)
10.67258190327613
import matplotlib.pyplot as plt
plt.hist([len(review) for review in toked_data], bins=20)
plt.show()
#!pip install gensim
from gensim.models import Word2Vec
model = Word2Vec(sentences = toked_data, vector_size=100, window=5, min_count=5, sg=0)
model.wv.vectors.shape #임베딩 행렬(단어수, 표현력)
#표현력: 대략 100차원에 17947개의 단어가 배치되어 있다. 차원수는 크게 의미가 없다.
(17947, 100)
model.wv.vectors
array([[ 3.0088416e-01, -1.1911200e-01, -1.1325821e+00, ...,
        -6.4689957e-02,  4.4979069e-01,  3.5684457e-01],
       [-8.5318393e-01,  2.2151016e-01, -7.5326794e-01, ...,
        -7.0007372e-01, -1.0359819e+00,  4.8177451e-01],
       [ 8.1992215e-01,  5.9770411e-01, -1.8147305e+00, ...,
         1.0774486e+00,  9.4935614e-01, -1.6808929e-01],
       ...,
       [-2.5813028e-02,  4.6597794e-02, -1.8362358e-02, ...,
        -4.6418682e-02, -3.0143352e-03,  1.4660546e-02],
       [ 3.3053622e-02,  6.6365726e-02, -5.1748389e-03, ...,
        -4.6356149e-02,  4.6646088e-02, -6.3183427e-02],
       [ 1.9338656e-02,  3.3694621e-02, -6.0516104e-02, ...,
        -5.8152623e-02,  6.8320349e-02,  1.1045593e-03]], dtype=float32)
model.wv.most_similar("송혜교")
[('신민아', 0.8548964858055115),
 ('남상미', 0.8466795682907104),
 ('김규리', 0.8433918356895447),
 ('장나라', 0.8420997262001038),
 ('이승기', 0.8361702561378479),
 ('민호', 0.8360784649848938),
 ('드류', 0.8347111344337463),
 ('이연희', 0.8310037851333618),
 ('손예진', 0.8286734223365784),
 ('강혜정', 0.8286411166191101)]
#비슷한 단어를 찾았다. 즉, 단어가 쓰이는 문맥을 학습해서 비슷한 문맥에 등장하는 단어를 찾은 것이다.
import gensim
pre_trained_word2vec = gensim.models.KeyedVectors.load_word2vec_format(
"datasets/GoogleNews-vectors-negative300.bin", binary=True)
pre_trained_word2vec.vectors.shape #단어:3000000, 차원:300
(3000000, 300)
pre_trained_word2vec.similarity("this","is")
0.40797037
pre_trained_word2vec.similarity("food","book")
0.10171259
pre_trained_word2vec.similarity("chicken","pizza")
0.37984928

BS4

from bs4 import BeautifulSoup
# 시맨틱 웹처럼 바꿔보기
test_html = """
<html>
<head></head>
<body>
<h1 id = "title">새싹반</h1>
<div>
    <ul clss="name">
        <li><a class ="to_naver"  href="https://www.naver.com">네이버로 갈래요</a></li>
    </ul>

    <ul class="brand">
        <li><a href="https://www.google.com">구글로 갈래요</a></li>
        <li><a href="https://www.youtube.com">유튜브로 갈래요</a></li>
    </ul>
</div>
</body>


</html>
"""
soup = BeautifulSoup(test_html, 'html.parser')
soup
<html>
<head></head>
<body>
<h1 id="title">새싹반</h1>
<div>
<ul clss="name">
<li><a class="to_naver" href="https://www.naver.com">네이버로 갈래요</a></li>
</ul>
<ul class="brand">
<li><a href="https://www.google.com">구글로 갈래요</a></li>
<li><a href="https://www.youtube.com">유튜브로 갈래요</a></li>
</ul>
</div>
</body>
</html>
soup.prettify()
'<html>\n <head>\n </head>\n <body>\n  <h1 id="title">\n   새싹반\n  </h1>\n  <div>\n   <ul clss="name">\n    <li>\n     <a class="to_naver" href="https://www.naver.com">\n      네이버로 갈래요\n     </a>\n    </li>\n   </ul>\n   <ul class="brand">\n    <li>\n     <a href="https://www.google.com">\n      구글로 갈래요\n     </a>\n    </li>\n    <li>\n     <a href="https://www.youtube.com">\n      유튜브로 갈래요\n     </a>\n    </li>\n   </ul>\n  </div>\n </body>\n</html>\n'
soup.h1
<h1 id="title">새싹반</h1>
soup.div
<div>
<ul clss="name">
<li><a class="to_naver" href="https://www.naver.com">네이버로 갈래요</a></li>
</ul>
<ul class="brand">
<li><a href="https://www.google.com">구글로 갈래요</a></li>
<li><a href="https://www.youtube.com">유튜브로 갈래요</a></li>
</ul>
</div>
soup.find("ul")
<ul clss="name">
<li><a class="to_naver" href="https://www.naver.com">네이버로 갈래요</a></li>
</ul>
soup.find_all("ul")
[<ul clss="name">
 <li><a class="to_naver" href="https://www.naver.com">네이버로 갈래요</a></li>
 </ul>,
 <ul class="brand">
 <li><a href="https://www.google.com">구글로 갈래요</a></li>
 <li><a href="https://www.youtube.com">유튜브로 갈래요</a></li>
 </ul>]
res_a = soup.find_all("a")
res_a
[<a class="to_naver" href="https://www.naver.com">네이버로 갈래요</a>,
 <a href="https://www.google.com">구글로 갈래요</a>,
 <a href="https://www.youtube.com">유튜브로 갈래요</a>]
res_a[0]
<a class="to_naver" href="https://www.naver.com">네이버로 갈래요</a>
res_a[2]
<a href="https://www.youtube.com">유튜브로 갈래요</a>
soup.a.attrs
{'class': ['to_naver'], 'href': 'https://www.naver.com'}
soup.find_all('a',attrs={"class":'to_naver'})
[<a class="to_naver" href="https://www.naver.com">네이버로 갈래요</a>]
soup.find('a',attrs={"class":'to_naver'})
<a class="to_naver" href="https://www.naver.com">네이버로 갈래요</a>
res = soup.find('a',attrs={"class":'to_naver'})
res.string, res.text
('네이버로 갈래요', '네이버로 갈래요')
soup.select("div>ul.brand>li")
[<li><a href="https://www.google.com">구글로 갈래요</a></li>,
 <li><a href="https://www.youtube.com">유튜브로 갈래요</a></li>]

할리스 매장정보 분석

from bs4 import BeautifulSoup
import urllib.request

# Page of the store list to fetch. It is interpolated into the query string,
# so the variable and the requested page can no longer disagree; with the
# value 1 the resulting URL is the same one that was previously hard-coded.
page_num = 1
url = f"https://www.hollys.co.kr/store/korea/korStore2.do?pageNo={page_num}&sido=&gugun=&store="

# Download and parse the page. Parsing happens inside the with-block so the
# HTTP response is closed as soon as BeautifulSoup has read it.
with urllib.request.urlopen(url) as html:
    soup = BeautifulSoup(html, 'html.parser')
html
<http.client.HTTPResponse at 0x20dc28046a0>
soup
<!DOCTYPE html>

<html lang="ko" xml:lang="ko" xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>할리스</title>
<meta charset="utf-8"/>
<meta content="HOLLYS" name="Subject">
<meta content="HOLLYS" name="Title"/>
<meta content="HOLLYS,HOLLYS,할리스,할리스,커피" name="Keywords"/>
<meta content="할리스는 1998년 국내 첫 에스프레소 커피전문점을 개점한 순수 국내브랜드로서 당당하게 시장 선점이라는 확고한 위치를 확보하고 국내 에스프레소 커피시장을 주도해 나가고 있습니다." name="Description"/>
<meta content="HOLLYS F&amp;B" name="Author"/>
<meta content="HOLLYS F&amp;B" name="Publisher"/>
<meta content="COFFEE,커피,음료,푸드" name="Classification"/>
<meta content="Korea" name="Location"/>
<meta content="2015.04.01" name="Author-Date"/>
<meta content="2023.01.10" name="Date"/>
<meta content="HOLLYS, HOLLYS F&amp;B" name="Distribution"/>
<meta content="HOLLYS" name="Copyright"/>
<meta content="website" property="og:type"/>
<meta content="할리스" property="og:title"/>
<meta content="HOLLYS" property="og:description"/>
<meta content="https://www.hollys.co.kr/" 생략
tbody = soup.find("tbody")
tbody
<tbody>
<tr>
<!--
				<td class="noline center_t">
																<a href="javascript:goLogin();"><img src="https://www.hollys.co.kr/websrc/images/store/ico_favorite_off.png" alt="즐겨찾기"></a>
									</td>
				 -->
<td class="noline center_t">서울 강남구</td>
<td class="center_t"><a href="#" onclick="javascript:storeView(1032); return false;">강남우리라운지점</a></td>
<td class="center_t tdp0">영업중</td>
<td class="center_t"><a href="#" onclick="javascript:storeView(1032); return false;">서울특별시 강남구 테헤란로 301 역삼동 701-02 삼정빌딩 1층</a></td>
<td class="center_t">
<img alt="주차" src="https://www.hollys.co.kr/websrc/images/store/img_store_s08.png" style="margin-right:1px"/>
</td>
<td class="center_t">02-566-1002</td>
</tr>
<tr>
<!--
				<td class="noline center_t">
																<a href="javascript:goLogin();"><img src="https://www.hollys.co.kr/websrc/images/store/ico_favorite_off.png" alt="즐겨찾기"></a>
									</td>
				 -->
<td class="noline center_t">경기 수원시</td>
<td class="center_t"><a href="#" onclick="javascript:storeView(1031); return false;">수원영통점</a></td>
<td class="center_t tdp0">영업중</td>
<td class="center_t"><a href="#" onclick="javascript:storeView(1031); return false;">경기도 수원시 영통구 청명남로 10 영통동 1000-1</a></td>
<td class="center_t">
<img alt="주차" src="https://www.hollys.co.kr/websrc/images/store/img_store_s08.png" style="margin-right:1px"/>
</td>
<td class="center_t">031-202-3356</td>
</tr>
<tr>
<!--
				<td class="noline center_t">
																<a href="javascript:goLogin();"><img src="https://www.hollys.co.kr/websrc/images/store/ico_favorite_off.png" alt="즐겨찾기"></a>
									</td>
				 -->
<td class="noline center_t">경기 고양시 덕양구</td>
<td class="center_t"><a href="#" onclick="javascript:storeView(1030); return false;">원흥역점</a></td>
<td class="center_t tdp0">영업중</td>
<td class="center_t"><a href="#" onclick="javascript:storeView(1030); return false;">경기 고양시 덕양구 권율대로 690 201동 108호~111호</a></td>
<td class="center_t">
<img alt="주차" src="https://www.hollys.co.kr/websrc/images/store/img_store_s08.png" style="margin-right:1px"/>
</td>
<td class="center_t">031.967.0302</td>
</tr>
<tr>
<!--
				<td class="noline center_t">
																<a href="javascript:goLogin();"><img src="https://www.hollys.co.kr/websrc/images/store/ico_favorite_off.png" alt="즐겨찾기"></a>
									</td>
				 -->
<td class="noline center_t">서울 용산구</td>
<td class="center_t"><a href="#" onclick="javascript:storeView(1029); return false;">이태원역점</a></td>
<td class="center_t tdp0">영업중</td>
<td class="center_t"><a href="#" onclick="javascript:storeView(1029); return false;">서울특별시 용산구 이태원로 180 2층~3층</a></td>
<td class="center_t">
</td>
<td class="center_t">02-749-8752</td>
</tr>
<tr>
<!--
				<td class="noline center_t">
																<a href="javascript:goLogin();"><img src="https://www.hollys.co.kr/websrc/images/store/ico_favorite_off.png" alt="즐겨찾기"></a>
									</td>
				 -->
<td class="noline center_t">서울 양천구</td>
<td class="center_t"><a href="#" onclick="javascript:storeView(1028); return false;">오목교역점</a></td>
<td class="center_t tdp0">영업중</td>
<td class="center_t"><a href="#" onclick="javascript:storeView(1028); return false;">서울특별시 양천구 오목로 344 (목동, 청학빌딩) 1층</a></td>
<td class="center_t">
<img alt="주차" src="https://www.hollys.co.kr/websrc/images/store/img_store_s08.png" style="margin-right:1px"/>
</td>
<td class="center_t">02-2062-8405</td>
</tr>
<tr>
<!--
				<td class="noline center_t">
																<a href="javascript:goLogin();"><img src="https://www.hollys.co.kr/websrc/images/store/ico_favorite_off.png" alt="즐겨찾기"></a>
									</td>
				 -->
<td class="noline center_t">대구 서구</td>
<td class="center_t"><a href="#" onclick="javascript:storeView(1027); return false;">롯데시네마프리미엄만경관점</a></td>
<td class="center_t tdp0">영업중</td>
<td class="center_t"><a href="#" onclick="javascript:storeView(1027); return false;">대구 중구 국채보상로 547 MMC 만경관 4층 .</a></td>
<td class="center_t">
<img alt="주차" src="https://www.hollys.co.kr/websrc/images/store/img_store_s08.png" style="margin-right:1px"/>
</td>
<td class="center_t">070-7717-2192</td>
</tr>
<tr>
<!--
				<td class="noline center_t">
																<a href="javascript:goLogin();"><img src="https://www.hollys.co.kr/websrc/images/store/ico_favorite_off.png" alt="즐겨찾기"></a>
									</td>
				 -->
<td class="noline center_t">전남 순천시</td>
<td class="center_t"><a href="#" onclick="javascript:storeView(1024); return false;">순천신대점</a></td>
<td class="center_t tdp0">영업중</td>
<td class="center_t"><a href="#" onclick="javascript:storeView(1024); return false;">전라남도 순천시 해룡면 향매로 67 신대리 1978</a></td>
<td class="center_t">
</td>
<td class="center_t">061-723-5185</td>
</tr>
<tr>
<!--
				<td class="noline center_t">
																<a href="javascript:goLogin();"><img src="https://www.hollys.co.kr/websrc/images/store/ico_favorite_off.png" alt="즐겨찾기"></a>
									</td>
				 -->
<td class="noline center_t">서울 금천구</td>
<td class="center_t"><a href="#" onclick="javascript:storeView(1023); return false;">시흥사거리점</a></td>
<td class="center_t tdp0">영업중</td>
<td class="center_t"><a href="#" onclick="javascript:storeView(1023); return false;">서울특별시 금천구 시흥대로 225 시흥동 994-9</a></td>
<td class="center_t">
<img alt="주차" src="https://www.hollys.co.kr/websrc/images/store/img_store_s08.png" style="margin-right:1px"/>
</td>
<td class="center_t">02-804-9226</td>
</tr>
<tr>
<!--
				<td class="noline center_t">
																<a href="javascript:goLogin();"><img src="https://www.hollys.co.kr/websrc/images/store/ico_favorite_off.png" alt="즐겨찾기"></a>
									</td>
				 -->
<td class="noline center_t">전북 전주시 완산구</td>
<td class="center_t"><a href="#" onclick="javascript:storeView(1022); return false;">전주효천점</a></td>
<td class="center_t tdp0">영업중</td>
<td class="center_t"><a href="#" onclick="javascript:storeView(1022); return false;">전라북도 전주시 완산구 쑥고개로 351, 1층 101호~102호 효자동2가 1326-4</a></td>
<td class="center_t">
<img alt="주차" src="https://www.hollys.co.kr/websrc/images/store/img_store_s08.png" style="margin-right:1px"/>
</td>
<td class="center_t">063-224-5777</td>
</tr>
<tr>
<!--
				<td class="noline center_t">
																<a href="javascript:goLogin();"><img src="https://www.hollys.co.kr/websrc/images/store/ico_favorite_off.png" alt="즐겨찾기"></a>
									</td>
				 -->
<td class="noline center_t">제주 제주시</td>
<td class="center_t"><a href="#" onclick="javascript:storeView(1021); return false;">제주도두해안DT점</a></td>
<td class="center_t tdp0">영업중</td>
<td class="center_t"><a href="#" onclick="javascript:storeView(1021); return false;">제주특별자치도 제주시 도두일동 1686 .</a></td>
<td class="center_t">
<img alt="DT 매장" src="https://www.hollys.co.kr/websrc/images/store/img_store_s01.gif" style="margin-right:1px"/>
<img alt="테라스" src="https://www.hollys.co.kr/websrc/images/store/img_store_s02.gif" style="margin-right:1px"/>
<img alt="주차" src="https://www.hollys.co.kr/websrc/images/store/img_store_s08.png" style="margin-right:1px"/>
</td>
<td class="center_t">064-745-7301</td>
</tr>
</tbody>
len(tbody.find_all("tr"))
10
tr0 = tbody.find_all("tr")[0]
tr0.find_all("td")
[<td class="noline center_t">서울 강남구</td>,
 <td class="center_t"><a href="#" onclick="javascript:storeView(1032); return false;">강남우리라운지점</a></td>,
 <td class="center_t tdp0">영업중</td>,
 <td class="center_t"><a href="#" onclick="javascript:storeView(1032); return false;">서울특별시 강남구 테헤란로 301 역삼동 701-02 삼정빌딩 1층</a></td>,
 <td class="center_t">
 <img alt="주차" src="https://www.hollys.co.kr/websrc/images/store/img_store_s08.png" style="margin-right:1px"/>
 </td>,
 <td class="center_t">02-566-1002</td>]
tds = tr0.find_all("td")
tds[0].text, tds[1].text, tds[3].text, tds[5].text
('서울 강남구', '강남우리라운지점', '서울특별시 강남구 테헤란로 301 역삼동 701-02 삼정빌딩 1층', '02-566-1002')
tr1 = tbody.find_all("tr")[1]
tds = tr1.find_all("td")
tds[0].text, tds[1].text, tds[3].text, tds[5].text
('경기 수원시', '수원영통점', '경기도 수원시 영통구 청명남로 10 영통동 1000-1', '031-202-3356')
profile
DL 공부중

0개의 댓글