Today I Learned(20221111)

Seoyoon Choi·2022년 11월 11일

TodayILearned

목록 보기

3/5

한번 배웠던 Python임에도 기억이 하나도 안 난다..^^

어떡하쥐..?

# LIST
# Order matters in a list,
# and indexing always starts at 0.

a_list = [
  'apple',
  'pear',
  '감',
  '수박',
]

# append() puts the new item at the end, so '딸기' lands at index 4.
a_list.append('딸기')
a_list[4]

# Dictionary: values are looked up by key instead of by position.

a_dict = {'name': 'claire', 'age': '29'}

a_dict

# A list holding dictionaries is the structure seen most often.
# '철수' is element 0 and '영희' is element 1.
a_list = [{'name': '철수', 'age': '16'}, {'name': '영희', 'age': '25'}]
# Index into the list first, then look up the key in that dictionary.
a_list[0]['age']

##함수

def sum(a, b):
  """Print 'hello world', then return ``a + b``.

  NOTE: this definition shadows the built-in ``sum`` from here on.
  """
  print('hello world')
  total = a + b
  return total


# The call stays at column 0 on purpose: an indented line here would be
# treated as part of the function body.
result = sum(2, 3)
result

##조건문

age = 25


def is_adult(age):
  """Report whether ``age`` counts as an adult (strictly over 20).

  Prints '성인' for an adult and '청소년' otherwise, and also returns the
  result so the function can be used in a condition; previously it
  returned None, which is always falsy.

  Args:
    age: value compared against 20. The parameter shadows the
      module-level ``age`` inside this function.

  Returns:
    True if ``age > 20``, otherwise False.
  """
  adult = age > 20
  if adult:
    print('성인')
  else:
    print('청소년')
  return adult

## Loops
## Lists and loops are used together most of the time.

a_list = [
  '사과',
  '배',
  '감',
  '귤',
]

# Visit the items in order and print each one on its own line.
for a in a_list:
  print(a)

##스크랩핑 실습
## enter를 치는 라이브러리 "requests"
## https://search.naver.com/search.naver?where=news&ie=utf8&sm=nws_hty&query=%EC%82%BC%EC%84%B1%EC%A0%84%EC%9E%90

##크롤링 기본코드

import requests
from bs4 import BeautifulSoup




# Send a browser-style User-Agent header with the request.
headers = {'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'}
# requests has no default timeout, so without one a stalled connection
# would block this script indefinitely.
data = requests.get('https://search.naver.com/search.naver?where=news&ie=utf8&sm=nws_hty&query=삼성전자', headers=headers, timeout=10)

# Parse the response body with the built-in HTML parser.
soup = BeautifulSoup(data.text, 'html.parser')

soup

# Selector copied from the browser developer tools; it targets one <a> tag.
a = soup.select_one('#sp_nws1 > div.news_wrap.api_ani_send > a')

print(a)
# select_one() returns None when nothing matches, so guard before
# reading attributes of the tag.
if a is not None:
  a.text  # only the text inside the <a> tag
  a['href']  # the link target

## Fetching several news items
## Select the <li> tags first, then the <a> tag inside each of them.
lis = soup.select('#main_pack > section > div > div.group_news > ul > li')  # step from the <ul> down into its <li> children
# Among the several <a> tags in an item, pick the one with class "news_tit".
# The result is bound to ``a`` so the lookup below reads this anchor rather
# than whatever ``a`` was left over from an earlier statement; an empty
# result list yields None instead of an IndexError.
a = lis[0].select_one('a.news_tit') if lis else None

# select_one() returns None when the item has no matching tag.
if a is not None:
  a['href']

## Walk through the news items one at a time with a loop.

for li in lis:
  a = li.select_one('a.news_tit')
  # select_one() returns None for an item without a title link; skip it
  # instead of failing on ``None.text``.
  if a is None:
    continue
  print(a.text)

##뉴스가져오는 함수 만들기

def get_news(keyword):
  """Print the titles found on the Naver news search page for ``keyword``.

  Args:
    keyword: search term. It is passed through ``params`` so that requests
      URL-encodes it; interpolating it straight into the URL would corrupt
      the query for keywords containing '&', '#' or spaces.

  Returns:
    None. Each title is written to stdout, one per line.

  Raises:
    requests.exceptions.RequestException: if the request fails or does not
      complete within the timeout.
  """
  headers = {'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'}
  # Same query string as the hard-coded URL used earlier, with the search
  # term swapped for ``keyword``.
  params = {'where': 'news', 'ie': 'utf8', 'sm': 'nws_hty', 'query': keyword}
  # requests has no default timeout; set one so a stalled connection
  # cannot hang the caller.
  data = requests.get('https://search.naver.com/search.naver', params=params, headers=headers, timeout=10)
  soup = BeautifulSoup(data.text, 'html.parser')

  # Step from the <ul> down into its <li> children.
  lis = soup.select('#main_pack > section > div > div.group_news > ul > li')

  for li in lis:
    a = li.select_one('a.news_tit')
    # Skip items that have no title link instead of failing on None.text.
    if a is None:
      continue
    print(a.text)

## 1주차 숙제

##파일 읽는코드
import urllib.request
import openpyxl
wb = openpyxl.load_workbook('관리종목.xlsx')
sheet = wb['종목']


# Drop the first row of the sheet (presumably the header row; verify
# against the workbook).
new_rows = list(sheet.rows)[1:]


for row in new_rows:
  # The first column is read as the name, the second as the code.
  code = row[1].value
  name = row[0].value
  # Empty cells come back as None; skip them rather than requesting
  # ".../None.png" and writing a file named "None".
  if code is None or name is None:
    continue
  url = f'https://ssl.pstatic.net/imgfinance/chart/item/area/day/{code}.png'
  # The URL points at a .png resource, so keep that extension on disk
  # instead of mislabeling the file as .jpg.
  urllib.request.urlretrieve(url, f"{name}.png")

  
  ##print(row[0].value, row[1].value)

Seoyoon Choi

이전 포스트

TodayILearned(20221109)

다음 포스트

Today I Learned(20221111)

TodayILearned

TodayILearned(20221109)

Today I Learned(20221114)

0개의 댓글