이미지를 다운로드하는 과정은 다음과 같다.
이미지 검색 -> 이미지 URL 수집 -> URL에 이미지 요청 -> 이미지 다운로드
from urllib.request import Request, urlopen

from selenium import webdriver
from selenium.webdriver.common.by import By
# Start the Chrome WebDriver session that every lookup below goes through.
# NOTE(review): no page navigation (e.g. driver.get) appears in this section —
# confirm the target page is opened elsewhere before the XPath lookups run.
driver = webdriver.Chrome()
# Read two lines from stdin, in this order: a keyword (presumably the search
# term; it is not used in the visible code) and the base file name that the
# single downloaded image is saved under.
keyword = input()
name = input()
# Absolute XPath of one <img> element on the page.
# NOTE(review): absolute paths depend on the exact page layout — confirm it
# still matches the live page.
image_xpath = '/html/body/div[1]/div[1]/div/div[2]/div[3]/div/div/div/div[3]/div/a/img'
# The image URL is the `src` attribute of the element the XPath points at.
def get_src(xpath):
    """Return the ``src`` attribute of the element located by *xpath*.

    The lookup goes through the module-level ``driver`` session.

    Args:
        xpath: XPath expression identifying the target element.

    Returns:
        Whatever ``get_attribute('src')`` yields for the matched element.
    """
    element = driver.find_element(By.XPATH, xpath)
    return element.get_attribute('src')
# Resolve the URL of the single image addressed by image_xpath.
image_url = get_src(image_xpath)
def download_image(image_url, name):
    """Download the image at *image_url* and save it as ``<name>.jpg``.

    The response body is read completely before the output file is created,
    so a failed request does not leave an empty ``.jpg`` behind.

    Args:
        image_url: URL of the image to fetch.
        name: Output path without extension; ``'.jpg'`` is appended.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        OSError: If the output file cannot be written.
    """
    # Send a browser-like User-Agent (presumably because some hosts reject
    # urllib's default agent — confirm against the target site).
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.9999.999 Safari/537.36'}
    req = Request(image_url, headers=headers)
    # Context managers close the response and the file even when an
    # exception is raised part-way through.
    with urlopen(req) as response:
        data = response.read()
    with open(name + '.jpg', "wb") as f:
        f.write(data)
# Save the image resolved above as "<name>.jpg".
download_image(image_url, name)
# Absolute XPath of the container element; it is a prefix of image_xpath, so
# the container sits above that image in the document tree.
image_area_xpath = '/html/body/div[1]/div[1]/div/div[2]/div[3]/div/div/div'
# Locate the container, then collect every <img> element beneath it.
image_area = driver.find_element(By.XPATH, image_area_xpath)
image_elements = image_area.find_elements(By.TAG_NAME, "img")
# Collect one download URL per <img> element.
result = []
for img in image_elements:
    # Attribute priority: plain `src` when the element has no `data-lazy`
    # attribute; otherwise `data-lazy-src` if it is non-empty, falling back to
    # `data-lazy` itself. (Presumably the data-lazy* attributes carry the real
    # URL of lazily loaded images — confirm against the page markup.)
    lazy_url = img.get_attribute("data-lazy")
    if lazy_url is None:
        img_url = img.get_attribute("src")
    else:
        img_url = img.get_attribute("data-lazy-src") or lazy_url
    # Skip elements without a usable URL (None or empty string), which would
    # otherwise raise TypeError in the substring test or fail at request time,
    # and skip URLs containing "blank" (presumably placeholder images).
    if img_url and "blank" not in img_url:
        result.append(img_url)
# Download every collected URL to cat_0.jpg, cat_1.jpg, ... in list order.
# Each iteration must fetch its own entry from `result`; requesting the
# module-level `image_url` here would save the same image under every name.
for i, img_url in enumerate(result):
    # download_image appends ".jpg", so the output file is f"cat_{i}.jpg".
    download_image(img_url, f"cat_{i}")