import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
import time
# Path to the local chromedriver binary used by Selenium.
SERVICE = Service("/Users/****/Development/chromedriver")
# Google Form that receives one response per scraped listing.
GOOGLE_FORM = "https://docs.google.com/forms/d/e/1FAIpQLSdlfzrV7SHNXONkQjE7Mj6cvJhq6MWFotKCqq8UlCP9iPtu3g/viewform?usp=sf_link"
# Zillow rental search; the searchQueryState parameter is URL-encoded JSON
# carrying the map bounds and the filter settings.
ZILLOW_URL = "https://www.zillow.com/homes/for_rent/1-_beds/?searchQueryState=%7B%22pagination%22%3A%7B%7D%2C%22mapBounds%22%3A%7B%22west%22%3A-122.92290724707031%2C%22east%22%3A-121.94375075292969%2C%22south%22%3A37.4478420315079%2C%22north%22%3A38.10129696297871%7D%2C%22isMapVisible%22%3Afalse%2C%22filterState%22%3A%7B%22price%22%3A%7B%22max%22%3A872627%7D%2C%22beds%22%3A%7B%22min%22%3A1%7D%2C%22fore%22%3A%7B%22value%22%3Afalse%7D%2C%22mp%22%3A%7B%22max%22%3A3000%7D%2C%22nc%22%3A%7B%22value%22%3Afalse%7D%2C%22fr%22%3A%7B%22value%22%3Atrue%7D%2C%22cmsn%22%3A%7B%22value%22%3Afalse%7D%2C%22fsba%22%3A%7B%22value%22%3Afalse%7D%7D%2C%22isListVisible%22%3Atrue%7D"
# Headers copied from a desktop Chrome session so the request identifies
# itself as a regular browser.
headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.83 Safari/537.36",
    "Accept-Language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7",
}
# Without a timeout requests can block indefinitely on a stalled connection.
response = requests.get(ZILLOW_URL, headers=headers, timeout=30)
# Fail loudly on 4xx/5xx instead of parsing an error page into empty lists.
response.raise_for_status()
soup = BeautifulSoup(response.content, "html.parser")
# Listing URLs. An href that does not start with "http" is treated as
# site-relative and prefixed with the Zillow host so the value stored in the
# form is a usable link. A missing href is kept as None, as before.
links = []
for anchor in soup.find_all(name="a", class_="list-card-link list-card-link-top-margin"):
    href = anchor.get("href")
    if href and not href.startswith("http"):
        href = f"https://www.zillow.com{href}"
    links.append(href)
# Price text, keeping only what precedes the first "/".
prices = [price.getText().split("/")[0] for price in soup.find_all(name="div", class_="list-card-price")]
# Full text of each listing's <address> element.
addresses = [address.getText() for address in soup.find_all(name="address", class_="list-card-addr")]
class GoogleForm:
    """Chrome session that submits the scraped listings to the Google Form."""

    # Absolute XPaths of the three text inputs, the submit button, and the
    # "submit another response" link on the confirmation page.
    _ADDRESS_XPATH = "/html/body/div/div[2]/form/div[2]/div/div[2]/div[1]/div/div/div[2]/div/div[1]/div/div[1]/input"
    _PRICE_XPATH = "/html/body/div/div[2]/form/div[2]/div/div[2]/div[2]/div/div/div[2]/div/div[1]/div/div[1]/input"
    _LINK_XPATH = "/html/body/div/div[2]/form/div[2]/div/div[2]/div[3]/div/div/div[2]/div/div[1]/div/div[1]/input"
    _SUBMIT_XPATH = "/html/body/div/div[2]/form/div[2]/div/div[3]/div[1]/div[1]/div"
    _ANOTHER_XPATH = "/html/body/div[1]/div[2]/div[1]/div/div[4]/a"

    def __init__(self, service):
        """Start Chrome through *service* and open the form page."""
        self.driver = webdriver.Chrome(service=service)
        self.driver.get(GOOGLE_FORM)

    def fill_the_form(self):
        """Submit one form response per scraped listing.

        Reads the module-level ``addresses``, ``prices`` and ``links`` lists.
        Iteration stops at the shortest of the three, so a listing whose
        price or link was not scraped no longer raises ``IndexError``
        halfway through the run.
        """
        for address, price, link in zip(addresses, prices, links):
            time.sleep(1)
            # The inputs must be looked up again on every pass: clicking
            # "submit another response" loads a fresh page, and element
            # handles obtained from the previous page are stale by then.
            address_input = self.driver.find_element(By.XPATH, self._ADDRESS_XPATH)
            price_input = self.driver.find_element(By.XPATH, self._PRICE_XPATH)
            link_input = self.driver.find_element(By.XPATH, self._LINK_XPATH)
            address_input.send_keys(address)
            price_input.send_keys(price)
            link_input.send_keys(link)
            submit_button = self.driver.find_element(By.XPATH, self._SUBMIT_XPATH)
            submit_button.click()
            time.sleep(1)
            # Return to an empty form for the next listing.
            another_form = self.driver.find_element(By.XPATH, self._ANOTHER_XPATH)
            another_form.click()
# Open the form in Chrome and submit every scraped listing.
form = GoogleForm(SERVICE)
try:
    form.fill_the_form()
finally:
    # Always end the WebDriver session, even when a lookup fails mid-run,
    # so no browser/chromedriver process is left behind.
    form.driver.quit()
첫 번째 form 제출은 성공했는데, 두 번째 form 제출은 안 되고...
prices도 run 할 때마다 값을 제대로 물고 올 때도 있고, 못 물고 올 때도 있고...
from bs4 import BeautifulSoup
import requests
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
import time
# Headers copied from a desktop Chrome session so the request identifies
# itself as a regular browser.
header = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.83 Safari/537.36",
    "Accept-Language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7",
}
# Zillow rental search; the searchQueryState parameter is URL-encoded JSON.
# A timeout keeps the script from blocking forever on a stalled connection.
response = requests.get(
    "https://www.zillow.com/homes/for_rent/1-_beds/?searchQueryState=%7B%22pagination%22%3A%7B%7D%2C%22mapBounds%22%3A%7B%22west%22%3A-122.92290724707031%2C%22east%22%3A-121.94375075292969%2C%22south%22%3A37.4478420315079%2C%22north%22%3A38.10129696297871%7D%2C%22isMapVisible%22%3Afalse%2C%22filterState%22%3A%7B%22price%22%3A%7B%22max%22%3A872627%7D%2C%22beds%22%3A%7B%22min%22%3A1%7D%2C%22fore%22%3A%7B%22value%22%3Afalse%7D%2C%22mp%22%3A%7B%22max%22%3A3000%7D%2C%22nc%22%3A%7B%22value%22%3Afalse%7D%2C%22fr%22%3A%7B%22value%22%3Atrue%7D%2C%22cmsn%22%3A%7B%22value%22%3Afalse%7D%2C%22fsba%22%3A%7B%22value%22%3Afalse%7D%7D%2C%22isListVisible%22%3Atrue%7D",
    headers=header,
    timeout=30,
)
# Fail loudly on 4xx/5xx instead of parsing an error page into empty lists.
response.raise_for_status()
data = response.text
soup = BeautifulSoup(data, "html.parser")
# Collect one URL per listing card, echoing each raw href as it is read.
all_link_elements = soup.select(".list-card-top a")
all_links = []
for anchor in all_link_elements:
    href = anchor["href"]
    print(href)
    # An href without "http" in it is treated as site-relative and gets the
    # Zillow host prepended; anything else is stored unchanged.
    absolute = href if "http" in href else f"https://www.zillow.com{href}"
    all_links.append(absolute)
# Address text of every listing card. When the text contains " | ", only the
# last " | "-separated piece is kept.
all_address_elements = soup.select(".list-card-info address")
all_addresses = []
for address_tag in all_address_elements:
    pieces = address_tag.get_text().split(" | ")
    all_addresses.append(pieces[-1])
# Collect one price per listing card. Single and multiple listings use
# different tag and class structures, so a second selector is tried when the
# first one yields nothing.
all_price_elements = soup.select(".list-card-heading")
all_prices = []
for element in all_price_elements:
    try:
        # Price with only one listing
        price = element.select(".list-card-price")[0].contents[0]
    except IndexError:
        print('Multiple listings for the card')
        # Price with multiple listings
        price = element.select(".list-card-details li")[0].contents[0]
    # Append only after a price was actually found. Doing this in a
    # ``finally`` clause would also run when the fallback lookup raises,
    # re-adding the previous card's price or hitting a NameError on the
    # first card that hides the real IndexError.
    all_prices.append(price)
# Create Spreadsheet using Google Form
# Substitute your own chromedriver path here.
SERVICE = Service("/Users/****/Development/chromedriver")
driver = webdriver.Chrome(service=SERVICE)
try:
    # zip() walks the three lists in lockstep and stops at the shortest one,
    # so a card with a missing price or address cannot raise IndexError in
    # the middle of the run.
    for listing_address, listing_price, listing_link in zip(all_addresses, all_prices, all_links):
        # Substitute your own Google Form URL here.
        # The form is reloaded for every listing, so the element lookups
        # below always refer to the current page.
        driver.get("https://docs.google.com/forms/d/e/1FAIpQLSdlfzrV7SHNXONkQjE7Mj6cvJhq6MWFotKCqq8UlCP9iPtu3g/viewform?usp=sf_link")
        # Fixed pause to let the form render before locating its inputs.
        time.sleep(2)
        address = driver.find_element(
            By.XPATH,
            '//*[@id="mG61Hd"]/div[2]/div/div[2]/div[1]/div/div/div[2]/div/div[1]/div/div[1]/input',
        )
        price = driver.find_element(
            By.XPATH,
            '//*[@id="mG61Hd"]/div[2]/div/div[2]/div[2]/div/div/div[2]/div/div[1]/div/div[1]/input',
        )
        link = driver.find_element(
            By.XPATH,
            '//*[@id="mG61Hd"]/div[2]/div/div[2]/div[3]/div/div/div[2]/div/div[1]/div/div[1]/input',
        )
        submit_button = driver.find_element(By.XPATH, '//*[@id="mG61Hd"]/div[2]/div/div[3]/div[1]/div/div')
        address.send_keys(listing_address)
        price.send_keys(listing_price)
        link.send_keys(listing_link)
        submit_button.click()
finally:
    # Always end the WebDriver session so no browser/chromedriver process is
    # left running after a failure.
    driver.quit()
solution은 이런데, 이건 아예 작동을 안 하는데... 이유를 모르겠네..?