Stack Overflow Jobs Site Scraping
Example of the Scraping Process
Usage from the main script (the scraper module below is saved as so.py, matching the import):
from so import get_jobs as get_so_jobs
jobs = get_so_jobs()
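get_so_jobs() returns a list of dicts whose keys are built in extract_job below (title, company, location, link). As a usage sketch only, not part of the original script, the results could be written to a CSV file with the standard library; the helper name and file name here are hypothetical:
import csv

def save_to_csv(jobs, path="so_jobs.csv"):
    # One row per posting, columns matching the dict keys produced by extract_job().
    fields = ["title", "company", "location", "link"]
    with open(path, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fields)
        writer.writeheader()
        writer.writerows(jobs)

save_to_csv(jobs)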
import requests
from bs4 import BeautifulSoup

URL = "https://stackoverflow.com/jobs?q=python"

def get_last_page():
    # Fetch the first results page and read the page count from the pagination bar.
    result = requests.get(URL)
    soup = BeautifulSoup(result.text, "html.parser")
    pagination = soup.find("div", {"class": "s-pagination"})
    links = pagination.find_all("a")
    # The last anchor is the "next" control, so the second-to-last one holds the last page number.
    last_page = links[-2].get_text(strip=True)
    return int(last_page)
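To illustrate why links[-2] is used: the final anchor in the pagination bar was the "next" control, so the second-to-last anchor carries the last page number. A standalone sketch with made-up markup:
pagination_html = '<div class="s-pagination"><a>1</a><a>2</a><a>3</a><a>next</a></div>'
pagination = BeautifulSoup(pagination_html, "html.parser").find("div", {"class": "s-pagination"})
links = pagination.find_all("a")
print(links[-2].get_text(strip=True))  # -> "3", because the last anchor is "next"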
def extract_job(html):
    # The title is the title attribute of the <a> inside the card's <h2 class="mb4">.
    title = html.find("h2", {"class": "mb4"}).find("a")["title"]
    # The <h3 class="mb4"> holds two direct <span> children: company and location.
    company, location = html.find("h3", {"class": "mb4"}).find_all("span", recursive=False)
    company = company.get_text(strip=True)
    location = location.get_text(strip=True)
    # Each card carries the posting id in its data-jobid attribute; build the link from it.
    job_id = html["data-jobid"]
    link = f"https://stackoverflow.com/jobs/{job_id}"
    # Alternative: link = "https://stackoverflow.com" + html.find("h2", {"class": "mb4"}).find("a")["href"]
    return {"title": title, "company": company, "location": location, "link": link}
def extract_jobs(last_page):
    jobs = []
    for page in range(last_page):
        print(f"Scraping SO page {page + 1}")
        # Page numbers are passed through the pg query parameter.
        res = requests.get(f"{URL}&pg={page + 1}")
        html = BeautifulSoup(res.text, "html.parser")
        # Each posting is rendered as a <div> whose class list includes "-job".
        job_cards = html.find_all("div", {"class": "-job"})
        for job_card in job_cards:
            job = extract_job(job_card)
            jobs.append(job)
    return jobs
def get_jobs():
    # Orchestrator: find how many result pages exist, then scrape every page.
    last_page = get_last_page()
    jobs = extract_jobs(last_page)
    return jobs
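A quick way to smoke-test the module on its own (not part of the original script; the print format is illustrative):
if __name__ == "__main__":
    found = get_jobs()
    print(f"Found {len(found)} jobs")
    for job in found[:3]:
        print(job["title"], "|", job["company"], "|", job["link"])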