<웹 크롤링과 플라스크로 웹에 올리기 - 노마드 코더>
from flask import Flask
# Create the Flask application object for this project.
app = Flask("SuperScrapper")
# Start the development server on 0.0.0.0 so it is reachable from outside the host.
app.run("0.0.0.0")
from flask import Flask

app = Flask("FlaskClass")


@app.route("/")
def home():
    """Return the greeting shown at the site root."""
    return "hello Welcome"


@app.route("/contact")  # the browser can navigate to /contact
def contact():
    """Return the text shown at /contact."""
    return "contact me!"


app.run("0.0.0.0")  # for Replit
(키워드 인자로 핸들하기)
from flask import Flask

app = Flask("FlaskClass")


@app.route("/")
def home():
    """Return the greeting shown at the site root."""
    return "hello Welcome"


@app.route("/<username>")
def contact(username):
    """Greet the visitor using the path segment captured by the route.

    Args:
        username: The text matched by ``<username>`` in the URL; Flask passes
            it to the view as a keyword argument of the same name.
    """
    return f"Hello, your name is {username}"


app.run("0.0.0.0")
(사용자에게 웹사이트를 보여주는 것)
먼저 html 파일을 templates 폴더에 저장
(job.html)
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Job Search</title>
  </head>
  <body>
    <h1>Job Search</h1>
    <!-- First version of the search form: no action/name yet, so nothing is sent. -->
    <form action="">
      <input placeholder="What job do you want?" required />
      <button>Search</button>
    </form>
  </body>
</html>
이후 render_template을 임포트하여 파일을 불러오면 떠 있는 창에서 볼 수 있음
(main.py)
from flask import Flask, render_template

app = Flask("FlaskClass")


@app.route("/")
def home():
    """Render the job search form stored as templates/job.html."""
    return render_template("job.html")


app.run("0.0.0.0")
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Job Search</title>
  </head>
  <body>
    <h1>Search Result</h1>
    <!-- searchingBy is supplied by the view through render_template(...). -->
    <h3>You are looking for {{searchingBy}}</h3>
  </body>
</html>
job.html
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Job Search</title>
  </head>
  <body>
    <h1>Job Search</h1>
    <!-- Sends the query as GET /report?word=... -->
    <form action="/report" method="get">
      <input name="word" placeholder="What job do you want?" required />
      <button>Search</button>
    </form>
  </body>
</html>
from flask import Flask, redirect, render_template, request

app = Flask("FlaskClass")


@app.route("/")
def home():
    """Render the job search form."""
    return render_template("job.html")


@app.route("/report")
def report():
    """Render the report page for the ``word`` query parameter.

    Redirects to the home page when no search word was supplied.
    """
    word = request.args.get('word')
    if not word:
        return redirect("/")
    # Lower-case the word so differently-cased searches are treated alike.
    word = word.lower()
    return render_template("report.html", searchingBy=word)


app.run("0.0.0.0")
import requests
from bs4 import BeautifulSoup
def get_last_page(url):
    """Return the last page number advertised by the search result page.

    Args:
        url: Search URL to fetch.

    Returns:
        The page number read from the second-to-last pagination link, or 1
        when the page has no usable pagination block.
    """
    result = requests.get(url)
    soup = BeautifulSoup(result.text, "html.parser")
    pagination = soup.find("div", {"class": "s-pagination"})
    if pagination is None:
        # No pagination widget on the page: treat it as a single page.
        return 1
    pages = pagination.find_all("a")
    if len(pages) < 2:
        return 1
    # Presumably the final link is a "next" button, hence the second-to-last
    # link carries the highest page number -- verify against the live markup.
    last_page = pages[-2].get_text(strip=True)
    return int(last_page)
def extract_job(html):
    """Build a job record from one job element.

    Args:
        html: Parsed element for a single job posting (extract_jobs passes
            each ``div`` with class ``-job``).

    Returns:
        A dict with the keys ``title``, ``company``, ``location`` and
        ``apply_link``.
    """
    title = html.find("h2", {"class": "mb4"}).find("a")["title"]
    # Only direct <span> children are taken; the unpacking expects exactly
    # two of them: the company first, then the location.
    company, location = html.find("h3", {"class": "mb4"}).find_all(
        "span", recursive=False
    )
    company = company.get_text(strip=True)
    location = location.get_text(strip=True).strip('-')
    job_id = html["data-jobid"]
    return {
        "title": title,
        'company': company,
        'location': location,
        'apply_link': f"https://stackoverflow.com/jobs/{job_id}",
    }
def extract_jobs(last_page, url):
    """Collect the job records from every result page.

    Args:
        last_page: Number of result pages to fetch.
        url: Search URL without the page parameter.

    Returns:
        A list of the dicts produced by extract_job, in page order.
    """
    jobs = []
    for page in range(last_page):
        # range() is 0-based, so add 1 to get the page number. The value must
        # be interpolated; the literal text "page+1" requested the same page
        # on every iteration.
        response = requests.get(f"{url}&pg={page + 1}")
        soup = BeautifulSoup(response.text, "html.parser")
        cards = soup.find_all("div", {"class": "-job"})
        jobs.extend(extract_job(card) for card in cards)
    return jobs
def get_jobs(word):
    """Scrape all job postings that match a search word.

    Args:
        word: Search term typed by the user.

    Returns:
        The list of job dicts returned by extract_jobs.
    """
    from urllib.parse import quote_plus

    # Encode the term so spaces, '&', '#', etc. cannot break the query string.
    url = f"https://stackoverflow.com/jobs?q={quote_plus(word)}&sort=i"
    last_page = get_last_page(url)
    return extract_jobs(last_page, url)
이 상태에서 홈페이지에 아무 직업이나 검색하면 시간이 조금 걸리긴 하지만 모두 띄워지기는 함.(양식은 전과 동일)
from flask import Flask, redirect, render_template, request
from scrapper import get_jobs

app = Flask("FlaskClass")

# In-memory cache of scraped jobs keyed by the lower-cased search word.
# It is defined at module level, outside the route functions.
db = {}


@app.route("/")
def home():
    """Render the job search form."""
    return render_template("job.html")


@app.route("/report")
def report():
    """Scrape (or reuse cached) jobs for ``word`` and render the report page.

    Redirects to the home page when no search word was supplied.
    """
    word = request.args.get('word')
    if not word:
        return redirect("/")
    word = word.lower()
    # Only scrape when nothing (or an empty result) is cached for this word.
    if not db.get(word):
        db[word] = get_jobs(word)
    return render_template("report.html", searchingBy=word)


app.run("0.0.0.0")
여기까지 하면 다시 검색해도 금방 나옴
(db에 저장되어 있다는 뜻.) ; db는 route 함수 밖(모듈 수준)에서 선언해야 요청이 바뀌어도 값이 유지됨
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Job Search</title>
  </head>
  <body>
    <h1>Search Result</h1>
    <!-- resultsNumber and searchingBy are supplied by the view through render_template(...). -->
    <h3>Found {{resultsNumber}} results for : {{searchingBy}}</h3>
  </body>
</html>
main.py
from flask import Flask, redirect, render_template, request
from scrapper import get_jobs

app = Flask("FlaskClass")

# In-memory cache of scraped jobs keyed by the lower-cased search word.
db = {}


@app.route("/")
def home():
    """Render the job search form."""
    return render_template("job.html")


@app.route("/report")
def report():
    """Render the report page with the number of jobs found for ``word``.

    Redirects to the home page when no search word was supplied.
    """
    word = request.args.get('word')
    if not word:
        return redirect("/")
    word = word.lower()
    jobs = db.get(word)
    if not jobs:
        # Cache miss (or empty cached result): scrape and remember it.
        jobs = get_jobs(word)
        db[word] = jobs
    return render_template(
        "report.html", searchingBy=word, resultsNumber=len(jobs)
    )


app.run("0.0.0.0")
report.html
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Job Search</title>
    <style>
      /* Four equal columns: title, company, location, link. */
      section {
        display : grid;
        gap : 20px;
        grid-template-columns : repeat(4, 1fr);
      }
    </style>
  </head>
  <body>
    <h1>Search Result</h1>
    <h3>Found {{resultsNumber}} results for : {{searchingBy}}</h3>
    <section>
      <h4>title</h4>
      <h4>company</h4>
      <h4>location</h4>
      <h4>link</h4>
      {% for job in jobs %}
      <span>{{job.title}}</span>
      <span>{{job.company}}</span>
      <span>{{job.location}}</span>
      <!-- The scraper stores the URL under "apply_link"; "link" does not exist. -->
      <a href="{{job.apply_link}}" target="_blank" rel="noopener">apply</a>
      {% endfor %}
    </section>
  </body>
</html>
main.py
from flask import Flask, redirect, render_template, request
from scrapper import get_jobs

app = Flask("FlaskClass")

# In-memory cache of scraped jobs keyed by the lower-cased search word.
db = {}


@app.route("/")
def home():
    """Render the job search form."""
    return render_template("job.html")


@app.route("/report")
def report():
    """Render the report page listing the jobs found for ``word``.

    Redirects to the home page when no search word was supplied.
    """
    word = request.args.get('word')
    if not word:
        return redirect("/")
    word = word.lower()
    existing_jobs = db.get(word)
    if existing_jobs:
        jobs = existing_jobs
    else:
        # Cache miss (or empty cached result): scrape and remember it.
        jobs = get_jobs(word)
        db[word] = jobs
    return render_template(
        "report.html", searchingBy=word, resultsNumber=len(jobs), jobs=jobs
    )


app.run("0.0.0.0")
report.html
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Job Search</title>
    <style>
      /* Four equal columns: title, company, location, link. */
      section {
        display : grid;
        gap : 20px;
        grid-template-columns : repeat(4, 1fr);
      }
    </style>
  </head>
  <body>
    <h1>Search Result</h1>
    <h3>Found {{resultsNumber}} results for : {{searchingBy}}</h3>
    <!-- urlencode keeps words containing spaces or '&' intact in the query string. -->
    <a href="/export?word={{searchingBy|urlencode}}">Export to CSV</a>
    <section>
      <h4>title</h4>
      <h4>company</h4>
      <h4>location</h4>
      <h4>link</h4>
      {% for job in jobs %}
      <span>{{job.title}}</span>
      <span>{{job.company}}</span>
      <span>{{job.location}}</span>
      <!-- The scraper stores the URL under "apply_link"; "link" does not exist. -->
      <a href="{{job.apply_link}}" target="_blank" rel="noopener">apply</a>
      {% endfor %}
    </section>
  </body>
</html>
main.py(예외 처리 나옴)
from flask import Flask, redirect, render_template, request
from scrapper import get_jobs

app = Flask("FlaskClass")

# In-memory cache of scraped jobs keyed by the lower-cased search word.
db = {}


@app.route("/")
def home():
    """Render the job search form."""
    return render_template("job.html")


@app.route("/report")
def report():
    """Render the report page listing the jobs found for ``word``.

    Redirects to the home page when no search word was supplied.
    """
    word = request.args.get('word')
    if not word:
        return redirect("/")
    word = word.lower()
    existing_jobs = db.get(word)
    if existing_jobs:
        jobs = existing_jobs
    else:
        # Cache miss (or empty cached result): scrape and remember it.
        jobs = get_jobs(word)
        db[word] = jobs
    return render_template(
        "report.html", searchingBy=word, resultsNumber=len(jobs), jobs=jobs
    )


@app.route("/export")
def export():
    """Placeholder export view: confirm which word would be exported.

    Any problem (missing word, nothing cached for it) sends the visitor back
    to the home page.
    """
    try:  # practising exception handling
        word = request.args.get('word')
        if not word:
            raise Exception()
        word = word.lower()
        jobs = db.get(word)
        if not jobs:
            raise Exception()
        return f"Generate CSV for {word}"
    except Exception:
        # `except Exception` instead of a bare `except`, so SystemExit and
        # KeyboardInterrupt are not swallowed.
        return redirect("/")


app.run("0.0.0.0")
이 단계까지 하면 하이퍼링크를 눌렀을 때 어디로 이동하는지 정도는 알 수 있음.
import csv
def save_to_file(jobs):
    """Write the job records to ``jobs.csv`` in the current directory.

    The file is overwritten on every call. A fixed header row is written
    first, then one row per job using the dict's values in insertion order.

    Args:
        jobs: Iterable of dicts, one per job posting.

    Returns:
        None.
    """
    # newline="" is what the csv module expects (it writes its own line
    # endings); the with-block guarantees the file is flushed and closed.
    with open("jobs.csv", mode="w", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        writer.writerow(["title", "company", "location", "link"])
        writer.writerows(list(job.values()) for job in jobs)
main.py
from flask import Flask, render_template, request, redirect, send_file
from scrapper import get_jobs
from exporter import save_to_file

app = Flask("FlaskClass")

# In-memory cache of scraped jobs keyed by the lower-cased search word.
db = {}


@app.route("/")
def home():
    """Render the job search form."""
    return render_template("job.html")


@app.route("/report")
def report():
    """Render the report page listing the jobs found for ``word``.

    Redirects to the home page when no search word was supplied.
    """
    word = request.args.get('word')
    if not word:
        return redirect("/")
    word = word.lower()
    existing_jobs = db.get(word)
    if existing_jobs:
        jobs = existing_jobs
    else:
        # Cache miss (or empty cached result): scrape and remember it.
        jobs = get_jobs(word)
        db[word] = jobs
    return render_template(
        "report.html", searchingBy=word, resultsNumber=len(jobs), jobs=jobs
    )


@app.route("/export")
def export():
    """Write the cached jobs for ``word`` to jobs.csv and send the file.

    Any problem (missing word, nothing cached for it, failure while writing
    or sending the file) sends the visitor back to the home page.
    """
    try:
        word = request.args.get('word')
        if not word:
            raise Exception()
        word = word.lower()
        jobs = db.get(word)
        if not jobs:
            raise Exception()
        save_to_file(jobs)
        return send_file("jobs.csv")  # send_file must be imported from flask
    except Exception:
        # `except Exception` instead of a bare `except`, so SystemExit and
        # KeyboardInterrupt are not swallowed.
        return redirect("/")


app.run("0.0.0.0")
어려웠다. Flask의 쓰임새를 간접적으로 확인할 수 있었다. 추후 사용하게 될 경우 참고하자.