크롤링코드
import requests
from bs4 import BeautifulSoup
from pymongo import MongoClient
# Open a connection to the MongoDB server running on this machine, port 27017.
client = MongoClient(host='localhost', port=27017)
# Handle to the "dbsparta" database; the crawler functions below write to it.
db = client['dbsparta']
def get_urls():
    """Collect the detail-page URL of every person on the Naver people ranking.

    Downloads the ranking page, walks the rows of the ranking table and
    resolves each title link against the site root.

    Returns:
        list[str]: Absolute URLs in the order the rows appear on the page.
            Rows that have no ``td.title > a`` link are skipped.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Function-scope import keeps the block self-contained (standard library).
    from urllib.parse import urljoin

    base_url = 'https://movie.naver.com/'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'}
    # Without a timeout, requests waits indefinitely on a stalled connection.
    data = requests.get('https://movie.naver.com/movie/sdb/rank/rpeople.nhn', headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx instead of parsing an error page into an empty list.
    data.raise_for_status()

    soup = BeautifulSoup(data.text, 'html.parser')
    urls = []
    for tr in soup.select('#old_content > table > tbody > tr'):
        a = tr.select_one('td.title > a')
        if a is None:
            # Rows without a title link carry no person to crawl.
            continue
        # urljoin avoids the "//" that plain concatenation produces when the
        # href already starts with "/", and leaves absolute hrefs untouched.
        urls.append(urljoin(base_url, a['href']))
    return urls
def insert_star(url):
    """Scrape one person's detail page and store it in the ``mystar`` collection.

    The stored document holds the person's name, poster image URL, the first
    linked work in the profile list, the page URL and a ``like`` counter
    starting at 0. Pages on which any of the three elements cannot be found
    are skipped with a message instead of aborting the whole crawl.

    Args:
        url: Address of the person's detail page.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'}
    # Without a timeout, requests waits indefinitely on a stalled connection.
    data = requests.get(url, headers=headers, timeout=10)
    data.raise_for_status()
    soup = BeautifulSoup(data.text, 'html.parser')

    name_tag = soup.select_one('#content > div.article > div.mv_info_area > div.mv_info.character > h3 > a')
    img_tag = soup.select_one('#content > div.article > div.mv_info_area > div.poster > img')
    recent_tag = soup.select_one(
        '#content > div.article > div.mv_info_area > div.mv_info.character > dl > dd > a:nth-child(1)')

    # select_one returns None when nothing matches; dereferencing it would
    # raise and stop every remaining insert, so skip just this page.
    if name_tag is None or img_tag is None or recent_tag is None:
        print('skipped (expected elements not found):', url)
        return

    name = name_tag.text
    doc = {
        'name': name,
        'img_url': img_tag['src'],
        'recent': recent_tag.text,
        'url': url,
        'like': 0
    }
    db.mystar.insert_one(doc)
    print('완료!', name)
def insert_all():
    """Rebuild the ``mystar`` collection from the current people ranking.

    The URL list is fetched before the collection is dropped, so an error
    raised while downloading the ranking page leaves the existing data
    untouched.
    """
    urls = get_urls()
    db.mystar.drop()  # remove everything currently in the mystar collection
    for url in urls:
        insert_star(url)
# Run the full crawl immediately when this script is executed.
insert_all()
원격접속하기
git bash -> $ ssh -i '키 파일(.pem) 위치' ubuntu@서버주소
명령어
mkdir 폴더이름 : '폴더이름'으로 된 폴더를 만든다
ls : 현재 위치에 있는 파일과 폴더 목록을 보여준다
cd 폴더이름 : '폴더이름'으로 된 폴더에 들어간다
cd .. : 현재 폴더에서 나가 상위 폴더로 이동한다
컴퓨터 시간대(타임존) 세팅
sudo ln -sf /usr/share/zoneinfo/Asia/Seoul /etc/localtime
파이썬3(python3)를 python 명령어로 세팅
sudo update-alternatives --install /usr/bin/python python /usr/bin/python3 10
pip3를 pip로 세팅
sudo apt-get update
sudo apt-get install -y python3-pip
sudo update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1
mongoDB설치
wget -qO - https://www.mongodb.org/static/pgp/server-4.2.asc | sudo apt-key add -
echo "deb [ arch=amd64,arm64 ] https://repo.mongodb.org/apt/ubuntu bionic/mongodb-org/4.2 multiverse" | sudo tee /etc/apt/sources.list.d/mongodb-org-4.2.list
sudo apt-get update
sudo apt-get install -y mongodb-org
실행
sudo service mongod start
mongo 를 입력해서 > 프롬프트가 나오면 성공
use admin;
db.createUser({user: "test", pwd: "test", roles:["root"]});
exit
sudo service mongod restart
sudo vi /etc/mongod.conf
i  (vi 입력 모드로 전환한 뒤 설정 내용을 수정)
:wq  (Esc를 누른 뒤 입력: 저장하고 종료)
sudo service mongod restart
sudo iptables -t nat -A PREROUTING -i eth0 -p tcp --dport 80 -j REDIRECT --to-port 5000
pip install 패키지이름 : 파이썬 패키지를 설치한다
터미널(원격접속)을 꺼도 서버가 계속 실행되게 하기
app.py가 있는 폴더에 들어가서 nohup python app.py &
끄는 법: ps -ef | grep 'app.py' 로 실행 중인 프로세스의 ID(PID)를 확인
| =shift+\
강제종료
kill -9 프로세스ID(PID)  (포트번호가 아니라 ps -ef 출력의 두 번째 열에 나오는 번호)
코드
<meta property="og:title" content="내 사이트의 제목" />
페이스북 초기화 : https://developers.facebook.com/tools/debug/
카카오톡 초기화 : https://developers.kakao.com/tool/clear/og