Node.js에서 axios와 cheerio를 사용하여 간단한 크롤러를 만들어 보려고 합니다.
npm init -y
npm install axios cheerio
npm install iconv-lite
const axios = require('axios');
const cheerio = require('cheerio');
const iconv = require('iconv-lite');
/**
 * Extracts the charset label from an HTTP Content-Type header value.
 *
 * @param {unknown} contentType - Raw header value, e.g. `text/html; charset=UTF-8`.
 * @returns {string|null} The declared charset exactly as written, or `null`
 *   when the header is missing or carries no `charset=` parameter.
 */
function charsetFromContentType(contentType) {
  const found = /charset\s*=\s*["']?([^\s;"']+)/i.exec(String(contentType || ''));
  return found ? found[1] : null;
}

/**
 * Downloads a page, decodes it, and collects a `{ title, link }` pair for
 * every `<ul> <li>` element that contains anchor text and an href.
 *
 * The body is decoded with the charset declared in the response's
 * Content-Type header when iconv-lite supports it; otherwise it falls back to
 * EUC-KR, which is what this script originally assumed for the default page.
 *
 * The collected list is printed with `console.log` and also returned so
 * callers can reuse it. Failures are logged rather than rethrown.
 *
 * @param {string} [url] - Page to crawl. Defaults to the Naver news list page.
 * @returns {Promise<Array<{title: string, link: string}>>} The collected
 *   entries, or an empty array when the request or parsing failed.
 */
async function crawl(url = 'https://news.naver.com/main/list.naver?mode=LSD&mid=sec&sid1=100') {
  try {
    // Request raw bytes so the charset conversion is done here, not by axios.
    const { data, headers } = await axios.get(url, { responseType: 'arraybuffer' });

    // Prefer the charset the server declares; fall back to EUC-KR when it is
    // absent or unknown to iconv-lite.
    const declared = charsetFromContentType(headers && headers['content-type']);
    const charset = declared && iconv.encodingExists(declared) ? declared : 'EUC-KR';
    const decodedData = iconv.decode(data, charset);

    const $ = cheerio.load(decodedData);
    const articles = [];

    // Find every <li> nested inside a <ul> (tag names only, no class filter).
    $('ul li').each((i, element) => {
      const title = $(element).find('a').text().trim();
      const link = $(element).find('a').attr('href');
      // Skip list items without visible anchor text or without an href.
      if (title && link) {
        articles.push({ title, link });
      }
    });

    console.log(articles);
    return articles;
  } catch (error) {
    console.error('크롤링 중 오류 발생:', error);
    return [];
  }
}

crawl();
node crawl.js
[
{ title: '엔터', link: 'https://entertain.naver.com/home' },
{ title: '스포츠', link: 'https://sports.news.naver.com' },
{ title: '날씨', link: 'https://weather.naver.com' },
{ title: '프리미엄', link: 'https://contents.premium.naver.com' },
{ title: '언론사별', link: '/' },
{ title: '정치', link: '/section/100' },
{ title: '경제', link: '/section/101' },
{ title: '사회', link: '/section/102' },
{ title: '생활/문화', link: '/section/103' },
{ title: 'IT/과학', link: '/section/105' },
{ title: '세계', link: '/section/104' },
{
title: '랭킹',
link: '/main/ranking/popularDay.naver?mid=etc&sid1=111'
},
{ title: '신문보기', link: '/newspaper/home' },
{ title: '오피니언', link: '/opinion/home' },
{ title: 'TV', link: '/main/tv/index.naver?mid=tvh' },
{ title: '팩트체크', link: '/factcheck/main' },
{ title: '알고리즘 안내', link: 'https://media.naver.com/algorithm' },
{
title: '정정보도 모음',
link: '/main/ombudsman/revisionArticleList.naver?mid=omb'
},
... more items
]