Golang webdriver로 크롤링을 진행하던 중 에러가 발생하고 라이브러리 업데이트도 안 되고 있어, 라이브러리를 chromedp로 변경
package main
import (
"log"
"context"
"time"
"fmt"
"github.com/chromedp/chromedp"
//"github.com/chromedp/cdproto/cdp"
//"github.com/chromedp/cdproto/runtime"
)
// main collects the video links via getLinkList and hands each one, in
// order, to getDescription.
func main() {
	links := getLinkList()
	for i := 0; i < len(links); i++ {
		getDescription(links[i])
	}
}
// getDescription opens the YouTube page at "https://www.youtube.com"+url,
// clicks the control inside the description area that expands the collapsed
// text, and prints the description text to stdout.
//
// url is a site-relative path (it is appended to the YouTube origin as is).
// Failures are logged and the function returns, so that a single page that
// fails or times out does not abort the processing of the remaining videos.
func getDescription(url string) {
	fmt.Println(url)

	// An empty path would load the YouTube home page; skip it rather than
	// spending the whole timeout on selectors that belong to a watch page.
	if url == "" {
		log.Printf("getDescription: empty url, skipping")
		return
	}

	ctx, cancelBrowser := chromedp.NewContext(
		context.Background(),
		chromedp.WithLogf(log.Printf),
	)
	defer cancelBrowser()

	// Upper bound for the whole visit (navigation, click and text read).
	ctx, cancelTimeout := context.WithTimeout(ctx, 30*time.Second)
	defer cancelTimeout()

	const descriptionSel = "#primary div#primary-inner div#below ytd-watch-metadata div#above-the-fold div#bottom-row div#description"

	var description string
	err := chromedp.Run(ctx,
		chromedp.Navigate("https://www.youtube.com"+url),
		// These are compound CSS selectors, so they are queried with ByQuery
		// rather than ByID.
		chromedp.Click(descriptionSel+" tp-yt-paper-button#expand-sizer", chromedp.ByQuery),
		chromedp.Text(descriptionSel, &description, chromedp.ByQuery),
	)
	if err != nil {
		log.Printf("getDescription %q: %v", url, err)
		return
	}
	fmt.Println(description)
}
// getLinkList opens the channel's videos page, scrolls down repeatedly until
// the measured content height stops changing (presumably so that further
// entries get loaded), and returns the non-empty href attribute of every
// element matching the video title link selector.
//
// The number of collected links is printed to stdout. The function panics if
// navigation, scrolling or the attribute query fails, which includes running
// past the 300-second timeout.
func getLinkList() []string {
	ctx, cancelBrowser := chromedp.NewContext(
		context.Background(),
		chromedp.WithLogf(log.Printf),
	)
	defer cancelBrowser()

	// Upper bound for navigation, scrolling and link extraction together.
	ctx, cancelTimeout := context.WithTimeout(ctx, 300*time.Second)
	defer cancelTimeout()

	if err := chromedp.Run(ctx,
		chromedp.Navigate("https://www.youtube.com/@paik_jongwon/videos"),
	); err != nil {
		panic(err)
	}

	// Scrolls to the current bottom of the content container and evaluates
	// to that container's height.
	const scrollScript = `window.scrollTo(0,document.querySelector("body ytd-app div#content").clientHeight); document.querySelector("body ytd-app div#content").clientHeight;`

	var prevHeight, height int
	for {
		err := chromedp.Run(ctx,
			chromedp.Evaluate(scrollScript, &height),
			// Pause before the next measurement so the page has time to grow.
			chromedp.Sleep(700*time.Millisecond),
		)
		if err != nil {
			panic(err)
		}
		// No growth since the previous pass: treat the list as complete.
		if height == prevHeight {
			break
		}
		prevHeight = height
	}

	var attrs []map[string]string
	if err := chromedp.Run(ctx,
		chromedp.AttributesAll("#primary ytd-rich-grid-renderer div#contents ytd-rich-grid-row div#contents ytd-rich-item-renderer #video-title-link", &attrs, chromedp.ByQueryAll),
	); err != nil {
		panic(err)
	}

	links := make([]string, 0, len(attrs))
	for _, a := range attrs {
		// Elements without an href would otherwise yield empty entries that
		// cannot be turned into a video URL.
		if href := a["href"]; href != "" {
			links = append(links, href)
		}
	}
	fmt.Println(len(links))
	return links
}
getLinkList : 특정 유튜버의 모든 영상 URL 수집
getDescription : 영상의 소개글 수집
아래와 같이 축약된 소개글을 '더보기' 버튼을 눌러 확장한 후 수집
백종원 님의 레시피를 수집할 예정