[C#] HtmlAgilityPack을 이용한 웹 크롤링

해내면 그만!XX·2023년 2월 7일
using HtmlAgilityPack;

namespace WebCrawler
{
    /// <summary>
    /// Console entry point that downloads one HTML page and scrapes its news
    /// entries into <see cref="News"/> objects using HtmlAgilityPack.
    /// </summary>
    class Program
    {
        // Exact value of the "class" attribute carried by every news container <div>.
        // The match is an exact string comparison, so any change in the page's
        // class list (order, spacing, extra classes) yields zero matches.
        private const string NewsContainerClass =
            "news_main_skin8 news_main_skin8_3 news_main_skin8_3_2 news_main_skin8_b";

        // A single HttpClient is reused for the lifetime of the process instead of
        // creating (and never disposing) a new instance on every call.
        private static readonly HttpClient Client = new HttpClient();

        /// <summary>
        /// Runs the crawler, reports how many items were scraped, and then waits
        /// for Enter so the console window stays open.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static async Task Main(string[] args)
        {
            // Await the crawl so download/parse exceptions surface here instead of
            // being lost in an unobserved fire-and-forget task.
            var news = await StartCrawlerAsync();

            Console.WriteLine($"{news.Count} news item(s) scraped.");

            Console.ReadLine();
        }

        /// <summary>
        /// Downloads the page at the configured URL and extracts one
        /// <see cref="News"/> per matching news container.
        /// </summary>
        /// <returns>
        /// The scraped items; an empty list when no container matches.
        /// A field is <c>null</c> when its element or attribute is absent.
        /// </returns>
        private static async Task<List<News>> StartCrawlerAsync()
        {
            // Placeholder: replace with the address of the page to crawl.
            var url = "http://";
            var html = await Client.GetStringAsync(url);

            var htmlDocument = new HtmlDocument();
            htmlDocument.LoadHtml(html);

            return htmlDocument.DocumentNode.Descendants("div")
                .Where(node => node.GetAttributeValue("class", "").Equals(NewsContainerClass))
                .Select(div => new News
                {
                    Title = div.Descendants("dt").FirstOrDefault()?.InnerText,
                    Image = div.Descendants("img").FirstOrDefault()
                        ?.ChildAttributes("src").FirstOrDefault()?.Value,
                    // Null-safe: a container without <dd class="body"> yields null
                    // instead of throwing NullReferenceException.
                    Contents = div.Descendants("dd")
                        .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("body"))
                        ?.InnerText,
                    Link = div.Descendants("a").FirstOrDefault()
                        ?.ChildAttributes("href").FirstOrDefault()?.Value
                })
                .ToList();
        }
    }

    /// <summary>
    /// One news entry scraped from a news container element of the crawled page.
    /// </summary>
    public class News
    {
        /// <summary>Inner text of the first &lt;dt&gt; element inside the container.</summary>
        public string Title { get; set; }

        /// <summary>Value of the <c>src</c> attribute of the first &lt;img&gt; element inside the container.</summary>
        public string Image { get; set; }

        /// <summary>Inner text of the first &lt;dd&gt; element whose class attribute is exactly <c>body</c>.</summary>
        public string Contents { get; set; }

        /// <summary>Value of the <c>href</c> attribute of the first &lt;a&gt; element inside the container.</summary>
        public string Link { get; set; }
    }
}

콘솔 앱에서는 데이터를 정상적으로 가져왔지만, C# Xamarin에서는 데이터가 깨진 채로 넘어왔다. 그래서 아래와 같이 HttpClient 대신 WebClient를 사용하도록 수정했더니 잘 동작했다.

            // Replacement for the HttpClient / GetStringAsync lines in the crawler method:
            // downloads the page synchronously through WebClient instead.
            // NOTE(review): WebClient lives in System.Net — confirm that namespace is in scope.
            var httpClient = new WebClient();
            var html = httpClient.DownloadString(url);

참조
https://www.youtube.com/watch?v=oeuvL1_5UIQ

0개의 댓글