使用HtmlAgilityPack抓取html标签属性内容

版权声明:博客内容仅代表个人观点,如发现阐述有误,麻烦指正,谢谢! https://blog.csdn.net/heyangyi_19940703/article/details/78352378

第一步:通过 NuGet 安装 HtmlAgilityPack(例如在"程序包管理器控制台"中执行 Install-Package HtmlAgilityPack)


第二步:简单抓取一些想要的信息(需要引用 System 和 HtmlAgilityPack 命名空间):

        /// <summary>
        /// Downloads a fixed iqiyi.com page with HtmlAgilityPack and prints to the console:
        /// every meta tag keyed by "property", every meta tag keyed by "name",
        /// the page title, and the src of the first img element that has one.
        /// Waits for a key press before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            HtmlWeb webClient = new HtmlWeb();
            HtmlDocument doc = webClient.Load("http://www.iqiyi.com/v_19rrdze0xk.html#vfrm=24-9-0-1");

            #region [head meta: property (key) -> content (value)]
            PrintMetaPairs(doc, "property", "[获取 head meta property(键) content(值)]");
            #endregion

            #region [head meta: name (key) -> content (value)]
            PrintMetaPairs(doc, "name", "[获取 head meta name(键) content(值)]");
            #endregion

            #region [title]
            Console.WriteLine();
            Console.WriteLine("[获取 title]");
            HtmlNode titleNode = doc.DocumentNode.SelectSingleNode("//title");
            if (titleNode != null)
            {
                Console.WriteLine("title:" + titleNode.InnerText);
            }
            #endregion

            #region [first img src]
            Console.WriteLine();
            Console.WriteLine("[获取 首个 img src]");
            // Only match <img> elements that actually carry a src attribute; the
            // first <img> in a page may have none (e.g. lazy-loaded images), which
            // previously caused a NullReferenceException.
            HtmlNode imgNode = doc.DocumentNode.SelectSingleNode("//img[@src]");
            if (imgNode != null)
            {
                string src = imgNode.GetAttributeValue("src", string.Empty);
                Console.WriteLine("img src:" + src);
            }
            #endregion

            Console.ReadKey();
        }

        /// <summary>
        /// Prints one "key : content" line for every meta element that has the
        /// given key attribute. Prints nothing (not even the header) when the
        /// document contains no such element.
        /// </summary>
        /// <param name="doc">The parsed HTML document to search.</param>
        /// <param name="keyAttribute">Attribute used as the key, e.g. "property" or "name".</param>
        /// <param name="header">Heading line written before the key/content pairs.</param>
        private static void PrintMetaPairs(HtmlDocument doc, string keyAttribute, string header)
        {
            // SelectNodes returns null, not an empty collection, when nothing matches.
            HtmlNodeCollection metaNodes = doc.DocumentNode.SelectNodes(".//meta[@" + keyAttribute + "]");
            if (metaNodes == null)
            {
                return;
            }

            Console.WriteLine();
            Console.WriteLine(header);
            foreach (HtmlNode metaNode in metaNodes)
            {
                // A meta tag may omit "content"; fall back to an empty string
                // instead of dereferencing a null HtmlAttribute.
                string key = metaNode.GetAttributeValue(keyAttribute, string.Empty);
                string content = metaNode.GetAttributeValue("content", string.Empty);
                Console.WriteLine("{0}\t:\t{1}", key, content);
            }
        }



猜你喜欢

转载自blog.csdn.net/heyangyi_19940703/article/details/78352378
今日推荐