C# 爬取图片(非原图,仅供学习使用)

不是原图!不是原图!不是原图!!!

思路都是一样的:先请求要爬取的分类页面(这里我只爬了4K动漫分类,可根据需求调整)。请求之后可以看到页面上有很多缩略图,点进详情页还能看到更清晰的图片(也就是我们要下载的图片)。然后就是批量操作:遍历每个列表页 → 遍历每个列表页上的每张图片。

代码:

        // HTTP helper used for all page and image requests; null until Main constructs it.
        private static HttpHelper helper = null;
        // Declared but neither assigned nor read in the code visible here.
        private static List<string> imginfo_paths = null;
        // Declared but neither assigned nor read in the code visible here.
        private static List<string> imgdown_paths = null;
        // Alternative starting point kept for reference: the category's root listing page.
        //private static string page_index = "/4kdongman/";
        // Site-relative path of the listing page to fetch next. Main overwrites it with the
        // "next page" link of each listing; presumably index_19 is the 19th listing page.
        private static string page_index = "/4kdongman/index_19.html";
        // Shared Regex slot; starts out null.
        private static Regex regex = null;
        // Scheme and host that the site-relative paths are appended to.
        private const string netbian = "https://pic.netbian.com";
        /// <summary>
        /// Entry point. Starting at <c>page_index</c>, fetches each listing page, follows every
        /// thumbnail link to its detail page, downloads the preview image referenced there and
        /// saves it as a .jpg in the "netbian" folder. Continues with the listing's "next page"
        /// link until none is found, then waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            helper = new HttpHelper();

            // Loop-invariant objects are built once instead of once per page / per image.
            Encoding pageEncoding = Encoding.GetEncoding("GB2312");
            Regex nextPageRegex = new Regex("<a href=\"(?<NextPage>[^\"]*)\">下一页</a>");
            Regex detailLinkRegex = new Regex("<li><a href=\"(?<Url>.*?\\.html)\" target=\"_blank\">");
            Regex imageInfoRegex = new Regex("<h1>(?<Name>.*?)</h1>.*?\\s.*?<a href=\"\" id=\"img\"><img src=\"(?<Img>.*?)\"");

            while (!string.IsNullOrEmpty(page_index))
            {
                string listUrl = netbian + page_index;
                string listHtml = helper.GetAndGetHtml(listUrl, null, null, false, pageEncoding);
                if (string.IsNullOrEmpty(listHtml))
                {
                    // Without listing HTML there are no links to follow and no next page to find;
                    // passing null to Regex.Match would throw outside any try block.
                    Console.WriteLine("Empty listing page, stopping: " + listUrl);
                    break;
                }

                // Advance first so a failure while processing this page still moves on to the next.
                page_index = nextPageRegex.Match(listHtml).Groups["NextPage"].Value;

                foreach (Match link in detailLinkRegex.Matches(listHtml))
                {
                    string detailUrl = netbian + link.Groups["Url"].Value;
                    string imageName = string.Empty;
                    string imageSource = string.Empty;
                    try
                    {
                        // Fetch the image detail page and pull out the title and image path in one match.
                        string detailHtml = helper.GetAndGetHtml(detailUrl, null, null, false, pageEncoding);
                        Match info = imageInfoRegex.Match(detailHtml);
                        imageName = info.Groups["Name"].Value;
                        imageSource = info.Groups["Img"].Value;

                        if (string.IsNullOrEmpty(imageSource))
                        {
                            // No image path on this page: skip it rather than requesting the bare site root.
                            ReportFailure("No image source found on page.", detailUrl, imageName, imageSource);
                            continue;
                        }

                        // Sanitize only the file name; the directory part of the path must keep its separators.
                        string filepath = GetPath("netbian", ToSafeFileName(imageName) + ".jpg");
                        string directory = System.IO.Path.GetDirectoryName(filepath);
                        if (!string.IsNullOrEmpty(directory))
                        {
                            System.IO.Directory.CreateDirectory(directory);
                        }

                        // Image is IDisposable; release it as soon as it has been written.
                        using (Image image = helper.GetAndGetBitmap(netbian + imageSource, null, null, false))
                        {
                            image.Save(filepath);
                        }
                    }
                    catch (Exception ex)
                    {
                        // Best effort per image: log the failure and continue with the next link.
                        ReportFailure(ex.Message, detailUrl, imageName, imageSource);
                    }
                }
            }
            Console.WriteLine("Down Over!");
            Console.ReadKey();
        }

        // Writes one diagnostic record for an image that could not be downloaded or saved.
        private static void ReportFailure(string message, string detailUrl, string imageName, string imageSource)
        {
            Console.WriteLine(message);
            Console.WriteLine("下一頁:" + page_index);
            Console.WriteLine("圖片信息URL:" + detailUrl);
            Console.WriteLine("圖片名稱:" + imageName);
            Console.WriteLine("圖源:" + imageSource);
            Console.WriteLine();
        }

        // Turns a page title into a usable file name: ':' becomes '-', and '?', '/', '\' plus any
        // character the platform reports as invalid in file names are dropped.
        private static string ToSafeFileName(string name)
        {
            char[] invalidChars = System.IO.Path.GetInvalidFileNameChars();
            StringBuilder safe = new StringBuilder(name.Length);
            foreach (char c in name)
            {
                if (c == ':')
                {
                    safe.Append('-');
                }
                else if (c != '?' && c != '/' && c != '\\' && Array.IndexOf(invalidChars, c) < 0)
                {
                    safe.Append(c);
                }
            }
            return safe.ToString();
        }
        /// <summary>
        /// Builds the path of a file inside <paramref name="folder"/>, where the folder sits next to
        /// the build-output "bin" directory that contains the current working directory.
        /// </summary>
        /// <param name="folder">Name of the target folder.</param>
        /// <param name="strFileName">File name to place inside <paramref name="folder"/>.</param>
        /// <returns>
        /// The combined path. If no directory named "bin" is found among the ancestors of the current
        /// directory, the folder is placed under the current directory instead.
        /// </returns>
        public static string GetPath(string folder, string strFileName)
        {
            string baseDirectory = Environment.CurrentDirectory;

            // Walk upwards and stop at the nearest directory actually named "bin". A plain substring
            // search would also hit names such as "robin" or "cabinet", and would throw when the
            // working directory has no "bin" in it at all.
            System.IO.DirectoryInfo current = new System.IO.DirectoryInfo(baseDirectory);
            while (current != null)
            {
                if (current.Parent != null && string.Equals(current.Name, "bin", StringComparison.OrdinalIgnoreCase))
                {
                    baseDirectory = current.Parent.FullName;
                    break;
                }
                current = current.Parent;
            }

            // Null arguments are treated as empty, as string concatenation would.
            string folderPath = System.IO.Path.Combine(baseDirectory, folder ?? string.Empty);
            return System.IO.Path.Combine(folderPath, strFileName ?? string.Empty);
        }
    

 最终一共下载了2302张图片,没有图源的页面已被过滤掉。

                 

 大概就是这样子。程序没有使用多线程,图片是逐张下载的,所以网速慢的话可能要下载比较久(我下了一个下午)。

猜你喜欢

转载自blog.csdn.net/qq_51502150/article/details/125622829