.NET Image Crawler

On a whim, I decided to write a web page image crawler. I found plenty of material online, but I could not follow most of it because my own knowledge was still too shallow. After a lot of struggling, I finally got a simple version working!

 

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using System.Windows.Forms;
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;

namespace 网页图片爬虫_窗体版
{
public partial class Form1 : ZHSkin.ZHForm
{
    /// <summary>
    /// Caption shown on the pause button while the download is paused.
    /// </summary>
    private const string ResumeCaption = "Continue";

    /// <summary>
    /// Folder that downloaded images are written to.
    /// </summary>
    private string SavePath = string.Empty;

    /// <summary>
    /// Worker thread running the current download, or null before the first run.
    /// </summary>
    private Thread thread = null;

    /// <summary>
    /// Gate the worker waits on between pages and images. Signaled = running,
    /// reset = paused. Replaces Thread.Suspend/Resume, which can freeze a thread
    /// while it holds a lock. Never disposed on purpose: the worker may still be
    /// waiting on it while the form is closing.
    /// </summary>
    private readonly ManualResetEventSlim pauseGate = new ManualResetEventSlim(true);

    /// <summary>
    /// Set when the form closes so the worker stops at its next checkpoint.
    /// </summary>
    private volatile bool cancelRequested;

    /// <summary>
    /// Whether the download is currently paused. Only touched on the UI thread.
    /// </summary>
    private bool paused;

    /// <summary>
    /// Caption the designer gave the pause button; restored whenever the
    /// download is running or idle, so the pause state never depends on
    /// comparing button text.
    /// </summary>
    private readonly string pauseCaption;

    public Form1()
    {
        InitializeComponent();
        pauseCaption = btn_Stop.Text;
    }

    /// <summary>
    /// Lets the user pick the folder images are saved to.
    /// </summary>
    /// <param name="sender"></param>
    /// <param name="e"></param>
    private void btn_GetPath_Click(object sender, EventArgs e)
    {
        using (FolderBrowserDialog folderBrowser = new FolderBrowserDialog())
        {
            if (folderBrowser.ShowDialog() == DialogResult.OK)
            {
                txt_SavePath.Text = folderBrowser.SelectedPath;
            }
        }
    }

    /// <summary>
    /// Validates the inputs and starts the download on a background thread.
    /// </summary>
    /// <param name="sender"></param>
    /// <param name="e"></param>
    private void btn_Start_Click(object sender, EventArgs e)
    {
        string KeyWord = txt_KeyWords.Text.Trim();
        // Read on the UI thread and handed to the worker, so the worker never
        // touches the NumericUpDown control itself.
        int pageCount = (int)NUD_PageCount.Value;
        SavePath = txt_SavePath.Text;
        if (string.IsNullOrEmpty(KeyWord))
        {
            txt_ShowPath.AppendText("Please enter the keyword to search!" + Environment.NewLine);
            return;
        }
        if (string.IsNullOrEmpty(SavePath))
        {
            txt_ShowPath.AppendText("Please select the path to save!" + Environment.NewLine);
            return;
        }
        if (!Directory.Exists(SavePath)) // The folder must already exist; it is not created here.
        {
            txt_ShowPath.AppendText("The input path is incorrect, please check!" + Environment.NewLine);
            return;
        }

        // Start every run from a clean, un-paused state.
        cancelRequested = false;
        paused = false;
        pauseGate.Set();

        btn_Stop.Text = pauseCaption;
        btn_Stop.Enabled = true;   // Enable the pause button
        btn_Start.Enabled = false; // Disable the start button
        txt_ShowPath.Clear();      // Clear the log
        txt_ShowPath.AppendText("Starting download!" + Environment.NewLine);
        msg.Text = "Download started...";
        ProgressBar.Enabled = true;

        thread = new Thread(() =>
        {
            ProcessDownload(KeyWord, pageCount);
        });
        // Background thread: it must not keep the process alive after the form closes.
        thread.IsBackground = true;
        thread.Start();
    }

    /// <summary>
    /// Downloads the number of result pages selected in the page-count control
    /// for the given keyword, then restores the buttons to their idle state.
    /// </summary>
    /// <param name="KeyWord">Search keyword; it is URL-escaped before use.</param>
    public void ProcessDownload(string KeyWord)
    {
        ProcessDownload(KeyWord, ReadPageCount());
    }

    /// <summary>
    /// Worker body: fetches each result page and downloads the images it lists.
    /// All UI updates are marshaled to the UI thread.
    /// </summary>
    /// <param name="KeyWord">Search keyword.</param>
    /// <param name="pageCount">Number of result pages to fetch.</param>
    private void ProcessDownload(string KeyWord, int pageCount)
    {
        int pagesDone = 0;
        int imagesDone = 0;
        try
        {
            string escapedKeyWord = Uri.EscapeDataString(KeyWord);
            for (int i = 0; i < pageCount; i++)
            {
                if (!WaitWhilePaused())
                {
                    return;
                }

                int pageNumber = i + 1;
                SetStatus("正在下载第" + pageNumber + "页,共" + pageCount + "页");
                // NOTE(review): the offset starts at 60, so the first 60 results are
                // never requested - confirm whether pn should start at 0.
                string URL = "https://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj&ct=201326592&is=&fp=result&queryWord=" + escapedKeyWord + "&cl=2&lm=-1&ie=utf-8&oe=utf-8&adpicid=&st=-1&z=&ic=0&word=" + escapedKeyWord + "&s=&se=&tab=&width=&height=&face=0&istype=2&qc=&nc=1&fr=&cg=wallpaper&pn=" + (i + 1) * 60 + "&rn=60&gsm=3c&1525422519486=";
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(URL);
                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                {
                    if (response.StatusCode != HttpStatusCode.OK)
                    {
                        Log("获取第" + pageNumber + "页失败:" + response.StatusCode);
                        continue;
                    }

                    using (Stream stream = response.GetResponseStream())
                    {
                        try
                        {
                            imagesDone += DownloadPage(stream, i);
                            pagesDone++;
                        }
                        catch (Exception ex)
                        {
                            // A bad page must not abort the remaining pages.
                            Log(ex.Message);
                        }
                    }
                }
            }

            if (!cancelRequested)
            {
                // Report what was actually saved, not the theoretical maximum.
                SetStatus("下载完成!下载" + pagesDone + "页,共" + imagesDone + "张图片");
            }
        }
        catch (WebException ex)
        {
            Log("Network connection failed! " + ex.Message);
        }
        catch (Exception ex)
        {
            Log(ex.Message);
        }
        finally
        {
            // Always give the user the Start button back, even after a failure.
            RunOnUi(RestoreIdleUi);
        }
    }

    /// <summary>
    /// Parses one result page and downloads every image it lists.
    /// </summary>
    /// <param name="stream">Response stream containing the result JSON.</param>
    /// <param name="index">Zero-based page index, used only for log messages.</param>
    /// <returns>Number of images saved from this page.</returns>
    private int DownloadPage(Stream stream, int index)
    {
        int saved = 0;
        using (StreamReader reader = new StreamReader(stream))
        {
            string json = reader.ReadToEnd();
            JObject objRoot = JsonConvert.DeserializeObject(json) as JObject;
            JArray imgs = objRoot == null ? null : objRoot["data"] as JArray;
            if (imgs == null)
            {
                Log("Page " + (index + 1) + " contains no image list.");
                return saved;
            }

            Log("正在下载第" + (index + 1) + "页!");
            for (int i = 0; i < imgs.Count; i++)
            {
                if (!WaitWhilePaused())
                {
                    return saved;
                }

                try
                {
                    JObject img = imgs[i] as JObject;
                    string objUrl = img == null ? null : (string)img["middleURL"];
                    // Entries without a URL are skipped instead of being
                    // reported as download errors.
                    if (!string.IsNullOrEmpty(objUrl))
                    {
                        DownloadImage(objUrl);
                        saved++;
                    }
                }
                catch (Exception ex)
                {
                    // One failed image must not stop the rest of the page.
                    Log(ex.Message);
                }

                // (i + 1) so the bar reaches 100 after the last entry.
                SetTextMessage(100 * (i + 1) / imgs.Count);
            }
            Log("第" + (index + 1) + "页下载完成!");
        }
        return saved;
    }

    /// <summary>
    /// Downloads a single image into <see cref="SavePath"/>.
    /// </summary>
    /// <param name="objUrl">Absolute URL of the image.</param>
    /// <exception cref="ArgumentException">The URL is null or empty.</exception>
    /// <exception cref="WebException">The server did not answer with 200 OK.</exception>
    private void DownloadImage(string objUrl)
    {
        if (string.IsNullOrEmpty(objUrl))
        {
            throw new ArgumentException("Image URL is empty.", "objUrl");
        }

        string fileName = BuildFileName(objUrl);
        Log("正在下载:" + fileName);
        string destFile = Path.Combine(SavePath, fileName);
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(objUrl);
        // Send the search site as Referer so the image host sees the request
        // as coming from the result page.
        request.Referer = "https://image.baidu.com/";
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        {
            if (response.StatusCode != HttpStatusCode.OK)
            {
                throw new WebException("下载" + objUrl + "失败,错误码:" + response.StatusCode);
            }

            using (Stream stream = response.GetResponseStream())
            using (Stream FStream = new FileStream(destFile, FileMode.Create))
            {
                stream.CopyTo(FStream);
            }
        }
        Log("Download successful!");
    }

    /// <summary>
    /// Derives a file name that is legal on disk from an image URL: the last
    /// path segment without the query string, with illegal characters replaced.
    /// </summary>
    /// <param name="url">Image URL.</param>
    /// <returns>A non-empty file name.</returns>
    private static string BuildFileName(string url)
    {
        string name = null;
        Uri uri;
        if (Uri.TryCreate(url, UriKind.Absolute, out uri))
        {
            // AbsolutePath excludes the query string, which would otherwise put
            // '?' into the file name.
            string path = uri.AbsolutePath;
            name = Uri.UnescapeDataString(path.Substring(path.LastIndexOf('/') + 1));
        }
        if (string.IsNullOrEmpty(name))
        {
            name = Guid.NewGuid().ToString("N");
        }
        foreach (char invalid in Path.GetInvalidFileNameChars())
        {
            name = name.Replace(invalid, '_');
        }
        return name;
    }

    /// <summary>
    /// Blocks while the download is paused.
    /// </summary>
    /// <returns>False when the download was cancelled and the worker should stop.</returns>
    private bool WaitWhilePaused()
    {
        pauseGate.Wait();
        return !cancelRequested;
    }

    /// <summary>
    /// Reads the requested page count, marshaling to the UI thread when needed.
    /// </summary>
    private int ReadPageCount()
    {
        if (InvokeRequired)
        {
            return (int)Invoke(new Func<int>(ReadPageCount));
        }
        return (int)NUD_PageCount.Value;
    }

    /// <summary>
    /// Runs <paramref name="action"/> on the UI thread. Calls made while the
    /// form is closing or already closed are dropped.
    /// </summary>
    private void RunOnUi(Action action)
    {
        if (IsDisposed || !IsHandleCreated)
        {
            return;
        }
        if (!InvokeRequired)
        {
            action();
            return;
        }
        try
        {
            // Asynchronous on purpose: the worker never blocks on the UI thread.
            BeginInvoke(action);
        }
        catch (InvalidOperationException)
        {
            // The form was closed between the check above and the call; there
            // is no UI left to update. (Also covers ObjectDisposedException.)
        }
    }

    /// <summary>
    /// Appends one line to the log box from any thread.
    /// </summary>
    private void Log(string message)
    {
        RunOnUi(() =>
        {
            txt_ShowPath.AppendText(message + Environment.NewLine);
        });
    }

    /// <summary>
    /// Sets the status text from any thread.
    /// </summary>
    private void SetStatus(string text)
    {
        RunOnUi(() =>
        {
            msg.Text = text;
        });
    }

    /// <summary>
    /// Puts the buttons and progress bar back into the idle state. Must run on
    /// the UI thread.
    /// </summary>
    private void RestoreIdleUi()
    {
        paused = false;
        pauseGate.Set();
        ProgressBar.Value = 0;     // The progress bar returns to zero
        btn_Stop.Enabled = false;  // Disable the pause button
        btn_Start.Enabled = true;  // Enable the start button
        btn_Stop.Text = pauseCaption;
    }

    /// <summary>
    /// Delegate type for progress-bar updates.
    /// </summary>
    /// <param name="ipos">New progress value.</param>
    private delegate void SetPos(int ipos);

    /// <summary>
    /// Updates the progress bar from any thread.
    /// </summary>
    /// <param name="ipos">New progress value.</param>
    private void SetTextMessage(int ipos)
    {
        RunOnUi(() =>
        {
            this.ProgressBar.Value = ipos;
        });
    }

    /// <summary>
    /// Toggles between pausing and resuming the running download. The worker
    /// pauses at its next checkpoint, i.e. after the image currently in flight.
    /// </summary>
    /// <param name="sender"></param>
    /// <param name="e"></param>
    private void btn_Stop_Click(object sender, EventArgs e)
    {
        if (thread == null || !thread.IsAlive)
        {
            return;
        }

        if (!paused)
        {
            pauseGate.Reset();
            paused = true;
            txt_ShowPath.AppendText("Download has been suspended!" + Environment.NewLine);
            btn_Stop.Text = ResumeCaption;
        }
        else
        {
            pauseGate.Set();
            paused = false;
            txt_ShowPath.AppendText("Start downloading!" + Environment.NewLine);
            btn_Stop.Text = pauseCaption;
        }
    }

    /// <summary>
    /// Asks the worker to stop when the form is closed. The worker is a
    /// background thread, so it cannot keep the process alive.
    /// </summary>
    /// <param name="sender"></param>
    /// <param name="e"></param>
    private void Form1_FormClosed(object sender, FormClosedEventArgs e)
    {
        cancelRequested = true;
        // Release a paused worker so it can observe the cancel flag and exit.
        pauseGate.Set();
    }

    /// <summary>
    /// Keeps the log box scrolled to the newest line.
    /// </summary>
    /// <param name="sender"></param>
    /// <param name="e"></param>
    private void txt_ShowPath_TextChanged(object sender, EventArgs e)
    {
        txt_ShowPath.SelectionStart = txt_ShowPath.Text.Length;
        txt_ShowPath.ScrollToCaret();
    }
}
}

Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=325320497&siteId=291194637