// Build the HTTP request for the target URL.
HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
// Attach the cookie container so its cookies are sent with the request.
request.CookieContainer = CookieContainer;
// Send the request and read the whole response body, decoded as gb2312.
// The using statements dispose the response, its stream and the reader even
// when reading throws, so the underlying connection is always released.
string html;
using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
using (Stream stream = response.GetResponseStream())
using (StreamReader streamreader = new StreamReader(stream, Encoding.GetEncoding("gb2312")))
{
    html = streamreader.ReadToEnd();
}
if (mces == null || mces.Count <= 0) return;
//抓取网页的所有数据
foreach (Match m in mces)
{
// Build the HTTP request for the target URL.
HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
// Attach the cookie container so its cookies are sent with the request.
request.CookieContainer = CookieContainer;
// Send the request and read the whole response body, decoded as gb2312.
// The using statements dispose the response, its stream and the reader even
// when reading throws, so the underlying connection is always released.
string html;
using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
using (Stream stream = response.GetResponseStream())
using (StreamReader streamreader = new StreamReader(stream, Encoding.GetEncoding("gb2312")))
{
    html = streamreader.ReadToEnd();
}
// Nothing was downloaded, so there is nothing to parse.
if (string.IsNullOrEmpty(html))
{
    return;
}
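// Next, extract whatever information is needed from the HTML (e.g. with regular expressions). Note: this step only applies to static web pages.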
// Pattern for the grouped data: the named group "url" captures a link's href
// and "title" captures link text ending in "介绍".
string regex = "div class=\"mls\">.*href=\"(?<url>[^\"]*)\">(?<title>[^<]*介绍)</a>";
MatchCollection mces = RegexUtil.GetGroupByRegex(html, regex);
// Stop when the helper produced no matches.
bool hasMatches = mces != null && mces.Count > 0;
if (!hasMatches)
{
    return;
}
//抓取网页的所有数据
foreach (Match m in mces)
{