import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Small scraping demo: downloads an HTML page, finds image addresses in it with a
 * regular expression, and saves each image to a local directory.
 *
 * <p>The instance keeps a running count of successfully saved images, readable via
 * {@link #getNum()}. The class does no synchronization of that counter.
 */
class getHtml2 {
    /** Directory used by {@link #getPicture(String)} when no target directory is given. */
    private static final String DEFAULT_SAVE_DIR = "//sdcard//Download//browser";

    /**
     * Scheme-less image address: two dot-terminated word groups, a word group, a slash,
     * any number of digit-only path segments, a word, a literal {@code !}, and a
     * {@code .jpg} file name. Compiled once because it never changes.
     */
    private static final Pattern IMAGE_PATTERN =
            Pattern.compile("(\\w*\\.)(\\w+\\.)(\\w*)/(\\d*/)*(\\w+)!(\\w+\\.jpg)");

    /** Size of the copy buffer used while streaming an image to disk. */
    private static final int BUFFER_SIZE = 8192;

    /** Number of images saved successfully so far. */
    private int num;

    /**
     * @return the number of images saved successfully so far
     */
    public int getNum() {
        return num;
    }

    /**
     * Overwrites the saved-image counter.
     *
     * @param num the new counter value
     */
    public void setNum(int num) {
        this.num = num;
    }

    /**
     * Downloads the resource at {@code url} into the default save directory.
     *
     * <p>Failures are reported on the standard error stream and do not propagate; the
     * counter is only incremented when the download completes.
     *
     * @param url absolute URL of the image to download
     */
    public void getPicture(String url) {
        getPicture(url, new File(DEFAULT_SAVE_DIR));
    }

    /**
     * Downloads the resource at {@code url} into {@code targetDir}, naming the file after
     * the text following the last {@code /} of the URL.
     *
     * <p>The directory (including missing parents) is created when absent. Failures are
     * reported on the standard error stream and do not propagate; the counter is only
     * incremented when the download completes.
     *
     * @param url       absolute URL of the image to download
     * @param targetDir directory the image is written into
     */
    public void getPicture(String url, File targetDir) {
        try {
            System.out.println("==========抓取网络图片 Start==========");

            // Validate the URL before touching the file system.
            URL httpUrl = new URL(url);

            String pictureName = url.substring(url.lastIndexOf('/') + 1);
            if (pictureName.isEmpty()) {
                System.err.println("No file name in image URL: " + url);
                return;
            }

            // mkdirs() also creates missing parent directories, unlike mkdir().
            if (!targetDir.isDirectory() && !targetDir.mkdirs()) {
                System.err.println("Cannot create download directory: " + targetDir);
                return;
            }

            File target = new File(targetDir, pictureName);
            // try-with-resources closes both streams even when the copy fails midway.
            try (BufferedInputStream in = new BufferedInputStream(httpUrl.openStream());
                 FileOutputStream out = new FileOutputStream(target)) {
                byte[] buffer = new byte[BUFFER_SIZE];
                int len;
                while ((len = in.read(buffer)) != -1) {
                    out.write(buffer, 0, len);
                }
            }

            System.out.println("==========抓取网络图片 End==========");
            num++;
        } catch (MalformedURLException e) {
            System.err.println("Invalid image URL: " + url);
            e.printStackTrace();
        } catch (Exception e) {
            // Best effort: one failed image must not abort the whole crawl.
            System.err.println("Failed to download image: " + url);
            e.printStackTrace();
        }
    }

    /**
     * Reads the resource at {@code url} as UTF-8 text.
     *
     * @param url absolute URL of the page to read
     * @return the content with every line terminated by a single {@code \n}
     * @throws Exception if the URL is malformed or the resource cannot be read
     */
    public String getHtmlCode(String url) throws Exception {
        URL httpUrl = new URL(url);
        StringBuilder content = new StringBuilder();
        // Decode explicitly as UTF-8 instead of relying on the platform default charset.
        try (BufferedReader reader =
                     new BufferedReader(new InputStreamReader(httpUrl.openStream(), "UTF-8"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line).append('\n');
            }
        }
        return content.toString();
    }

    /**
     * Fetches the page at {@code url}, prints it, and downloads every image address in it
     * that matches the image pattern, prefixing each match with {@code http://}.
     *
     * @param url absolute URL of the page to scan
     * @throws Exception if the page cannot be fetched
     */
    public void get(String url) throws Exception {
        String content = this.getHtmlCode(url);
        System.out.println(content);
        Matcher matcher = IMAGE_PATTERN.matcher(content);
        while (matcher.find()) {
            String address = matcher.group(0);
            this.getPicture("http://" + address);
            System.out.println(address);
        }
    }

    /**
     * Runs the demo against a fixed page and prints how many images were saved.
     *
     * @param args unused
     * @throws Exception if the page cannot be fetched
     */
    public static void main(String[] args) throws Exception {
        String url = "https://m.woyaogexing.com/shouji/dongman/2018/4838.html";
        getHtml2 ge = new getHtml2();
        ge.get(url);
        System.out.println("抓取完毕,本次抓取到" + ge.getNum() + "张图片");
    }
}
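// java爬虫demo (Java web-crawler demo)
// 猜你喜欢 (You may also like)
// 转载自blog.csdn.net/qq_40955914/article/details/80978093 (Reposted from blog.csdn.net/qq_40955914/article/details/80978093)
// 今日推荐 (Today's recommendations)
// 周排行 (Weekly ranking)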