// 不多说,直接上代码。粘贴后格式有些乱,使用时直接复制即可;但图片的匹配规则需要根据目标网站自行调整。
package grab;
import java.awt.AWTException;
import java.awt.Robot;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URL;
import java.net.URLConnection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Command-line image scraper.
 *
 * <p>{@link #main(String[])} walks a fixed range of listing pages and hands each page URL to
 * {@link #getDoc(String)}, which downloads every PNG/JPEG {@code <img>} found on that page into
 * a local directory.
 */
public class Service {

    /** Directory that downloaded images are written to. */
    private static final String OUTPUT_DIR = "E://imgs";

    /** First and last listing-page index visited by {@link #main(String[])}. */
    private static final int FIRST_PAGE = 3;
    private static final int LAST_PAGE = 87;

    /** Size of the copy buffer used while streaming one image to disk. */
    private static final int COPY_BUFFER_SIZE = 8 * 1024;

    /** Extension used when the image URL does not carry one. */
    private static final String DEFAULT_EXTENSION = ".png";

    /**
     * Scrapes listing pages {@code FIRST_PAGE..LAST_PAGE} one after another and prints the total
     * elapsed time in whole minutes.
     *
     * <p>An {@link IOException} on one page is printed and the loop moves on to the next page.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        long startTime = System.currentTimeMillis();
        for (int i = FIRST_PAGE; i <= LAST_PAGE; i++) {
            int round = i - FIRST_PAGE + 1;
            System.out.println("执行第" + round + "次开始");
            try {
                getDoc("http://www.youzi4.cc/mm/meinv/index_" + i + ".html");
                System.out.println("执行第" + round + "次结束");
            } catch (IOException e) {
                e.printStackTrace();
            }
            // If the site starts blocking requests, a pause (e.g. Thread.sleep) can be added here.
        }
        long endTime = System.currentTimeMillis();
        System.out.println("开始时间为" + startTime + "结束时间为" + endTime + "程序耗时为" + (endTime - startTime)/60000 + "分钟" );
    }

    /**
     * Fetches the HTML page at {@code urll} and downloads every {@code <img>} whose {@code src}
     * contains a {@code .png}, {@code .jpg} or {@code .jpeg} suffix (case-insensitive) into
     * {@code OUTPUT_DIR}, creating that directory if necessary.
     *
     * <p>Each file is named after the image's {@code alt} text; when {@code alt} is blank the
     * file name from the image URL is used instead. The extension is taken from the image URL.
     *
     * @param urll URL of the page to scrape
     * @throws IOException if the output directory cannot be created, the page cannot be fetched,
     *     or an image cannot be downloaded or written
     */
    public static void getDoc(String urll) throws IOException {
        File outputDir = new File(OUTPUT_DIR);
        if (!outputDir.isDirectory() && !outputDir.mkdirs()) {
            throw new IOException("Cannot create output directory: " + outputDir);
        }

        Document doc = Jsoup.connect(urll).get();
        Elements images = doc.select("img[src~=(?i)\\.(png|jpe?g)]");
        for (Element image : images) {
            // absUrl resolves relative src values against the page URL; the raw attribute
            // would make new URL(...) fail for anything that is not already absolute.
            String src = image.absUrl("src");
            if (src.isEmpty()) {
                continue; // src could not be resolved to an absolute URL
            }
            File target = new File(outputDir, buildFileName(image.attr("alt"), src));
            download(src, target);
        }
    }

    /** Streams the resource at {@code src} into {@code target}, closing both streams afterwards. */
    private static void download(String src, File target) throws IOException {
        URLConnection connection = new URL(src).openConnection();
        try (InputStream in = connection.getInputStream();
                OutputStream out = new FileOutputStream(target)) {
            byte[] buffer = new byte[COPY_BUFFER_SIZE];
            int read;
            while ((read = in.read(buffer)) != -1) {
                out.write(buffer, 0, read);
            }
        }
    }

    /** Returns the last path segment of {@code src} with any query string or fragment removed. */
    private static String fileNameFromUrl(String src) {
        String path = src;
        int cut = path.indexOf('?');
        if (cut >= 0) {
            path = path.substring(0, cut);
        }
        cut = path.indexOf('#');
        if (cut >= 0) {
            path = path.substring(0, cut);
        }
        return path.substring(path.lastIndexOf('/') + 1);
    }

    /**
     * Builds the local file name for an image: the {@code alt} text (or, if blank, the URL's file
     * name) plus the extension found in the URL, with characters that are not allowed in file
     * names replaced by {@code _}.
     */
    private static String buildFileName(String alt, String src) {
        String urlName = fileNameFromUrl(src);
        int dot = urlName.lastIndexOf('.');
        String extension = dot >= 0 ? urlName.substring(dot) : DEFAULT_EXTENSION;

        String base = alt == null ? "" : alt.trim();
        if (base.isEmpty()) {
            base = dot >= 0 ? urlName.substring(0, dot) : urlName;
        }
        if (base.isEmpty()) {
            base = "image";
        }
        // Strip characters that Windows/Unix file systems reject so FileOutputStream does not throw.
        return (base + extension).replaceAll("[\\\\/:*?\"<>|]", "_");
    }
}