jsoup爬取图片
1.JsoupImgServlet.java
package com.ld.jsoup.servlet;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.*;
import java.net.URL;
import java.net.URLConnection;
/**
 * Servlet that scrapes the lazily-loaded images from a fixed 699pic.com listing page
 * and saves each one as {@code image<index>.jpg} in a local directory.
 *
 * <p>On success the response body is the plain text {@code success}. Any I/O failure
 * (page fetch, image download, file write) propagates as an {@link IOException}, so the
 * container answers with an error status instead of {@code success}.
 */
public class JsoupImgServlet extends HttpServlet {

    /** Listing page whose images are downloaded. */
    private static final String PAGE_URL = "http://699pic.com/tupian/chuntian.html";

    /** CSS selector matching the lazily-loaded image elements on the page. */
    private static final String IMAGE_SELECTOR = "img.lazy";

    /** Attribute that carries the image address on the selected elements. */
    private static final String IMAGE_ATTR = "data-original";

    /** Directory that receives the downloaded files. */
    private static final File OUTPUT_DIR = new File("E:/image");

    /** Size of the copy buffer, in bytes. */
    private static final int BUFFER_SIZE = 1024;

    /**
     * Downloads every image found on {@link #PAGE_URL} and reports {@code success}.
     *
     * @param request  the incoming HTTP request (its parameters are not used)
     * @param response the HTTP response; receives the text {@code success}
     * @throws ServletException declared by the servlet contract
     * @throws IOException      if the page or an image cannot be fetched, or a file cannot be written
     */
    @Override
    protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
        // Set request/response encoding before the writer is obtained.
        request.setCharacterEncoding("UTF-8");
        response.setContentType("text/html;charset=UTF-8");
        PrintWriter out = response.getWriter();

        // Fetch and parse the listing page, then pick the image elements.
        Document document = Jsoup.connect(PAGE_URL).get();
        Elements images = document.select(IMAGE_SELECTOR);

        ensureOutputDirectory();

        for (int i = 0; i < images.size(); i++) {
            // absUrl resolves relative and protocol-relative ("//host/...") addresses against
            // the page URL, and yields "" when the attribute is missing or unusable.
            String imgUrl = images.get(i).absUrl(IMAGE_ATTR);
            if (imgUrl.isEmpty()) {
                continue; // nothing to download for this element
            }
            // The element index stays in the file name even when earlier elements were skipped.
            download(imgUrl, new File(OUTPUT_DIR, "image" + i + ".jpg"));
        }

        // The writer is deliberately closed only on the success path: closing it commits
        // the response, which would prevent the container from reporting a failure.
        out.print("success");
        out.close();
    }

    /**
     * Handles POST exactly like GET.
     *
     * @param req  the incoming HTTP request
     * @param resp the HTTP response
     * @throws ServletException declared by the servlet contract
     * @throws IOException      if {@link #doGet} fails
     */
    @Override
    protected void doPost(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException {
        doGet(req, resp);
    }

    /** Creates the output directory if it does not exist yet. */
    private static void ensureOutputDirectory() throws IOException {
        // mkdirs() returns false when the directory already exists, hence the second check.
        if (!OUTPUT_DIR.mkdirs() && !OUTPUT_DIR.isDirectory()) {
            throw new IOException("Cannot create output directory: " + OUTPUT_DIR);
        }
    }

    /** Copies the content behind {@code imgUrl} into {@code target}, closing both streams. */
    private static void download(String imgUrl, File target) throws IOException {
        URLConnection connection = new URL(imgUrl).openConnection();
        // try-with-resources releases the socket and the file handle even if the copy fails.
        try (InputStream in = connection.getInputStream();
             OutputStream fileOut = new FileOutputStream(target)) {
            byte[] buffer = new byte[BUFFER_SIZE];
            int count;
            while ((count = in.read(buffer)) != -1) {
                fileOut.write(buffer, 0, count);
            }
        }
    }
}
2.IndexAjax.js
/**
 * Sends a GET request to the "/src" endpoint and expects a plain-text reply.
 * A successful reply is handed to ifSuccess; a failed request shows an error alert.
 */
function ajaxRequest2() {
    var settings = {
        url: "/src",
        type: "GET",
        dataType: "text",
        success: ifSuccess,
        error: function () {
            alert("请求错误!");
        }
    };
    $.ajax(settings);
}
/**
 * Success callback for ajaxRequest2: shows one alert depending on whether the
 * response text equals "success".
 *
 * @param data the response body passed by the AJAX call
 */
function ifSuccess(data) {
    var message = (data == "success") ? "爬取成功!" : "爬取失败!";
    alert(message);
}
3.web.xml
<!-- Registers the servlet class under the logical name JsoupImgServlet. -->
<servlet>
<servlet-name>JsoupImgServlet</servlet-name>
<servlet-class>com.ld.jsoup.servlet.JsoupImgServlet</servlet-class>
</servlet>
<!-- Maps the servlet to the URL pattern /src, the address requested by ajaxRequest2. -->
<servlet-mapping>
<servlet-name>JsoupImgServlet</servlet-name>
<url-pattern>/src</url-pattern>
</servlet-mapping>
根据 servlet 代码中的文件路径,在 E 盘新建文件夹 image 用于存放图片,最终爬取的图片会保存到该文件夹中。