jsoup爬取图片

jsoup爬取图片
1.JsoupImgServlet.java

package com.ld.jsoup.servlet;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.*;
import java.net.URL;
import java.net.URLConnection;

/**
 * Servlet that scrapes the lazily-loaded images of a fixed web page with jsoup
 * and stores them on the local disk.
 *
 * <p>Every {@code img.lazy} element on the page is inspected; the image referenced
 * by its {@code data-original} attribute is downloaded to
 * {@code E:/image/image<index>.jpg}, where {@code <index>} is the element's position
 * in the selection. When all downloads finish, the plain text {@code success} is
 * written to the response.
 */
public class JsoupImgServlet extends HttpServlet {

    /** Page whose lazily-loaded images are downloaded. */
    private static final String PAGE_URL = "http://699pic.com/tupian/chuntian.html";

    /** Directory that receives the downloaded images. */
    private static final String IMAGE_DIR = "E:/image";

    /** Size in bytes of the buffer used to copy an image to disk. */
    private static final int BUFFER_SIZE = 1024;

    /**
     * Downloads all images of the target page and answers with {@code success}.
     *
     * @param request  the incoming HTTP request (no parameters are read)
     * @param response the HTTP response that receives the text {@code success}
     * @throws ServletException never thrown directly; declared by the overridden method
     * @throws IOException if the page cannot be fetched, the image directory cannot be
     *                     created, or an image cannot be downloaded or written
     */
    @Override
    protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
        // Configure request/response encoding before the writer is obtained.
        request.setCharacterEncoding("UTF-8");
        response.setContentType("text/html;charset=UTF-8");
        PrintWriter out = response.getWriter();

        // FileOutputStream does not create missing parent directories, so do it here.
        File imageDir = new File(IMAGE_DIR);
        if (!imageDir.isDirectory() && !imageDir.mkdirs()) {
            throw new IOException("Cannot create image directory: " + imageDir);
        }

        // Fetch and parse the page, then select the lazily-loaded <img> elements.
        Document document = Jsoup.connect(PAGE_URL).get();
        Elements elements = document.select("img.lazy");
        for (int i = 0; i < elements.size(); i++) {
            // absUrl resolves relative and protocol-relative values against the page URL
            // and yields "" when the attribute is missing or cannot be resolved.
            String imgURL = elements.get(i).absUrl("data-original");
            if (imgURL.isEmpty()) {
                // Nothing to download for this element; keep processing the others.
                continue;
            }
            downloadImage(imgURL, new File(imageDir, "image" + i + ".jpg"));
        }

        out.print("success");
        out.close();
    }

    /**
     * Handles POST exactly like GET.
     *
     * @param req  the incoming HTTP request
     * @param resp the HTTP response
     * @throws ServletException see {@link #doGet(HttpServletRequest, HttpServletResponse)}
     * @throws IOException      see {@link #doGet(HttpServletRequest, HttpServletResponse)}
     */
    @Override
    protected void doPost(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException {
        doGet(req, resp);
    }

    /**
     * Copies the resource at {@code imgUrl} into {@code target}, closing both streams
     * even when the transfer fails.
     *
     * @param imgUrl absolute URL of the image to download
     * @param target file the image bytes are written to (overwritten if present)
     * @throws IOException if the URL is malformed, the connection fails, or the file
     *                     cannot be written
     */
    private void downloadImage(String imgUrl, File target) throws IOException {
        URLConnection connection = new URL(imgUrl).openConnection();
        try (InputStream is = connection.getInputStream();
             OutputStream os = new FileOutputStream(target)) {
            byte[] buffer = new byte[BUFFER_SIZE];
            int count;
            while ((count = is.read(buffer)) != -1) {
                os.write(buffer, 0, count);
            }
        }
    }
}

2.IndexAjax.js

/**
 * Sends a GET request to "/src" through jQuery and expects a plain-text reply.
 * A successful reply is handed to ifSuccess; a failed request shows an alert.
 */
function ajaxRequest2() {
    // Build the settings object first, then pass it to jQuery in one call.
    var settings = {
        url: "/src",
        type: "GET",
        dataType: "text",
        success: ifSuccess,
        error: function () {
            alert("请求错误!");
        }
    };
    $.ajax(settings);
}

/**
 * Shows an alert describing the outcome of the scrape request.
 *
 * @param data response text; the value "success" selects the success message,
 *             anything else selects the failure message
 */
function ifSuccess(data) {
    // Loose comparison is kept on purpose so behaviour matches the original.
    var message = data == "success" ? "爬取成功!" : "爬取失败!";
    alert(message);
}

3.web.xml

  <!-- Declares the servlet under the name JsoupImgServlet and binds that name
       to the class com.ld.jsoup.servlet.JsoupImgServlet. -->
  <servlet>
    <servlet-name>JsoupImgServlet</servlet-name>
    <servlet-class>com.ld.jsoup.servlet.JsoupImgServlet</servlet-class>
  </servlet>

  <!-- Maps the URL pattern /src to the servlet declared above. -->
  <servlet-mapping>
    <servlet-name>JsoupImgServlet</servlet-name>
    <url-pattern>/src</url-pattern>
  </servlet-mapping>

根据Servlet代码中的文件路径,请先在E盘新建文件夹image,爬取到的图片最终会保存在该文件夹中。

猜你喜欢

转载自blog.csdn.net/weixin_43820992/article/details/88709800