爬取招聘信息放入文件中

爬取招聘信息放入文件中
1.JobToFileServlet.java

package com.ld.jsoup.servlet;

import com.ld.jsoup.beans.Job;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.*;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

/**
 * Servlet that crawls Java job postings from 51job search-result pages and appends the
 * description text of every posting to a local text file.
 *
 * <p>The search-result pages are fetched on the request thread; the individual postings
 * linked from each page are fetched and written by background worker threads. The response
 * body {@code "success"} is sent as soon as the work has been queued, so it does not
 * guarantee that every posting has already been written.
 */
public class JobToFileServlet extends HttpServlet {

    /** First search-result page to crawl (inclusive). */
    private static final int FIRST_PAGE = 1;

    /** Last search-result page to crawl (inclusive). */
    private static final int LAST_PAGE = 2;

    /** Number of worker threads used to process the search-result pages. */
    private static final int WORKER_THREADS = 3;

    /** Part of the search URL that precedes the page number. */
    private static final String SEARCH_URL_PREFIX =
            "https://search.51job.com/list/010000,000000,0000,00,9,99,java,2,";

    /** Part of the search URL that follows the page number. */
    private static final String SEARCH_URL_SUFFIX =
            ".html?lang=c&stype=&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99&companysize=99&providesalary=99&lonlat=0%2C0&radius=-1&ord_field=0&confirmdate=9&fromType=&dibiaoid=0&address=&line=&specialarea=00&from=&welfare=";

    /** File that receives one line per crawled job description. */
    private static final File OUTPUT_FILE = new File("E:" + File.separator + "job.txt");

    /** Serializes appends to {@link #OUTPUT_FILE}; several workers and requests write to it. */
    private static final Object FILE_LOCK = new Object();

    /**
     * Fetches each search-result page, queues a background task that stores the job
     * descriptions linked from it, and answers with the text {@code "success"}.
     *
     * @param request  the incoming HTTP request
     * @param response the HTTP response; receives the body {@code "success"}
     * @throws ServletException declared by the servlet contract; not thrown directly here
     * @throws IOException      if the response writer cannot be obtained
     */
    @Override
    protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {

        request.setCharacterEncoding("UTF-8");
        // "java/html" is not a valid MIME type; the client expects plain text/HTML.
        response.setContentType("text/html;charset=UTF-8");
        PrintWriter out = response.getWriter();

        ExecutorService executorService = Executors.newFixedThreadPool(WORKER_THREADS);
        try {
            for (int page = FIRST_PAGE; page <= LAST_PAGE; page++) {
                final String url = SEARCH_URL_PREFIX + page + SEARCH_URL_SUFFIX;
                try {
                    // connect(...).get() downloads and parses the search-result page.
                    final Document listing = Jsoup.connect(url).get();
                    executorService.execute(new Runnable() {
                        @Override
                        public void run() {
                            saveJobDescriptions(listing);
                        }
                    });
                } catch (IOException e) {
                    // One unreachable page must not prevent the other pages from being crawled.
                    log("Failed to fetch search result page " + url, e);
                }
            }
        } finally {
            // Lets the queued tasks finish, then releases the worker threads. Without this
            // every request would leave an idle thread pool behind.
            executorService.shutdown();
        }
        // The page script compares the response body with "success".
        out.print("success");
    }

    /**
     * Follows every job link found on a search-result page and appends the description text
     * of each posting to the output file. Postings that cannot be fetched or written are
     * logged and skipped.
     *
     * @param listing the parsed search-result page
     */
    private void saveJobDescriptions(Document listing) {
        // Select the anchor elements that link to the individual postings.
        Elements links = listing.select("p.t1 span a");
        for (Element link : links) {
            String detailUrl = link.absUrl("href");
            if (detailUrl.isEmpty()) {
                // absUrl returns "" when no absolute URL can be built; connecting to it would throw.
                continue;
            }
            try {
                Document detail = Jsoup.connect(detailUrl).get();
                String jobDes = detail.select("div.bmsg.job_msg.inbox").text();
                appendLine(jobDes);
            } catch (IOException e) {
                log("Skipping job posting " + detailUrl, e);
            }
        }
    }

    /**
     * Appends {@code text} followed by a CRLF line break to the output file, encoded as UTF-8.
     *
     * @param text the text to append
     * @throws IOException if the file cannot be opened or written
     */
    private static void appendLine(String text) throws IOException {
        synchronized (FILE_LOCK) {
            // try-with-resources flushes and closes the writer even when a write fails.
            try (Writer writer = new OutputStreamWriter(new FileOutputStream(OUTPUT_FILE, true), "UTF-8")) {
                writer.write(text);
                writer.write("\r\n");
            }
        }
    }

    /**
     * Delegates to {@link HttpServlet#doPost}, i.e. keeps the default POST handling of the
     * servlet base class.
     */
    @Override
    protected void doPost(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException {
        super.doPost(req, resp);
    }
}

2.indexAjax.js

/**
 * Sends a GET request to /JobToFile to start the crawl.
 * The plain-text response is passed to ifsuccess; a failed request shows an alert.
 */
function ajaxRequest5() {
    $.ajax({
        "url":"/JobToFile",
        "type":"GET",
        "dataType":"text",             // expected type of the returned data (was misspelled "datType" and ignored)
        "success":ifsuccess,        // callback run when the request succeeds
        "error":function () {
            alert("请求错误!");
        }
    });
}

/**
 * Success callback for the crawl request: shows a success alert when the
 * response body equals "success", and a failure alert for anything else.
 *
 * @param data the response body returned by the server
 */
function ifsuccess(data) {
    var message = (data == "success") ? "爬取成功!" : "爬取失败!";
    alert(message);
}

3.web.xml

<!-- Registers the class com.ld.jsoup.servlet.JobToFileServlet under the name JobToFileServlet. -->
<servlet>
    <servlet-name>JobToFileServlet</servlet-name>
    <servlet-class>com.ld.jsoup.servlet.JobToFileServlet</servlet-class>
  </servlet>
  <!-- Maps the URL pattern /JobToFile to the servlet named JobToFileServlet. -->
  <servlet-mapping>
    <servlet-name>JobToFileServlet</servlet-name>
    <url-pattern>/JobToFile</url-pattern>
  </servlet-mapping>

猜你喜欢

转载自blog.csdn.net/weixin_43820992/article/details/88710096