Android 爬虫:使用 Jsoup 爬取某招聘网站数据并保存为 Excel 文件

说明:此项目仅用于学习目的,请勿用于其他用途。

此项目主要用到了 Android 平台上的 Jsoup 和 jxl 两个库。

添加项目依赖,主要是jxl和jsoup:

// Module-level Gradle dependencies required by the crawler.
dependencies {
    // jsoup: used by the crawler code to fetch the page and select elements by CSS class.
    implementation 'org.jsoup:jsoup:1.9.2'
    // Any local .jar files dropped into the module's libs/ directory.
    implementation fileTree(dir: 'libs', include: ['*.jar'])
    // jxl (JExcelApi): used by the export code to create and write the workbook.
    implementation 'com.hynnet:jxl:2.6.12.1'
}

爬取数据代码:

private void parseWEBHtml() {
        /*
         * Fetches the result page at (url + position), extracts one ZData record per
         * listing and appends it to zArrayList. When position == 5 the collected
         * records are shown in the list view on the UI thread.
         *
         * CSS classes read from the page:
         * 1. job title:        zwmc
         * 2. company name:     gsmc
         * 3. salary:           zwyx
         * 4. responsibilities: newlist_deatil_last
         *
         * Must not be called on the UI thread: Jsoup.connect(...).get() performs
         * blocking network I/O.
         */
        try {
            // Note: doc.title() yields the page title and doc.body().text() the page text.
            final Document doc = Jsoup.connect(url+position).get();
            final Elements eJob = doc.getElementsByClass("zwmc");
            final Elements eCompany = doc.getElementsByClass("gsmc");
            final Elements eSalary = doc.getElementsByClass("zwyx");
            final Elements eRes = doc.getElementsByClass("newlist_deatil_last");
            Log.d("jxd","aaa : "+eCompany.text());

            // The zwmc/gsmc/zwyx lists are read at index i + 1 while newlist_deatil_last
            // is read at index i (presumably the first element of the former three is a
            // table header row -- confirm against the page markup). The row count must
            // therefore leave room for that offset in ALL three shifted lists, including
            // eJob, otherwise get(i + 1) throws IndexOutOfBoundsException.
            final int shiftedRows =
                    Math.min(eJob.size(), Math.min(eCompany.size(), eSalary.size())) - 1;
            final int rowCount = Math.min(shiftedRows, eRes.size());

            for (int i = 0; i < rowCount; i++) {
                final ZData z = new ZData();
                z.setJobs(eJob.get(i + 1).text().trim());
                z.setCompanyName(eCompany.get(i + 1).text().trim());
                z.setSalary(eSalary.get(i + 1).text().trim());
                z.setResponsibilities(eRes.get(i).text().trim());
                zArrayList.add(z);
            }

            // NOTE(review): 5 looks like the index of the last page to crawl -- confirm
            // against the caller that drives `position`.
            if (position == 5) {
                runOnUiThread(new Runnable() {
                    @Override
                    public void run() {
                        button.setEnabled(true);
                        textView.setVisibility(View.GONE);
                        ZAdapter zAdapter = new ZAdapter(MainActivity.this, zArrayList);
                        listView.setAdapter(zAdapter);
                    }
                });
            }
        } catch (IOException e) {
            // Network/parse failure for this page: log it with the page index and keep
            // whatever records were collected so far.
            Log.e("jxd", "Failed to fetch or parse page " + position, e);
        }
    }

将获取到的数据写入excel:

 /**
     * Writes the given records to a new Excel workbook, one record per row.
     *
     * <p>Columns, left to right: company name, job title, salary, responsibilities.
     * A workbook with a single (possibly empty) sheet is still written when
     * {@code stuList} is {@code null} or empty. Any failure is reported to standard
     * output / the stack trace and is not propagated to the caller.
     *
     * @param path          location of the workbook file to create; passed directly to
     *                      {@code new File(path)}
     * @param excelFileName name given to the first (index 0) sheet of the workbook
     * @param stuList       records to export; may be {@code null}
     */
    public static void writeExcel(String path, String excelFileName, ArrayList<ZBean> stuList ) {
        WritableWorkbook workbook = null;
        try {
            // Create the workbook file and its first sheet (sheet index 0).
            workbook = Workbook.createWorkbook(new File(path));
            final WritableSheet firstSheet = workbook.createSheet(excelFileName, 0);

            // One row per record; an empty list simply produces no rows.
            final int rowTotal = (stuList == null) ? 0 : stuList.size();
            for (int row = 0; row < rowTotal; row++) {
                final ZBean record = stuList.get(row);
                firstSheet.addCell(new Label(0, row, record.getCompanyName()));
                firstSheet.addCell(new Label(1, row, record.getJobs()));
                firstSheet.addCell(new Label(2, row, record.getSalary()));
                firstSheet.addCell(new Label(3, row, record.getResponsibilities()));
            }

            // Flush the in-memory cells to the file; the workbook is closed in finally.
            workbook.write();
        } catch (Exception e) {
            System.out.println(e);
        } finally {
            if (workbook != null) {
                try {
                    workbook.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }

    }


项目截图如下:


生成excel图如下:



源码地址:https://github.com/jingxiongdi/Crawler

猜你喜欢

转载自blog.csdn.net/u012539700/article/details/79998009