Java 漫画爬虫:《斗罗大陆1》、《斗罗大陆2》、《斗破苍穹》

用 Java 实现《斗罗大陆》等漫画的爬虫

直接上代码:《斗罗大陆1》

package com.mdr.ManHua;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.*;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.*;

/**
 * Crawler that downloads the page images of the comic listed under {@link #HREF}.
 *
 * <p>The work is split into five steps that {@link #main(String[])} runs in order:
 * <ol>
 *   <li>{@link #step1(String)} collects the chapter links from the index page;</li>
 *   <li>{@link #step2()} reads the page count of every chapter;</li>
 *   <li>{@link #step3()} separates bonus chapters from regular ones;</li>
 *   <li>{@link #step4()} probes the image CDN for a working URL for every page;</li>
 *   <li>{@link #step5()} downloads every URL that was found.</li>
 * </ol>
 *
 * <p>All intermediate results live in static collections, so the steps share state across
 * instances and the class is not thread-safe.
 */
public class Dldl1 {
    /** Index page of the comic; chapter links are resolved relative to it. */
    private static final String HREF = "https://www.zymk.cn/2/";
    /** Directory the images are written to. */
    private static final String IMAGE_DIR = "D:\\Temp\\images\\dldl1\\";
    /** Common prefix of every image URL of this comic. */
    private static final String IMAGE_BASE = "http://mhpic.zymkcdn.com/comic/D%2F%E6%96%97%E7%BD%97%E5%A4%A7%E9%99%86%E6%8B%86%E5%88%86%E7%89%88%2F";
    /** Common tail of every image URL. */
    private static final String IMAGE_SUFFIX = ".jpg-zymk.middle.webp";
    /** Chapter-folder suffixes to try, in order: plain, GQ, GQV. */
    private static final String[] CHAPTER_SUFFIXES = {"", "GQ", "GQV"};
    /** Sentinel returned by {@link #黑闸子(int, int)} when no candidate URL answers with 200. */
    private static final String ERROR_MARKER = "errer";
    /** Connect/read timeout for the availability probe, in milliseconds. */
    private static final int TIMEOUT_MS = 30000;

    private static Dldl1 dldl1;
    /** Chapter links (relative hrefs) in reading order. */
    private static final List<String> aList = new ArrayList<String>();
    /** Page counts of the regular chapters, in reading order. */
    private static final List<Integer> bList = new ArrayList<Integer>();
    /** Index in {@link #aList} -> page count; sorted so iteration follows reading order. */
    private static final Map<Integer, Integer> map = new TreeMap<Integer, Integer>();
    /** Image URLs that answered with HTTP 200. */
    private static final List<String> imgList = new ArrayList<String>();
    /** Page counts of the bonus chapters that were filtered out. */
    private static final List<Integer> FanWai = new ArrayList<Integer>();
    /** File-name stems, parallel to {@link #imgList}. */
    private static final List<String> imgIdx = new ArrayList<String>();

    public static void main(String[] args){
        dldl1 = new Dldl1();
        dldl1.step1(HREF);
        dldl1.step2();
        dldl1.step3();
        dldl1.step4();
        dldl1.step5();
    }

    /**
     * Checks whether a URL can be fetched.
     *
     * @param src URL to probe
     * @return {@code true} only if the server answers with HTTP 200; {@code false} for any
     *         other status and for any failure (malformed URL, I/O error, timeout)
     */
    public Boolean ojbkImg(String src){
        HttpURLConnection connection = null;
        try {
            connection = (HttpURLConnection) new URL(src).openConnection();
            // Without timeouts a single stalled request would block the whole crawl.
            connection.setConnectTimeout(TIMEOUT_MS);
            connection.setReadTimeout(TIMEOUT_MS);
            return connection.getResponseCode() == HttpURLConnection.HTTP_OK;
        } catch (Exception ex) {
            // Deliberately best-effort: every failure just means "not usable".
            System.out.print("URL不可用  ");
            System.out.println(src);
            return false;
        } finally {
            if (connection != null) {
                connection.disconnect();
            }
        }
    }

    /**
     * Downloads one image into {@link #IMAGE_DIR}.
     *
     * @param urlList URL of the image
     * @param idx     file-name stem; the file is stored as {@code idx + ".jpg"}
     */
    private void download(String urlList,String idx) {
        File target = new File(IMAGE_DIR + idx + ".jpg");
        File parent = target.getParentFile();
        if (parent != null && !parent.isDirectory() && !parent.mkdirs()) {
            System.err.println("无法创建目录: " + parent);
            return;
        }

        // try-with-resources closes both streams even when the transfer fails half-way.
        try (InputStream in = new URL(urlList).openStream();
             OutputStream out = new FileOutputStream(target)) {
            byte[] buffer = new byte[8192];
            int length;
            while ((length = in.read(buffer)) != -1) {
                out.write(buffer, 0, length);
            }
        } catch (IOException e) {
            System.err.println("下载失败: " + urlList);
            e.printStackTrace();
            // Do not leave an empty or truncated image behind.
            if (target.exists() && !target.delete()) {
                System.err.println("无法删除不完整的文件: " + target);
            }
        }
    }

    /**
     * Finds a working image URL for one page by trying each known chapter-folder variant.
     *
     * @param i chapter number (1-based)
     * @param j page number inside the chapter (1-based)
     * @return the first candidate URL that answers with HTTP 200, or {@code "errer"} if none does
     */
    public String 黑闸子(int i,int j){
        System.out.println(i+"=="+j);
        String src = null;
        for (String suffix : CHAPTER_SUFFIXES) {
            src = IMAGE_BASE + i + "%E8%AF%9D" + suffix + "%2F" + j + IMAGE_SUFFIX;
            if (ojbkImg(src)) {
                return src;
            }
        }
        System.out.print("出错 = "+i+"::"+j+"  ");
        System.out.println(src);
        return ERROR_MARKER;
    }

    /**
     * Step 5: downloads every URL collected by {@link #step4()}.
     */
    public void step5(){
        System.out.println("第五阶段开始!");
        System.out.println("番外:"+FanWai.size());
        System.out.println("可用链接"+imgList.size());
        for (int i = 0; i < imgList.size(); i++) {
            System.out.println("正在下载:"+i+" 图 / "+ imgList.size());
            download(imgList.get(i), imgIdx.get(i));
        }
        System.out.println("第五阶段完成!");
    }

    /**
     * Step 4: resolves an image URL for every page of every regular chapter.
     *
     * <p>Pages for which no URL could be found are skipped, so that only downloadable
     * entries reach {@link #step5()}.
     */
    public void step4(){
        System.out.println("第四阶段开始!");
        imgList.clear();
        imgIdx.clear();

        for (int chapter = 1; chapter <= bList.size(); chapter++) {
            int pages = bList.get(chapter - 1);
            for (int page = 1; page <= pages; page++) {
                String src = 黑闸子(chapter, page);
                if (ERROR_MARKER.equals(src)) {
                    continue;
                }
                imgList.add(src);
                imgIdx.add("第" + chapter + "话" + page);
            }
        }

        System.out.println("第四阶段完成!");
    }

    /**
     * Step 3: splits the page counts gathered by {@link #step2()} into bonus chapters
     * ({@link #FanWai}) and regular chapters ({@link #bList}).
     *
     * <p>A chapter is treated as a bonus chapter when its index in the link list is 590 or
     * an odd number in 593..637.
     */
    public void step3(){
        System.out.println("第三阶段开始!");
        System.out.println("刚开始:"+map.size());
        bList.clear();
        FanWai.clear();

        // The map is sorted by index, so bList ends up in reading order.
        for (Map.Entry<Integer, Integer> entry : map.entrySet()) {
            int key = entry.getKey();
            int value = entry.getValue();
            boolean bonus = (key >= 593 && key <= 637 && key % 2 == 1) || key == 590;
            if (bonus) {
                System.out.println(value+"::"+key);
                FanWai.add(value);
            } else {
                bList.add(value);
            }
        }

        for (int i = 0; i < bList.size(); i++) {
            System.out.println((i+1) + "==" + bList.get(i));
        }

        System.out.println("第三阶段完成!");
    }

    /**
     * Step 2: visits every chapter link and records how many pages the chapter has.
     *
     * <p>Loading stops at the first chapter that cannot be read. Continuing past a gap
     * would shift the position-based chapter numbering used by the later steps.
     */
    public void step2(){
        System.out.println("第二阶段开始!");
        map.clear();

        int i = 0;
        try {
            for (; i < aList.size(); i++) {
                System.out.println("正在加载:"+i+" / "+aList.size() +" 页!......");
                Document page = Jsoup.connect(HREF + aList.get(i)).get();
                int num = readTotalPages(page);
                if (i == 34) {
                    System.out.println("第35章:"+num);
                }
                map.put(i, num);
            }
        } catch (IOException | RuntimeException e) {
            System.err.println("加载第 " + i + " 个章节失败,后续章节未统计: " + e);
        }
        System.out.println("第二阶段完成!");

        System.out.println(map.size());
    }

    /**
     * Extracts the page count from the {@code span.totalPage} element of a chapter page.
     *
     * @throws IOException           if the page has no such element
     * @throws NumberFormatException if the element's text is not an integer
     */
    private static int readTotalPages(Document page) throws IOException {
        Element total = page.select("span.totalPage").first();
        if (total == null) {
            throw new IOException("页面中找不到 totalPage 元素");
        }
        return Integer.parseInt(total.text().trim());
    }

    /**
     * Step 1: collects the chapter links from the index page.
     *
     * <p>A link counts as a chapter link when the part of its href before the first dot
     * consists only of digits. The collected list is reversed after scanning.
     *
     * @param src URL of the index page
     */
    public void step1(String src){
        System.out.println("第一阶段开始!");
        aList.clear();
        try {
            Document index = Jsoup.connect(src).get();
            for (Element link : index.select("a[href]")) {
                String href = link.attr("href");
                int dot = href.indexOf('.');
                if (dot != -1 && href.substring(0, dot).matches("[0-9]+")) {
                    aList.add(href);
                }
            }
            Collections.reverse(aList);
        } catch (IOException e) {
            e.printStackTrace();
        }
        System.out.println("第一阶段完成!");
    }
}

《斗罗大陆2》:

package com.mdr.ManHua;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.*;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

/**
 * Crawler that downloads the page images of the comic listed under {@link #SRC}.
 *
 * <p>{@link #main(String[])} runs the steps in order: {@link #step1(String)} collects the
 * chapter links, {@link #step2()} drops the paid chapter, {@link #step3()} reads the page
 * count of every chapter, {@link #step4()} resolves an image URL for every page and
 * {@link #step5()} downloads them.
 */
public class Dldl2 {

    /** Index page of the comic; chapter links are resolved relative to it. */
    private static final String SRC = "https://www.zymk.cn/609/";
    /** Directory the images are written to. */
    private static final String IMAGE_DIR = "D:\\Temp\\images\\dldl2\\";
    /** URL prefix of the "strip" edition folders. */
    private static final String STRIP_BASE = "http://mhpic.zymkcdn.com/comic/J%2F%E7%BB%9D%E4%B8%96%E5%94%90%E9%97%A8%E6%9D%A1%E6%BC%AB%E7%89%88%2F";
    /** URL prefix of the "split" edition folders. */
    private static final String SPLIT_BASE = "http://mhpic.zymkcdn.com/comic/J%2F%E7%BB%9D%E4%B8%96%E5%94%90%E9%97%A8%E6%8B%86%E5%88%86%E7%89%88%2F";
    /** URL prefix of the prologue pages. */
    private static final String PROLOGUE_BASE = "http://mhpic.zymkcdn.com/comic/J%2F%E7%BB%9D%E4%B8%96%E5%94%90%E9%97%A8%2F%E5%BC%95%2F";
    /** Common tail of every image URL. */
    private static final String IMAGE_SUFFIX = ".jpg-zymk.middle.webp";
    /** Sentinel returned by {@link #黑闸子(int, int)} when no candidate URL answers with 200. */
    private static final String ERROR_MARKER = "errer";
    /** Position in the link list of the paid chapter that has to be skipped. */
    private static final int PAID_CHAPTER_INDEX = 421;
    /** Connect/read timeout for the availability probe, in milliseconds. */
    private static final int TIMEOUT_MS = 30000;

    /** Chapter links (relative hrefs) in reading order. */
    private List<String> aList = new ArrayList<String>();
    /** Page count of every chapter, parallel to {@link #aList}. */
    private List<Integer> bList = new ArrayList<Integer>();
    /** Image URLs queued for download. */
    private List<String> imgList = new ArrayList<String>();
    /** File-name stems, parallel to {@link #imgList}. */
    private List<String> imgIdx = new ArrayList<String>();
    /** Last candidate URL of every page for which no working URL was found. */
    private List<String> errOne = new ArrayList<String>();

    private static Dldl2 test;

    public static void main(String[] args){
        test = new Dldl2();

        test.step1(SRC);
        test.step2();
        test.step3();
        test.step4();
        test.step5();
    }

    /**
     * Downloads one image into {@link #IMAGE_DIR}.
     *
     * @param urlList URL of the image
     * @param idx     file-name stem; the file is stored as {@code idx + ".jpg"}
     */
    private void download(String urlList,String idx) {
        File target = new File(IMAGE_DIR + idx + ".jpg");
        File parent = target.getParentFile();
        if (parent != null && !parent.isDirectory() && !parent.mkdirs()) {
            System.err.println("无法创建目录: " + parent);
            return;
        }

        // try-with-resources closes both streams even when the transfer fails half-way.
        try (InputStream in = new URL(urlList).openStream();
             OutputStream out = new FileOutputStream(target)) {
            byte[] buffer = new byte[8192];
            int length;
            while ((length = in.read(buffer)) != -1) {
                out.write(buffer, 0, length);
            }
        } catch (IOException e) {
            System.err.println("下载失败: " + urlList);
            e.printStackTrace();
            // Do not leave an empty or truncated image behind.
            if (target.exists() && !target.delete()) {
                System.err.println("无法删除不完整的文件: " + target);
            }
        }
    }

    /**
     * Checks whether a URL can be fetched.
     *
     * @param src URL to probe
     * @return {@code true} only if the server answers with HTTP 200; {@code false} for any
     *         other status and for any failure (malformed URL, I/O error, timeout)
     */
    public Boolean ojbkImg(String src){
        HttpURLConnection connection = null;
        try {
            connection = (HttpURLConnection) new URL(src).openConnection();
            // Without timeouts a single stalled request would block the whole crawl.
            connection.setConnectTimeout(TIMEOUT_MS);
            connection.setReadTimeout(TIMEOUT_MS);
            return connection.getResponseCode() == HttpURLConnection.HTTP_OK;
        } catch (Exception ex) {
            // Deliberately best-effort: every failure just means "not usable".
            System.out.print("URL不可用  ");
            System.out.println(src);
            return false;
        } finally {
            if (connection != null) {
                connection.disconnect();
            }
        }
    }

    /**
     * Step 6: placeholder, intentionally does nothing.
     */
    public void step6(){

    }

    /**
     * Step 5: downloads every URL collected by {@link #step4()}.
     */
    public void step5(){
        for (int i = 0; i < imgList.size(); i++) {
            System.out.println("正在下载:"+i+" 图");
            download(imgList.get(i), imgIdx.get(i));
        }
    }

    /**
     * Finds a working image URL for one page by trying the known folder layouts in order:
     * strip+GQ, split with the "第" prefix, split+GQ, strip+GQV.
     *
     * <p>When every candidate fails, the last one is recorded in {@link #errOne}.
     *
     * @param i chapter number
     * @param j page number inside the chapter (1-based)
     * @return the first candidate URL that answers with HTTP 200, or {@code "errer"} if none does
     */
    public String 黑闸子(int i,int j){
        System.out.println(i+"=="+j);
        String[] candidates = {
            STRIP_BASE + i + "%E8%AF%9DGQ%2F" + j + IMAGE_SUFFIX,
            SPLIT_BASE + "%E7%AC%AC" + i + "%E8%AF%9D%2F" + j + IMAGE_SUFFIX,
            SPLIT_BASE + i + "%E8%AF%9DGQ%2F" + j + IMAGE_SUFFIX,
            STRIP_BASE + i + "%E8%AF%9DGQV%2F" + j + IMAGE_SUFFIX,
        };
        for (String candidate : candidates) {
            if (ojbkImg(candidate)) {
                return candidate;
            }
        }
        System.out.println("出错 = "+i+"::"+j);
        errOne.add(candidates[candidates.length - 1]);
        return ERROR_MARKER;
    }

    /**
     * Step 4: resolves an image URL for every page of every chapter.
     *
     * <p>Entry 0 of the chapter list is the prologue and uses a fixed URL pattern. For any
     * other chapter, when a page cannot be found the page right after the reported last
     * page is tried once as a replacement. Pages that still cannot be resolved are skipped
     * so that only downloadable entries reach {@link #step5()}.
     */
    public void step4(){
        System.out.println("第四阶段开始!");
        imgList.clear();
        imgIdx.clear();

        for (int i = 0; i < bList.size(); i++) {
            int pages = bList.get(i);
            boolean fallbackTried = false;
            for (int j = 1; j <= pages; j++) {
                String src;
                String idx;
                if (i == 0) {
                    src = PROLOGUE_BASE + j + IMAGE_SUFFIX;
                    idx = "第"+i+"话"+j+"--序章";
                } else {
                    src = 黑闸子(i, j);
                    idx = "第"+i+"话"+j;
                    if (ERROR_MARKER.equals(src)) {
                        if (fallbackTried) {
                            // The replacement page was already handled for this chapter.
                            continue;
                        }
                        fallbackTried = true;
                        src = 黑闸子(i, pages + 1);
                        // Parenthesised so the page number is added, not concatenated.
                        idx = "第"+i+"话"+(pages + 1);
                        if (ERROR_MARKER.equals(src)) {
                            continue;
                        }
                    }
                }
                imgList.add(src);
                imgIdx.add(idx);
            }
        }
        System.out.println("第四阶段完成!");
    }

    /**
     * Step 3: visits every chapter link and records how many pages the chapter has.
     *
     * <p>Loading stops at the first chapter that cannot be read. Continuing past a gap
     * would shift the position-based chapter numbering used by {@link #step4()}.
     */
    public void step3(){
        System.out.println("第三阶段开始!");
        bList = new ArrayList<Integer>();

        int i = 0;
        try {
            for (; i < aList.size(); i++) {
                System.out.println("正在加载:"+i+" / "+aList.size() +" 页!......");
                Document page = Jsoup.connect(SRC + aList.get(i)).get();
                bList.add(readTotalPages(page));
            }
        } catch (IOException | RuntimeException e) {
            System.err.println("加载第 " + i + " 个章节失败,后续章节未统计: " + e);
        }
        System.out.println("第三阶段完成!");
    }

    /**
     * Extracts the page count from the {@code span.totalPage} element of a chapter page.
     *
     * @throws IOException           if the page has no such element
     * @throws NumberFormatException if the element's text is not an integer
     */
    private static int readTotalPages(Document page) throws IOException {
        Element total = page.select("span.totalPage").first();
        if (total == null) {
            throw new IOException("页面中找不到 totalPage 元素");
        }
        return Integer.parseInt(total.text().trim());
    }

    /**
     * Step 2: removes the paid chapter from the link list.
     *
     * <p>The entry is removed by position. Removing by value would delete the first link
     * with the same href, which is a different entry when the page lists a link twice.
     */
    public void step2() {
        System.out.println("第二阶段开始!");
        if (aList.size() > PAID_CHAPTER_INDEX) {
            aList.remove(PAID_CHAPTER_INDEX);
        }
        System.out.println("第二阶段完成!");
    }

    /**
     * Step 1: collects the chapter links from the index page.
     *
     * <p>A link counts as a chapter link when the part of its href before the first dot
     * consists only of digits. The collected list is reversed after scanning.
     *
     * @param src URL of the index page
     */
    public void step1(String src){
        System.out.println("第一阶段开始!");
        aList = new ArrayList<String>();
        try {
            Document index = Jsoup.connect(src).get();
            for (Element link : index.select("a[href]")) {
                String href = link.attr("href");
                int dot = href.indexOf('.');
                if (dot != -1 && href.substring(0, dot).matches("[0-9]+")) {
                    aList.add(href);
                }
            }
            Collections.reverse(aList);
        } catch (IOException e) {
            e.printStackTrace();
        }
        System.out.println("第一阶段完成!");
    }
}

《斗破苍穹》:

package com.mdr.ManHua;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.*;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.*;

/**
 * Crawler that downloads the page images of the comic listed under {@link #HREF}.
 *
 * <p>{@link #main(String[])} runs the steps in order: {@link #step1()} collects the chapter
 * links, {@link #step2()} reads the page count of every chapter, {@link #step3()} drops the
 * unrelated chapters, {@link #step4()} resolves an image URL for every page and
 * {@link #step5()} downloads them.
 *
 * <p>All intermediate results live in static collections, so the steps share state across
 * instances and the class is not thread-safe.
 */
public class Dpcq {
    /** Index page of the comic; chapter links are resolved relative to it. */
    private static final String HREF = "https://www.zymk.cn/1/";
    /** Directory the images are written to. */
    private static final String IMAGE_DIR = "D:\\Temp\\images\\dpcq\\";
    /** Common prefix of every image URL of this comic. */
    private static final String IMAGE_BASE = "http://mhpic.zymkcdn.com/comic/D%2F%E6%96%97%E7%A0%B4%E8%8B%8D%E7%A9%B9%E6%8B%86%E5%88%86%E7%89%88%2F";
    /** Common tail of every image URL. */
    private static final String IMAGE_SUFFIX = ".jpg-zymk.middle.webp";
    /** Chapter-folder suffixes to try, in order: plain, GQ, V1, V, GQV. */
    private static final String[] CHAPTER_SUFFIXES = {"", "GQ", "V1", "V", "GQV"};
    /** Sentinel returned by {@link #黑闸子(int, int)} when no candidate URL answers with 200. */
    private static final String ERROR_MARKER = "errer";
    /** Connect/read timeout for the availability probe, in milliseconds. */
    private static final int TIMEOUT_MS = 30000;

    private static Dpcq dpcq;
    /** Chapter links (relative hrefs) in reading order. */
    private static final List<String> aList = new ArrayList<String>();
    /** Page counts of the chapters that are kept, in reading order. */
    private static final List<Integer> bList = new ArrayList<Integer>();
    /** Image URLs that answered with HTTP 200. */
    private static final List<String> imgList = new ArrayList<String>();
    /** Index in {@link #aList} -> page count; sorted so iteration follows reading order. */
    private static final Map<Integer, Integer> map = new TreeMap<Integer, Integer>();
    /** File-name stems, parallel to {@link #imgList}. */
    private static final List<String> imgIdx = new ArrayList<String>();

    public static void main(String[] args){
        dpcq = new Dpcq();
        dpcq.step1();
        dpcq.step2();
        dpcq.step3();
        dpcq.step4();
        dpcq.step5();
    }

    /**
     * Downloads one image into {@link #IMAGE_DIR}.
     *
     * @param urlList URL of the image
     * @param idx     file-name stem; the file is stored as {@code idx + ".jpg"}
     */
    private void download(String urlList,String idx) {
        File target = new File(IMAGE_DIR + idx + ".jpg");
        File parent = target.getParentFile();
        if (parent != null && !parent.isDirectory() && !parent.mkdirs()) {
            System.err.println("无法创建目录: " + parent);
            return;
        }

        // try-with-resources closes both streams even when the transfer fails half-way.
        try (InputStream in = new URL(urlList).openStream();
             OutputStream out = new FileOutputStream(target)) {
            byte[] buffer = new byte[8192];
            int length;
            while ((length = in.read(buffer)) != -1) {
                out.write(buffer, 0, length);
            }
        } catch (IOException e) {
            System.err.println("下载失败: " + urlList);
            e.printStackTrace();
            // Do not leave an empty or truncated image behind.
            if (target.exists() && !target.delete()) {
                System.err.println("无法删除不完整的文件: " + target);
            }
        }
    }

    /**
     * Checks whether a URL can be fetched.
     *
     * @param src URL to probe
     * @return {@code true} only if the server answers with HTTP 200; {@code false} for any
     *         other status and for any failure (malformed URL, I/O error, timeout)
     */
    public Boolean ojbkImg(String src){
        HttpURLConnection connection = null;
        try {
            connection = (HttpURLConnection) new URL(src).openConnection();
            // Without timeouts a single stalled request would block the whole crawl.
            connection.setConnectTimeout(TIMEOUT_MS);
            connection.setReadTimeout(TIMEOUT_MS);
            return connection.getResponseCode() == HttpURLConnection.HTTP_OK;
        } catch (Exception ex) {
            // Deliberately best-effort: every failure just means "not usable".
            System.out.print("URL不可用  ");
            System.out.println(src);
            return false;
        } finally {
            if (connection != null) {
                connection.disconnect();
            }
        }
    }

    /**
     * Finds a working image URL for one page by trying each known chapter-folder variant.
     *
     * @param i chapter number (1-based)
     * @param j page number inside the chapter (1-based)
     * @return the first candidate URL that answers with HTTP 200, or {@code "errer"} if none does
     */
    public String 黑闸子(int i,int j){
        System.out.println(i+"=="+j);
        String src = null;
        for (String suffix : CHAPTER_SUFFIXES) {
            src = IMAGE_BASE + i + "%E8%AF%9D" + suffix + "%2F" + j + IMAGE_SUFFIX;
            if (ojbkImg(src)) {
                return src;
            }
        }
        System.out.print("出错 = "+i+"::"+j+"  ");
        System.out.println(src);
        return ERROR_MARKER;
    }

    /**
     * Step 5: downloads every URL collected by {@link #step4()}.
     */
    public void step5(){
        System.out.println("第五阶段开始!");
        System.out.println("可用链接"+imgList.size());
        for (int i = 0; i < imgList.size(); i++) {
            System.out.println("正在下载:"+i+" 图 / "+ imgList.size());
            download(imgList.get(i), imgIdx.get(i));
        }
        System.out.println("第五阶段完成!");
    }

    /**
     * Step 4: resolves an image URL for every page of every kept chapter.
     *
     * <p>Pages for which no URL could be found are skipped, so that only downloadable
     * entries reach {@link #step5()}.
     */
    public void step4(){
        System.out.println("第四阶段开始!");
        imgList.clear();
        imgIdx.clear();

        for (int chapter = 1; chapter <= bList.size(); chapter++) {
            int pages = bList.get(chapter - 1);
            for (int page = 1; page <= pages; page++) {
                String src = 黑闸子(chapter, page);
                if (ERROR_MARKER.equals(src)) {
                    continue;
                }
                imgList.add(src);
                imgIdx.add("第" + chapter + "话" + page);
            }
        }

        System.out.println("第四阶段完成!");
    }

    /**
     * Step 3: copies the page counts gathered by {@link #step2()} into {@link #bList},
     * leaving out the entries at link-list positions 699, 700, 719 and 722.
     */
    public void step3(){
        System.out.println("第三阶段开始!");
        System.out.println("刚开始:"+map.size());
        bList.clear();

        // The map is sorted by index, so bList ends up in reading order.
        for (Map.Entry<Integer, Integer> entry : map.entrySet()) {
            int key = entry.getKey();
            int value = entry.getValue();
            boolean unrelated = key == 699 || key == 700 || key == 719 || key == 722;
            if (unrelated) {
                System.out.println(value+"::"+key);
            } else {
                bList.add(value);
            }
        }

        for (int i = 0; i < bList.size(); i++) {
            System.out.println((i+1) + "==" + bList.get(i));
        }
        System.out.println("第三阶段完成!");
    }

    /**
     * Step 2: visits every chapter link and records how many pages the chapter has.
     *
     * <p>Loading stops at the first chapter that cannot be read. Continuing past a gap
     * would shift the position-based chapter numbering used by the later steps.
     */
    public void step2(){
        System.out.println("第二阶段开始!");
        map.clear();

        int i = 0;
        try {
            for (; i < aList.size(); i++) {
                System.out.println("正在加载:"+i+" / "+aList.size() +" 页!......");
                Document page = Jsoup.connect(HREF + aList.get(i)).get();
                map.put(i, readTotalPages(page));
            }
        } catch (IOException | RuntimeException e) {
            System.err.println("加载第 " + i + " 个章节失败,后续章节未统计: " + e);
        }
        System.out.println("第二阶段完成!");

        System.out.println(map.size());
    }

    /**
     * Extracts the page count from the {@code span.totalPage} element of a chapter page.
     *
     * @throws IOException           if the page has no such element
     * @throws NumberFormatException if the element's text is not an integer
     */
    private static int readTotalPages(Document page) throws IOException {
        Element total = page.select("span.totalPage").first();
        if (total == null) {
            throw new IOException("页面中找不到 totalPage 元素");
        }
        return Integer.parseInt(total.text().trim());
    }

    /**
     * Step 1: collects the chapter links from the index page at {@link #HREF}.
     *
     * <p>A link counts as a chapter link when the part of its href before the first dot
     * consists only of digits. The collected list is reversed after scanning.
     */
    public void step1(){
        System.out.println("第一阶段开始!");
        aList.clear();
        try {
            Document index = Jsoup.connect(HREF).get();
            for (Element link : index.select("a[href]")) {
                String href = link.attr("href");
                int dot = href.indexOf('.');
                if (dot != -1 && href.substring(0, dot).matches("[0-9]+")) {
                    aList.add(href);
                }
            }
            Collections.reverse(aList);
        } catch (IOException e) {
            e.printStackTrace();
        }
        System.out.println("第一阶段完成!");
    }
}

效果我就不展示了,手机开的热点,我流量不够用。
源码包自己找。
发布了21 篇原创文章 · 获赞 2 · 访问量 6493

猜你喜欢

转载自blog.csdn.net/weixin_43386443/article/details/90552868
今日推荐