Java 实现《斗罗大陆》漫画爬虫
直接上代码。《斗罗大陆1》:
package com.mdr.ManHua;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.*;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.*;
/**
 * Command-line crawler that downloads the page images of the comic whose chapter
 * index lives at {@link #HREF} and stores them under {@link #OUTPUT_DIR}.
 *
 * <p>The work is split into five steps that {@link #main(String[])} runs in order:
 * collect chapter links, read each chapter's page count, separate side stories from
 * regular chapters, resolve a working image URL for every page, and download.
 */
public class Dldl1 {
    /** Chapter index page; the chapter links found on it are appended to this URL. */
    private static final String HREF = "https://www.zymk.cn/2/";
    /** Common prefix of every image URL probed by {@link #黑闸子(int, int)}. */
    private static final String IMAGE_BASE =
            "http://mhpic.zymkcdn.com/comic/D%2F%E6%96%97%E7%BD%97%E5%A4%A7%E9%99%86%E6%8B%86%E5%88%86%E7%89%88%2F";
    /** URL-encoded marker that follows the chapter number in the image folder name. */
    private static final String CHAPTER_MARK = "%E8%AF%9D";
    /** Folder-name suffixes tried, in this order, when looking for a page image. */
    private static final String[] FOLDER_SUFFIXES = {"", "GQ", "GQV"};
    private static final String IMAGE_TAIL = ".jpg-zymk.middle.webp";
    private static final String OUTPUT_DIR = "D:\\Temp\\images\\dldl1\\";
    /** Sentinel returned by {@link #黑闸子(int, int)} when no candidate URL answers with 200. */
    private static final String NOT_FOUND = "errer";

    /** Chapter links (relative hrefs) in reversed document order. */
    private final List<String> chapterLinks = new ArrayList<>();
    /**
     * Page count per chapter-link position. A sorted map is used because step3 derives
     * the chapter numbering from the iteration order, which HashMap does not guarantee.
     */
    private final Map<Integer, Integer> pagesByLinkIndex = new TreeMap<>();
    /** Page counts of the regular chapters, in chapter order. */
    private final List<Integer> chapterPageCounts = new ArrayList<>();
    /** Page counts of the entries classified as side stories. */
    private final List<Integer> sideStoryPageCounts = new ArrayList<>();
    /** Resolved image URLs; parallel to {@link #imageNames}. */
    private final List<String> imageUrls = new ArrayList<>();
    /** File base names for the images; parallel to {@link #imageUrls}. */
    private final List<String> imageNames = new ArrayList<>();

    public static void main(String[] args) {
        Dldl1 crawler = new Dldl1();
        crawler.step1(HREF);
        crawler.step2();
        crawler.step3();
        crawler.step4();
        crawler.step5();
    }

    /**
     * Checks whether a URL can be fetched.
     *
     * @param src URL to probe
     * @return {@code true} if the server answered with HTTP 200; {@code false} for any
     *         other status or if the request failed
     */
    public Boolean ojbkImg(String src) {
        HttpURLConnection connection = null;
        try {
            connection = (HttpURLConnection) new URL(src).openConnection();
            return connection.getResponseCode() == HttpURLConnection.HTTP_OK;
        } catch (Exception ex) {
            System.out.print("URL不可用 ");
            System.out.println(src);
            return false;
        } finally {
            // Thousands of URLs are probed; release each connection explicitly.
            if (connection != null) {
                connection.disconnect();
            }
        }
    }

    /**
     * Downloads one image into {@link #OUTPUT_DIR}.
     *
     * <p>The payload is read completely before the target file is opened, so a failed
     * download neither leaves an empty file nor clobbers one from an earlier run.
     *
     * @param urlList URL of the image
     * @param idx     base name of the target file ({@code .jpg} is appended)
     */
    private void download(String urlList, String idx) {
        // NOTE(review): the URLs end in ".webp", so the payload is presumably WebP even
        // though the file is named ".jpg" - confirm before changing the extension.
        File target = new File(OUTPUT_DIR + idx + ".jpg");
        File directory = target.getParentFile();
        if (directory != null && !directory.isDirectory() && !directory.mkdirs()) {
            System.err.println("Cannot create output directory " + directory);
            return;
        }
        try {
            byte[] image;
            try (InputStream in = new URL(urlList).openStream()) {
                ByteArrayOutputStream collected = new ByteArrayOutputStream();
                byte[] buffer = new byte[8192];
                int length;
                while ((length = in.read(buffer)) != -1) {
                    collected.write(buffer, 0, length);
                }
                image = collected.toByteArray();
            }
            try (OutputStream out = new FileOutputStream(target)) {
                out.write(image);
            }
        } catch (IOException e) {
            System.err.println("Download failed: " + urlList);
            e.printStackTrace();
        }
    }

    /** Builds the image URL for a chapter/page pair using the given folder suffix. */
    private static String imageUrl(int chapter, int page, String suffix) {
        return IMAGE_BASE + chapter + CHAPTER_MARK + suffix + "%2F" + page + IMAGE_TAIL;
    }

    /**
     * Finds a working image URL for one page by probing each known folder variant.
     *
     * @param i chapter number (1-based)
     * @param j page number (1-based)
     * @return the first candidate URL that answers with HTTP 200, or {@code "errer"}
     *         if none does
     */
    public String 黑闸子(int i, int j) {
        System.out.println(i + "==" + j);
        String src = null;
        for (String suffix : FOLDER_SUFFIXES) {
            src = imageUrl(i, j, suffix);
            if (ojbkImg(src)) {
                return src;
            }
        }
        System.out.print("出错 = " + i + "::" + j + " ");
        System.out.println(src);
        return NOT_FOUND;
    }

    /** Step 5: downloads every image resolved by {@link #step4()}. */
    public void step5() {
        System.out.println("第五阶段开始!");
        System.out.println("番外:" + sideStoryPageCounts.size());
        System.out.println("可用链接" + imageUrls.size());
        for (int i = 0; i < imageUrls.size(); i++) {
            System.out.println("正在下载:" + i + " 图 / " + imageUrls.size());
            download(imageUrls.get(i), imageNames.get(i));
        }
        System.out.println("第五阶段完成!");
    }

    /**
     * Step 4: resolves an image URL for every page of every regular chapter. Pages for
     * which no URL works are skipped instead of being queued as an invalid download.
     */
    public void step4() {
        System.out.println("第四阶段开始!");
        for (int i = 0; i < chapterPageCounts.size(); i++) {
            int chapter = i + 1;
            int pages = chapterPageCounts.get(i);
            for (int j = 1; j <= pages; j++) {
                String src = 黑闸子(chapter, j);
                if (NOT_FOUND.equals(src)) {
                    continue;
                }
                imageUrls.add(src);
                imageNames.add("第" + chapter + "话" + j);
            }
        }
        System.out.println("第四阶段完成!");
    }

    /** Link positions that step3 treats as side stories rather than numbered chapters. */
    private static boolean isSideStory(int linkIndex) {
        return (linkIndex <= 637 && linkIndex >= 593 && linkIndex % 2 == 1) || linkIndex == 590;
    }

    /**
     * Step 3: splits the page counts gathered by {@link #step2()} into regular chapters
     * and side stories, keeping link order so that list position + 1 is the chapter number.
     */
    public void step3() {
        System.out.println("第三阶段开始!");
        System.out.println("刚开始:" + pagesByLinkIndex.size());
        for (Map.Entry<Integer, Integer> entry : pagesByLinkIndex.entrySet()) {
            int key = entry.getKey();
            int value = entry.getValue();
            if (isSideStory(key)) {
                System.out.println(value + "::" + key);
                sideStoryPageCounts.add(value);
            } else {
                chapterPageCounts.add(value);
            }
        }
        for (int i = 0; i < chapterPageCounts.size(); i++) {
            System.out.println((i + 1) + "==" + chapterPageCounts.get(i));
        }
        System.out.println("第三阶段完成!");
    }

    /**
     * Step 2: loads every chapter page and records its page count. A chapter whose page
     * cannot be loaded or parsed is recorded with 0 pages, so later chapters keep their
     * numbering instead of being silently dropped.
     */
    public void step2() {
        System.out.println("第二阶段开始!");
        for (int i = 0; i < chapterLinks.size(); i++) {
            System.out.println("正在加载:" + i + " / " + chapterLinks.size() + " 页!......");
            int num = readPageCount(HREF + chapterLinks.get(i));
            if (i == 34) {
                System.out.println("第35章:" + num);
            }
            pagesByLinkIndex.put(i, num);
        }
        System.out.println("第二阶段完成!");
        // Report how many pages were scanned (the chapter list is still empty at this point).
        System.out.println(pagesByLinkIndex.size());
    }

    /**
     * Reads the page count shown in the {@code <span class="totalPage">} element.
     * The element is selected through jsoup rather than by scanning the serialized
     * HTML, which depended on the pretty-printer's exact whitespace.
     *
     * @return the page count, or 0 if the page cannot be loaded or has no usable count
     */
    private int readPageCount(String pageUrl) {
        try {
            Document page = Jsoup.connect(pageUrl).get();
            Elements counters = page.select("span.totalPage");
            if (counters.isEmpty()) {
                System.err.println("No totalPage element on " + pageUrl);
                return 0;
            }
            return Integer.parseInt(counters.first().text().trim());
        } catch (IOException | RuntimeException e) {
            System.err.println("Cannot read page count from " + pageUrl + ": " + e);
            return 0;
        }
    }

    /**
     * Step 1: collects the chapter links from the index page. A link qualifies when the
     * part of its href before the first '.' consists of digits only. The list is
     * reversed after collection.
     *
     * @param src URL of the chapter index page
     */
    public void step1(String src) {
        System.out.println("第一阶段开始!");
        try {
            Document index = Jsoup.connect(src).get();
            for (Element link : index.select("a[href]")) {
                String href = link.attr("href");
                int dot = href.indexOf('.');
                if (dot != -1 && href.substring(0, dot).matches("[0-9]+")) {
                    chapterLinks.add(href);
                }
            }
            Collections.reverse(chapterLinks);
        } catch (IOException e) {
            e.printStackTrace();
        }
        System.out.println("第一阶段完成!");
    }
}
《斗罗大陆2》:
package com.mdr.ManHua;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.*;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
/**
 * Command-line crawler that downloads the page images of the comic whose chapter
 * index lives at {@link #SRC} and stores them under {@link #OUTPUT_DIR}.
 *
 * <p>{@link #main(String[])} runs the steps in order: collect chapter links, drop one
 * unwanted link, read each chapter's page count, resolve image URLs, and download.
 */
public class Dldl2 {
    /** Chapter index page; the chapter links found on it are appended to this URL. */
    private static final String SRC = "https://www.zymk.cn/609/";
    /** First of the two image folder prefixes probed by {@link #黑闸子(int, int)}. */
    private static final String STRIP_BASE =
            "http://mhpic.zymkcdn.com/comic/J%2F%E7%BB%9D%E4%B8%96%E5%94%90%E9%97%A8%E6%9D%A1%E6%BC%AB%E7%89%88%2F";
    /** Second of the two image folder prefixes probed by {@link #黑闸子(int, int)}. */
    private static final String SPLIT_BASE =
            "http://mhpic.zymkcdn.com/comic/J%2F%E7%BB%9D%E4%B8%96%E5%94%90%E9%97%A8%E6%8B%86%E5%88%86%E7%89%88%2F";
    /** Image folder used for the entry at position 0, whose files are tagged as prologue. */
    private static final String PROLOGUE_BASE =
            "http://mhpic.zymkcdn.com/comic/J%2F%E7%BB%9D%E4%B8%96%E5%94%90%E9%97%A8%2F%E5%BC%95%2F";
    private static final String IMAGE_TAIL = ".jpg-zymk.middle.webp";
    private static final String OUTPUT_DIR = "D:\\Temp\\images\\dldl2\\";
    /** Sentinel returned by {@link #黑闸子(int, int)} when no candidate URL answers with 200. */
    private static final String NOT_FOUND = "errer";
    /** Position of the link that {@link #step2()} drops from the chapter list. */
    private static final int EXCLUDED_LINK_INDEX = 421;

    // Initialised eagerly so that calling the steps out of order cannot hit a null list.
    private final List<String> aList = new ArrayList<String>();
    private final List<Integer> bList = new ArrayList<Integer>();
    /** Resolved image URLs; parallel to {@link #imgIdx}. */
    private final List<String> imgList = new ArrayList<String>();
    /** File base names for the images; parallel to {@link #imgList}. */
    private final List<String> imgIdx = new ArrayList<String>();
    /** Last candidate URL of every page for which no image could be found. */
    private final List<String> errOne = new ArrayList<String>();

    public static void main(String[] args) {
        Dldl2 crawler = new Dldl2();
        crawler.step1(SRC);
        crawler.step2();
        crawler.step3();
        crawler.step4();
        crawler.step5();
    }

    /**
     * Downloads one image into {@link #OUTPUT_DIR}.
     *
     * <p>The payload is read completely before the target file is opened, so a failed
     * download neither leaves an empty file nor clobbers one from an earlier run.
     *
     * @param urlList URL of the image
     * @param idx     base name of the target file ({@code .jpg} is appended)
     */
    private void download(String urlList, String idx) {
        // NOTE(review): the URLs end in ".webp", so the payload is presumably WebP even
        // though the file is named ".jpg" - confirm before changing the extension.
        File target = new File(OUTPUT_DIR + idx + ".jpg");
        File directory = target.getParentFile();
        if (directory != null && !directory.isDirectory() && !directory.mkdirs()) {
            System.err.println("Cannot create output directory " + directory);
            return;
        }
        try {
            byte[] image;
            try (InputStream in = new URL(urlList).openStream()) {
                ByteArrayOutputStream collected = new ByteArrayOutputStream();
                byte[] buffer = new byte[8192];
                int length;
                while ((length = in.read(buffer)) != -1) {
                    collected.write(buffer, 0, length);
                }
                image = collected.toByteArray();
            }
            try (OutputStream out = new FileOutputStream(target)) {
                out.write(image);
            }
        } catch (IOException e) {
            System.err.println("Download failed: " + urlList);
            e.printStackTrace();
        }
    }

    /**
     * Checks whether a URL can be fetched.
     *
     * @param src URL to probe
     * @return {@code true} if the server answered with HTTP 200; {@code false} for any
     *         other status or if the request failed
     */
    public Boolean ojbkImg(String src) {
        HttpURLConnection connection = null;
        try {
            connection = (HttpURLConnection) new URL(src).openConnection();
            return connection.getResponseCode() == HttpURLConnection.HTTP_OK;
        } catch (Exception ex) {
            System.out.print("URL不可用 ");
            System.out.println(src);
            return false;
        } finally {
            // Thousands of URLs are probed; release each connection explicitly.
            if (connection != null) {
                connection.disconnect();
            }
        }
    }

    /** Placeholder step; intentionally does nothing. */
    public void step6() {
    }

    /** Step 5: downloads every image resolved by {@link #step4()}. */
    public void step5() {
        for (int i = 0; i < imgList.size(); i++) {
            System.out.println("正在下载:" + i + " 图");
            download(imgList.get(i), imgIdx.get(i));
        }
    }

    /**
     * Finds a working image URL for one page by probing each known folder variant.
     * When every variant fails, the last candidate is remembered in {@link #errOne}.
     *
     * @param i chapter number
     * @param j page number (1-based)
     * @return the first candidate URL that answers with HTTP 200, or {@code "errer"}
     *         if none does
     */
    public String 黑闸子(int i, int j) {
        System.out.println(i + "==" + j);
        String[] candidates = {
            STRIP_BASE + i + "%E8%AF%9DGQ%2F" + j + IMAGE_TAIL,
            SPLIT_BASE + "%E7%AC%AC" + i + "%E8%AF%9D%2F" + j + IMAGE_TAIL,
            SPLIT_BASE + i + "%E8%AF%9DGQ%2F" + j + IMAGE_TAIL,
            STRIP_BASE + i + "%E8%AF%9DGQV%2F" + j + IMAGE_TAIL
        };
        for (String candidate : candidates) {
            if (ojbkImg(candidate)) {
                return candidate;
            }
        }
        System.out.println("出错 = " + i + "::" + j);
        errOne.add(candidates[candidates.length - 1]);
        return NOT_FOUND;
    }

    /**
     * Step 4: resolves an image URL for every page of every chapter.
     *
     * <p>Position 0 is taken from the prologue folder without probing. For the other
     * chapters, a page that cannot be found is replaced by the page numbered one past
     * the chapter's page count; that substitute is tried once per chapter. Pages for
     * which nothing works are skipped instead of being queued as an invalid download.
     */
    public void step4() {
        System.out.println("第四阶段开始!");
        for (int i = 0; i < bList.size(); i++) {
            int pages = bList.get(i);
            boolean extraPageTried = false;
            for (int j = 1; j <= pages; j++) {
                String src;
                String idx;
                if (i == 0) {
                    src = PROLOGUE_BASE + j + IMAGE_TAIL;
                    idx = "第" + i + "话" + j + "--序章";
                } else {
                    src = 黑闸子(i, j);
                    idx = "第" + i + "话" + j;
                    if (NOT_FOUND.equals(src)) {
                        if (extraPageTried) {
                            continue;
                        }
                        extraPageTried = true;
                        src = 黑闸子(i, pages + 1);
                        // Parenthesised so the number is incremented, not suffixed with "1".
                        idx = "第" + i + "话" + (pages + 1);
                    }
                }
                if (NOT_FOUND.equals(src)) {
                    continue;
                }
                imgList.add(src);
                imgIdx.add(idx);
            }
        }
        System.out.println("第四阶段完成!");
    }

    /**
     * Step 3: loads every chapter page and records its page count. A chapter whose page
     * cannot be loaded or parsed is recorded with 0 pages, so later chapters keep their
     * numbering instead of being silently dropped.
     */
    public void step3() {
        System.out.println("第三阶段开始!");
        bList.clear();
        for (int i = 0; i < aList.size(); i++) {
            System.out.println("正在加载:" + i + " / " + aList.size() + " 页!......");
            bList.add(readPageCount(SRC + aList.get(i)));
        }
        System.out.println("第三阶段完成!");
    }

    /**
     * Reads the page count shown in the {@code <span class="totalPage">} element.
     * The element is selected through jsoup rather than by scanning the serialized
     * HTML, which depended on the pretty-printer's exact whitespace.
     *
     * @return the page count, or 0 if the page cannot be loaded or has no usable count
     */
    private int readPageCount(String pageUrl) {
        try {
            Document page = Jsoup.connect(pageUrl).get();
            Elements counters = page.select("span.totalPage");
            if (counters.isEmpty()) {
                System.err.println("No totalPage element on " + pageUrl);
                return 0;
            }
            return Integer.parseInt(counters.first().text().trim());
        } catch (IOException | RuntimeException e) {
            System.err.println("Cannot read page count from " + pageUrl + ": " + e);
            return 0;
        }
    }

    /** Step 2: drops the link found at position {@link #EXCLUDED_LINK_INDEX}, if present. */
    public void step2() {
        System.out.println("第二阶段开始!");
        if (aList.size() > EXCLUDED_LINK_INDEX) {
            // NOTE(review): this removes the first element EQUAL to the one at that
            // position (remove(Object)), exactly as before; if the list can contain
            // duplicate hrefs, confirm whether removal by index was intended.
            aList.remove(aList.get(EXCLUDED_LINK_INDEX));
        }
        System.out.println("第二阶段完成!");
    }

    /**
     * Step 1: collects the chapter links from the index page, replacing any previously
     * collected links. A link qualifies when the part of its href before the first '.'
     * consists of digits only. The list is reversed after collection.
     *
     * @param src URL of the chapter index page
     */
    public void step1(String src) {
        System.out.println("第一阶段开始!");
        aList.clear();
        try {
            Document index = Jsoup.connect(src).get();
            for (Element link : index.select("a[href]")) {
                String href = link.attr("href");
                int dot = href.indexOf('.');
                if (dot != -1 && href.substring(0, dot).matches("[0-9]+")) {
                    aList.add(href);
                }
            }
            Collections.reverse(aList);
        } catch (IOException e) {
            e.printStackTrace();
        }
        System.out.println("第一阶段完成!");
    }
}
《斗破苍穹》:
package com.mdr.ManHua;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.*;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.*;
/**
 * Command-line crawler that downloads the page images of the comic whose chapter
 * index lives at {@link #HREF} and stores them under {@link #OUTPUT_DIR}.
 *
 * <p>The work is split into five steps that {@link #main(String[])} runs in order:
 * collect chapter links, read each chapter's page count, drop the excluded entries,
 * resolve a working image URL for every page, and download.
 */
public class Dpcq {
    /** Chapter index page; the chapter links found on it are appended to this URL. */
    private static final String HREF = "https://www.zymk.cn/1/";
    /** Common prefix of every image URL probed by {@link #黑闸子(int, int)}. */
    private static final String IMAGE_BASE =
            "http://mhpic.zymkcdn.com/comic/D%2F%E6%96%97%E7%A0%B4%E8%8B%8D%E7%A9%B9%E6%8B%86%E5%88%86%E7%89%88%2F";
    /** URL-encoded marker that follows the chapter number in the image folder name. */
    private static final String CHAPTER_MARK = "%E8%AF%9D";
    /** Folder-name suffixes tried, in this order, when looking for a page image. */
    private static final String[] FOLDER_SUFFIXES = {"", "GQ", "V1", "V", "GQV"};
    private static final String IMAGE_TAIL = ".jpg-zymk.middle.webp";
    private static final String OUTPUT_DIR = "D:\\Temp\\images\\dpcq\\";
    /** Sentinel returned by {@link #黑闸子(int, int)} when no candidate URL answers with 200. */
    private static final String NOT_FOUND = "errer";

    /** Chapter links (relative hrefs) in reversed document order. */
    private final List<String> chapterLinks = new ArrayList<>();
    /**
     * Page count per chapter-link position. A sorted map is used because step3 derives
     * the chapter numbering from the iteration order, which HashMap does not guarantee.
     */
    private final Map<Integer, Integer> pagesByLinkIndex = new TreeMap<>();
    /** Page counts of the chapters kept by step3, in chapter order. */
    private final List<Integer> chapterPageCounts = new ArrayList<>();
    /** Resolved image URLs; parallel to {@link #imageNames}. */
    private final List<String> imageUrls = new ArrayList<>();
    /** File base names for the images; parallel to {@link #imageUrls}. */
    private final List<String> imageNames = new ArrayList<>();

    public static void main(String[] args) {
        Dpcq crawler = new Dpcq();
        crawler.step1();
        crawler.step2();
        crawler.step3();
        crawler.step4();
        crawler.step5();
    }

    /**
     * Downloads one image into {@link #OUTPUT_DIR}.
     *
     * <p>The payload is read completely before the target file is opened, so a failed
     * download neither leaves an empty file nor clobbers one from an earlier run.
     *
     * @param urlList URL of the image
     * @param idx     base name of the target file ({@code .jpg} is appended)
     */
    private void download(String urlList, String idx) {
        // NOTE(review): the URLs end in ".webp", so the payload is presumably WebP even
        // though the file is named ".jpg" - confirm before changing the extension.
        File target = new File(OUTPUT_DIR + idx + ".jpg");
        File directory = target.getParentFile();
        if (directory != null && !directory.isDirectory() && !directory.mkdirs()) {
            System.err.println("Cannot create output directory " + directory);
            return;
        }
        try {
            byte[] image;
            try (InputStream in = new URL(urlList).openStream()) {
                ByteArrayOutputStream collected = new ByteArrayOutputStream();
                byte[] buffer = new byte[8192];
                int length;
                while ((length = in.read(buffer)) != -1) {
                    collected.write(buffer, 0, length);
                }
                image = collected.toByteArray();
            }
            try (OutputStream out = new FileOutputStream(target)) {
                out.write(image);
            }
        } catch (IOException e) {
            System.err.println("Download failed: " + urlList);
            e.printStackTrace();
        }
    }

    /**
     * Checks whether a URL can be fetched.
     *
     * @param src URL to probe
     * @return {@code true} if the server answered with HTTP 200; {@code false} for any
     *         other status or if the request failed
     */
    public Boolean ojbkImg(String src) {
        HttpURLConnection connection = null;
        try {
            connection = (HttpURLConnection) new URL(src).openConnection();
            return connection.getResponseCode() == HttpURLConnection.HTTP_OK;
        } catch (Exception ex) {
            System.out.print("URL不可用 ");
            System.out.println(src);
            return false;
        } finally {
            // Thousands of URLs are probed; release each connection explicitly.
            if (connection != null) {
                connection.disconnect();
            }
        }
    }

    /** Builds the image URL for a chapter/page pair using the given folder suffix. */
    private static String imageUrl(int chapter, int page, String suffix) {
        return IMAGE_BASE + chapter + CHAPTER_MARK + suffix + "%2F" + page + IMAGE_TAIL;
    }

    /**
     * Finds a working image URL for one page by probing each known folder variant.
     *
     * @param i chapter number (1-based)
     * @param j page number (1-based)
     * @return the first candidate URL that answers with HTTP 200, or {@code "errer"}
     *         if none does
     */
    public String 黑闸子(int i, int j) {
        System.out.println(i + "==" + j);
        String src = null;
        for (String suffix : FOLDER_SUFFIXES) {
            src = imageUrl(i, j, suffix);
            if (ojbkImg(src)) {
                return src;
            }
        }
        System.out.print("出错 = " + i + "::" + j + " ");
        System.out.println(src);
        return NOT_FOUND;
    }

    /** Step 5: downloads every image resolved by {@link #step4()}. */
    public void step5() {
        System.out.println("第五阶段开始!");
        System.out.println("可用链接" + imageUrls.size());
        for (int i = 0; i < imageUrls.size(); i++) {
            System.out.println("正在下载:" + i + " 图 / " + imageUrls.size());
            download(imageUrls.get(i), imageNames.get(i));
        }
        System.out.println("第五阶段完成!");
    }

    /**
     * Step 4: resolves an image URL for every page of every kept chapter. Pages for
     * which no URL works are skipped instead of being queued as an invalid download.
     */
    public void step4() {
        System.out.println("第四阶段开始!");
        for (int i = 0; i < chapterPageCounts.size(); i++) {
            int chapter = i + 1;
            int pages = chapterPageCounts.get(i);
            for (int j = 1; j <= pages; j++) {
                String src = 黑闸子(chapter, j);
                if (NOT_FOUND.equals(src)) {
                    continue;
                }
                imageUrls.add(src);
                imageNames.add("第" + chapter + "话" + j);
            }
        }
        System.out.println("第四阶段完成!");
    }

    /** Link positions that step3 leaves out of the numbered chapter list. */
    private static boolean isExcluded(int linkIndex) {
        return linkIndex == 699 || linkIndex == 700 || linkIndex == 719 || linkIndex == 722;
    }

    /**
     * Step 3: copies the page counts gathered by {@link #step2()} into the chapter list,
     * leaving out the excluded positions and keeping link order so that list position + 1
     * is the chapter number.
     */
    public void step3() {
        System.out.println("第三阶段开始!");
        System.out.println("刚开始:" + pagesByLinkIndex.size());
        for (Map.Entry<Integer, Integer> entry : pagesByLinkIndex.entrySet()) {
            int key = entry.getKey();
            int value = entry.getValue();
            if (isExcluded(key)) {
                System.out.println(value + "::" + key);
            } else {
                chapterPageCounts.add(value);
            }
        }
        for (int i = 0; i < chapterPageCounts.size(); i++) {
            System.out.println((i + 1) + "==" + chapterPageCounts.get(i));
        }
        System.out.println("第三阶段完成!");
    }

    /**
     * Step 2: loads every chapter page and records its page count. A chapter whose page
     * cannot be loaded or parsed is recorded with 0 pages, so later chapters keep their
     * numbering instead of being silently dropped.
     */
    public void step2() {
        System.out.println("第二阶段开始!");
        for (int i = 0; i < chapterLinks.size(); i++) {
            System.out.println("正在加载:" + i + " / " + chapterLinks.size() + " 页!......");
            pagesByLinkIndex.put(i, readPageCount(HREF + chapterLinks.get(i)));
        }
        System.out.println("第二阶段完成!");
        // Report how many pages were scanned (the chapter list is still empty at this point).
        System.out.println(pagesByLinkIndex.size());
    }

    /**
     * Reads the page count shown in the {@code <span class="totalPage">} element.
     * The element is selected through jsoup rather than by scanning the serialized
     * HTML, which depended on the pretty-printer's exact whitespace.
     *
     * @return the page count, or 0 if the page cannot be loaded or has no usable count
     */
    private int readPageCount(String pageUrl) {
        try {
            Document page = Jsoup.connect(pageUrl).get();
            Elements counters = page.select("span.totalPage");
            if (counters.isEmpty()) {
                System.err.println("No totalPage element on " + pageUrl);
                return 0;
            }
            return Integer.parseInt(counters.first().text().trim());
        } catch (IOException | RuntimeException e) {
            System.err.println("Cannot read page count from " + pageUrl + ": " + e);
            return 0;
        }
    }

    /**
     * Step 1: collects the chapter links from the index page at {@link #HREF}. A link
     * qualifies when the part of its href before the first '.' consists of digits only.
     * The list is reversed after collection.
     */
    public void step1() {
        System.out.println("第一阶段开始!");
        try {
            Document index = Jsoup.connect(HREF).get();
            for (Element link : index.select("a[href]")) {
                String href = link.attr("href");
                int dot = href.indexOf('.');
                if (dot != -1 && href.substring(0, dot).matches("[0-9]+")) {
                    chapterLinks.add(href);
                }
            }
            Collections.reverse(chapterLinks);
        } catch (IOException e) {
            e.printStackTrace();
        }
        System.out.println("第一阶段完成!");
    }
}
效果我就不展示了,手机开的热点,我流量不够用。
源码包自己找。