[Java] 用java实现的电影天堂,飘花电影网的电影的下载地址抓取

1. 之前在论坛上看到一位坛友用 Python 写了一个抓取电影下载链接的程序,于是我心血来潮,也打算用 Java 来写一个。其实并不是很难,下面附上代码。
这是对电影天堂的电影进行抓取的方法(在此期间尝试过设置代理以及使用线程池,但貌似均没有成功)。说明一下,主要用到的 jar 包有 httpclient 4.5 以及 jsoup 1.7。

1. 电影天堂的抓取代码如下:
package downloade;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
 
import org.apache.http.HttpHost;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
 
import com.sun.corba.se.spi.orbutil.threadpool.ThreadPool;
 
import Pojo.DyUrl;
import dao.JDBCUtils;
 
public class Dyttdownload {
     static int id= 1 ;
     public static HttpClient client= null ;
     
public static void main(String[] args) {
//ExecutorService fixedThreadPool = Executors.newFixedThreadPool(10);
            Map<Integer,String> map= new HashMap<>();
     for ( int i= 1 ;i< 50 ;i++){
//    http://www.ygdy8.net/html/gndy/dyzz/list_23_2.html
         map.put(i, "http://www.ygdy8.net/html/gndy/dyzz/list_23_" +i+ ".html" );
     }
            for (String string : map.values()) {
              getUrl(string);
//                      Thread.currentThread().sleep(2000);
             
         }
 
//  getDownloadUrl("http://www.ygdy8.net/html/gndy/dyzz/20170926/55094.html");
}
 
public static void  getUrl(String uri){
     JDBCUtils utils= new JDBCUtils();
 
     try {
         client=HttpClients.createDefault();
//  RequestConfig config=RequestConfig.custom().setProxy(new HttpHost("110.73.14.161",8123)).build();
     HttpGet get= new HttpGet(uri);
//  get.setConfig(config);
     HttpResponse response=client.execute(get);
         String result =EntityUtils.toString(response.getEntity(), "gb2312" );
         Document doc=Jsoup.parse(result);
         //css选择器
     Elements elements=  doc.select( "table.tbspan " );
   for (Element element : elements) {
       element.setBaseUri( "http://www.ygdy8.net" );
DyUrl dy=getDownloadUrl(element.select( "tr" ).get( 1 ).select( "a" ).text(),element.select( "tr" ).get( 1 ).select( "a" ).attr( "abs:href" ));
dy.setId(id);
utils.insert(dy);
id++;
   }
 
 
     } catch (IOException e) {
         // TODO Auto-generated catch block
         e.printStackTrace();
     }
}
public static DyUrl  getDownloadUrl(String name,String dyurl){
     DyUrl dy= new DyUrl();
//  RequestConfig config=RequestConfig.custom().setProxy(new HttpHost("110.73.14.161",8123)).build();
     try {
         client=HttpClients.createDefault();
         HttpGet get = new HttpGet(dyurl);
//      get.setConfig(config);
         HttpResponse response=client.execute(get);
         String result =EntityUtils.toString(response.getEntity(), "gb2312" );
         Document doc=Jsoup.parse(result);
           Elements elements=doc.select( "div#Zoom table tr td a " );
           dy.setDyname(name);
           dy.setDyUrl(elements.get( 0 ).text());
 
     } catch (IOException e) {
         // TODO Auto-generated catch block
         e.printStackTrace();
     }
     return dy;
}
 
 
}



2. 下面是飘花电影网的抓取代码。可以看到,爬取的过程大同小异,只是选择器有所差别而已:
package downloade;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
 
import org.apache.http.HttpHost;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
 
import Pojo.DyUrl;
import dao.JDBCUtils;
 
public class piaohuadownload {
     static int id= 1 ;
     public static HttpClient client= null ;
public static void main(String[] args) {
     Map<Integer,String> map= new HashMap<>();
for ( int i= 16 ;i< 50 ;i++){
   map.put(i, "http://www.piaohua.com/html/dongzuo/list_" +i+ ".html" );
}
for (String string : map.values()) {
 
System.out.println( "正在爬这个" +string+ "网页" );
         // TODO Auto-generated method stub
         getUrl(string);
     
}
 
}  
     
public static void getUrl(String uri){
     JDBCUtils utils= new JDBCUtils();
try {
     client =HttpClientBuilder.create().build();
     HttpResponse response=client.execute( new HttpGet(uri));
     String result =EntityUtils.toString(response.getEntity(), "utf-8" );
     Document doc=Jsoup.parse(result);
     doc.setBaseUri( "http://www.piaohua.com" );
     Elements elements=doc.select( "#list dl" );
     for (Element element : elements) {
         String name=element.select( "font" ).first().text();
         String dyurl=element.select( "a" ).first().absUrl( "href" );
     DyUrl dy=getDownloadUrl(name, dyurl);
     dy.setId(id);
     utils.insert(dy);
     id++;
     }
} catch (IOException e) {
     // TODO Auto-generated catch block
     e.printStackTrace();
}
}
 
public static DyUrl getDownloadUrl(String name,String dyurl){
     DyUrl dUrl= new DyUrl();
     try {
         client=HttpClients.createDefault();
         HttpResponse response;
         response = client.execute( new HttpGet(dyurl));
         String result =EntityUtils.toString(response.getEntity(), "utf-8" );
         Document doc=Jsoup.parse(result);
         Elements elements=doc.select( "#showinfo" ).select( "a" );
         dUrl.setDyname(name);
         dUrl.setDyUrl(elements.first().text());
     } catch (IOException e) {
         // TODO Auto-generated catch block
         e.printStackTrace();
     }
return dUrl;
     
}
}
         

最后附上成功的截图           
       最后一张是在网页上的应用

猜你喜欢

转载自blog.csdn.net/weixin_41722928/article/details/86506176