爬取豆瓣电影

此脚本仅用于学习,并无其他用途!!!

1、此脚本用到的模块:

 requests、lxml

2、代码:

 1 #!/usr/bin/env python3
 2 # -*- coding: utf-8 -*-
 3 
 4 """
 5     author: 辉_少
 6     date: 2019-03-16
 7     explain:
 8         此脚本仅用于学习,不可用于其他用途
 9 """
10 
11 """
12     参考网址:
13         豆瓣URL:https://movie.douban.com/
14         最新上映:https://movie.douban.com/cinema/nowplaying/hangzhou/
15         即将上映:https://movie.douban.com/cinema/later/hangzhou/
16 """
17 
18 import requests
19 from lxml import etree
20 
21 
# List the movies that are coming soon.
def DBJJ():
    """Prompt for a city and print Douban's "coming soon" movies for it.

    The city typed by the user is inserted into the URL
    ``https://movie.douban.com/cinema/later/<city>/`` (for example
    ``hangzhou``).  The page is parsed with lxml and, for every movie title
    found, the title is printed followed by its group of detail strings.

    Returns:
        None.  All results are written to standard output.
    """
    city = input(" 请输入你所在的城市:")
    reposen = requests.get("https://movie.douban.com/cinema/later/%s/" % (city))
    html = etree.HTML(reposen.text)
    titles = html.xpath('//div[@class="bd"]/div/div/div/h3/a/text()')
    details = html.xpath('//div[@class="bd"]/div/div/div/ul/li/text()')

    # The flat detail list is consumed in groups of three per movie, as the
    # original code intended.
    # NOTE(review): assumes each movie contributes exactly three <li> text
    # nodes -- confirm against the live page markup.
    details_per_movie = 3

    # Derive the slice from the loop index.  The previous version reset its
    # counters inside the loop, so it printed the first movie over and over.
    for index, title in enumerate(titles):
        start = index * details_per_movie
        print("\t", title, '\n\t', details[start:start + details_per_movie])
38 
39 
# List the movies that are currently showing.
def DBZX():
    """Prompt for a city and print Douban's "now playing" movies for it.

    The city typed by the user is inserted into the URL
    ``https://movie.douban.com/cinema/nowplaying/<city>/`` (for example
    ``hangzhou``).  The page is parsed with lxml and each movie name is
    printed next to the corresponding rating text.

    Returns:
        None.  All results are written to standard output.
    """
    city = input(" 请输入你所在的城市:")
    reposen = requests.get("https://movie.douban.com/cinema/nowplaying/%s/" % (city))
    html = etree.HTML(reposen.text)
    link_texts = html.xpath('//div[@class="article"]/div/div/ul/li/ul/li/a/text()')
    ratings = html.xpath('//div[@class="article"]/div/div/ul/li/ul/li/span/text()')

    # The movie name is taken from every 4th link text node, starting at
    # index 2, exactly as the original index arithmetic did.
    # NOTE(review): presumably the other three nodes per movie are
    # whitespace / button text -- confirm against the live page markup.
    names = link_texts[2::4]

    # Zipping the strided names with the ratings stops at the shorter list,
    # so mismatched list lengths can no longer raise IndexError.
    for name, rating in zip(names, ratings):
        print(name, rating)
56 
# Interactive menu: choose which listing to query
# (1 = coming soon, 2 = now playing, 3 = quit).
# The loop repeats until the user chooses to quit.
while True:
    SEL = input("请输入要查询类型对应的数字:\n\t1、即将上映\n\t2、最新电影\n\t3、退出\n\t此处输入:")

    if SEL == "1":
        DBJJ()
        print("*******************************************************************\n")
    elif SEL == "2":
        DBZX()
        print("*******************************************************************\n")
    elif SEL == "3":
        print("感谢使用,已经退出!")
        # Raise SystemExit directly instead of calling exit(): exit() is
        # injected by the site module for interactive use and is not
        # guaranteed to exist (e.g. under ``python -S``).
        raise SystemExit
    else:
        # Unknown choice: echo it back and show the menu again.
        print("无法查询此类型,请重新输入", SEL)

此脚本仅用于学习参考,有错误的地方欢迎指出!

猜你喜欢

转载自www.cnblogs.com/hui-shao/p/douban.html