A simple crawler for a US TV drama (part two: download progress bar)

The previous part obtained the download URL of each episode; this part uses those URLs to download the videos.

I wanted a progress bar to be shown while a file is downloading. After some searching online I found a library that can do this: tqdm. For details on how to use it, see this blog post: https://www.jianshu.com/p/1ed2a8b2c77b

Add a method for downloading files to the original class, as follows:

    @staticmethod
    def download_file(url, name):
        """Stream ``url`` into the local file ``name``, showing a tqdm progress bar.

        Args:
            url: Address of the file to download.
            name: Path of the file to write (opened in binary mode, overwritten).

        Request failures, including HTTP error statuses, are reported on stdout
        and not re-raised. Nothing is written when the server reports a
        ``Content-Length`` of zero.
        """
        try:
            # The context manager releases the connection even if writing fails.
            with requests.get(url=url, stream=True) as response:
                # Turn 4xx/5xx into an HTTPError (a RequestException) instead of
                # saving an error page as if it were the video.
                response.raise_for_status()

                # File size comes from the response header; it may be missing
                # (e.g. chunked transfer), in which case tqdm runs without a total.
                length = response.headers.get('Content-Length', '')
                # Round up so the total matches the number of 1024-byte chunks.
                total_kb = (int(length) + 1023) // 1024 if length.isdigit() else None

                if total_kb == 0:
                    return

                with open(name, "wb") as f:
                    if total_kb is not None:
                        print("total: ", total_kb, 'k')
                    for data in tqdm(iterable=response.iter_content(1024), total=total_kb, unit='k'):
                        f.write(data)
                print("\n done " + name)

        except RequestException as e:
            # Wrap in a tuple so %-formatting always sees exactly one argument.
            print("接口错误信息为  %r" % (e,))

Then add a main function to the original class:

    def main(self, tv_name="blood of infected season"):
        """Download every episode of ``tv_name`` into a ``Movies`` directory.

        The directory is created next to the parent directory of this file.

        Args:
            tv_name: Series title to search for. NOTE(review): the default is the
                literal from the published snippet, which appears to have been
                machine-translated; the site presumably expects the original
                title — confirm before relying on it.
        """
        root_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))
        print(root_dir)

        # get_tv_url() returns the download link of each episode.
        download_urls = self.get_tv_url(tv_name)
        if not download_urls:
            return

        # Create the target directory once instead of checking on every episode.
        file_path = os.path.join(root_dir, "Movies")
        os.makedirs(file_path, exist_ok=True)

        for episode_url in download_urls:
            # The text after the last "/" of the link is used as the file name.
            name = episode_url.split('/')[-1]
            if not name:
                # A link ending in "/" gives no usable file name; skip it.
                continue
            print("Downloading [{}]".format(name))
            self.download_file(episode_url, os.path.join(file_path, name))

Finally, the complete code is as follows

  1 # coding: utf-8
  2 """
  3 author: hmk
  4 describe: 爬虫80s电影网
  5 create_time: 2019/01/18
  6 """
  7 
  8 import re
  9 
 10 import os
 11 from bs4 import BeautifulSoup
 12 from requests.exceptions import RequestException
 13 import requests
 14 from tqdm import tqdm
 15 
 16 class DownloadTV:
 17     @staticmethod
 18     def get_html(url, data=None, header=None, method=None):
 19         """获取一个url的html格式文本内容"""
 20 
 21         if method == "get":
 22             response = requests.get(url, params=data, headers=header)
 23         else:
 24             response = requests.post(url, data=data, headers=header)
 25         try:
 26             if response.status_code == 200:
 27                 response.encoding =response.apparent_encoding
 28                  # Print (response.status_code) 
29                  # Print (response.text) 
30                  return response.text
 31 is              return None
 32          the except requestexception:
 33 is              Print ( " request failed " )
 34 is              return None
 35  
36      DEF get_tv_id (Self, tv_name ):
 37          "" " Get id corresponding to the queried drama " "" 
38 is          headers = {
 39              , " the Content-the Type " : "file application / X-WWW-form-urlencoded " 
40          }
 41 is  
42 is          Data = {
 43 is              " search_typeid " : " . 1 " ,
 44 is              " SKey " : tv_name,   # using a variable to represent mean worder 
45              " the Input " : " Search " 
46 is          }
 47  
48          URL = " http://www.y80s.com/movie/search/ "   # request URL 
49  
50         = self.get_html Response (URL, Data, headers, " POST " )
 51 is  
52 is          HTML = Response
 53 is          # Print (HTML) 
54 is  
55          Soup = the BeautifulSoup (HTML, " html.parser " )
 56 is          name_label soup.find_all = ( " A " , title = tv_name)   # get all <a> tag title attribute is the name of the TV drama, with a dynamic variable to indicate the name of drama 
57          # Print (soup.prettify ()) 
58          # Print (name_label) 
59          # Print (name_label [0] .get ( 'the href')) 
60  
61 is         = the re.compile ju_id (R & lt ' (\ + D) ' , re.S)   #   define a regular expression, to extract digital content label 
62 is          IF name_label:
 63 is              href_value = ju_id.search (name_label [0] .get ( ' the href ' ))
 64              IF href_value:
 65                  TV_ID = href_value.group ()
 66                  Print ( " query television drama corresponding id is: {} " .format (TV_ID))
 67                  # Print (type (TV_ID)) acquired View # TV_ID the data type, then if int in splicing requires subsequent str () translated into strings 
68                  return TV_ID
 69  
70      DEFget_tv_url (Self, tv_name):
 71 is          "" " Get drama download URL " "" 
72          TV_ID = self.get_tv_id (tv_name) # call get_tv_id () method, obtaining TV_ID 
73 is          URL = " http://www.y80s.com / Ju / " + TV_ID   # use TV_ID splicing URL 
74  
75          R & lt self.get_html = (URL, Method = " GET " )
 76          HTML = R & lt
 77          Soup = the BeautifulSoup (HTML, " html.parser " )
 78          a_tv_url = soup.find_all ( 'a", Title = " Local Download " )   # extract title attribute is "local download" of a label, returns a list of all tags of a 
79          # Print (a_tv_url) 
80          tv_url = []
 81          for T in a_tv_url:
 82              tv_url.append (t.get ( ' href ' ))   # Get the value href attribute get method with a tab for each 
83          Print (tv_url)
 84          return tv_url
 85  
86      @staticmethod
 87      DEF download_file (URL, name):
 88          "" " Download file "" "
 89         try:
 90             response = requests.get(url=url, stream=True)
 91             content_size = int(response.headers['Content-Length']) / 1024  # 文件大小,从响应头中获取
 92 
 93             if content_size:
 94                 with open(name, "wb") as f:
 95                     print("total: ", content_size, 'k')
 96                     for data in tqdm(iterable=response.iter_content(1024), total=content_size, unit='k'):
 97                         f.write(data)
 98                         # f.close()
 99                     print("\n done " + name)
100 
101         except RequestException as e:
102             print("接口错误信息为  %r", e)
103 
104     def main(self):
105         """主函数"""
106         root_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))
107         print(root_dir)
108          DOWNLOAD_URL = self.get_tv_url ( " blood of infected Season " )   # call get_tv_url (), Get Episode URL 
109          for T in DOWNLOAD_URL:
 110              name = t.split ( ' / ' ) [-. 1] #  get the download link in a text url / after as file names 
111              file_path = ROOT_DIR + " / Movies / "   #  set file path 
112              IF  Not os.path.exists (file_path):
 113                  os.makdirs (file_path)
 114              Print ( " is Download [] {} ".format (name))
 115              self.download_file (T, + file_path name)   # call download_file (), according to the download link to start downloading the file 
1 16  
# Run the downloader only when this file is executed as a script.
if __name__ == '__main__':
    test = DownloadTV()
    test.main()

Let's run it:

 

Guess you like

Origin www.cnblogs.com/hanmk/p/12323678.html