Web crawler: scraping the Movie Heaven site (urllib.request)

"""Scrape the latest-movie entries from https://www.dytt8.net/ into movie.txt.

The script downloads the site's front page, collects the detail-page links
listed in the "latest movie download" section, visits each detail page, and
writes every movie's title and download link to a UTF-8 text file.
"""
import re
from typing import List, Optional, Tuple
from urllib.parse import urljoin
from urllib.request import urlopen

BASE_URL = "https://www.dytt8.net/"
PAGE_ENCODING = "gbk"  # the site serves GBK-encoded HTML

# Anchor text of the section link that precedes every latest-movie entry on
# the front page ("latest movie download").
# NOTE(review): the Chinese marker text was reconstructed; confirm it against
# the live page markup.
DETAIL_LINK_RE = re.compile(r"最新电影下载</a>]<a href='(.*?)'", re.S)

# Captures (title, download link) from a detail page. The title follows the
# "◎ film title" marker, whose characters are padded with full-width spaces
# (U+3000); \s matches those, so the exact amount of padding does not matter.
# re.I tolerates case differences in the inline style attribute.
# NOTE(review): marker text reconstructed; confirm against the live page.
MOVIE_RE = re.compile(
    r'<div id="Zoom">.*?◎片\s*名\s*(.*?)<br />.*?'
    r'<td style="WORD-WRAP: break-word" bgcolor="#fdfddf"><a href="(.*?)">',
    re.S | re.I,
)


def fetch_page(url: str, timeout: float = 30.0) -> str:
    """Download *url* and return its body decoded as GBK text.

    Undecodable bytes are replaced instead of raising, so one stray byte in
    a page does not abort the whole crawl.

    Raises:
        urllib.error.URLError: if the request fails or times out.
    """
    with urlopen(url, timeout=timeout) as response:
        return response.read().decode(PAGE_ENCODING, errors="replace")


def extract_detail_links(index_html: str) -> List[str]:
    """Return the detail-page hrefs of the latest-movie entries on the front page."""
    return DETAIL_LINK_RE.findall(index_html)


def extract_movie(detail_html: str) -> Optional[Tuple[str, str]]:
    """Return ``(title, download_url)`` parsed from a detail page.

    Returns ``None`` when the page does not have the expected layout.
    """
    match = MOVIE_RE.search(detail_html)
    if match is None:
        return None
    return match.group(1).strip(), match.group(2)


def main(output_path: str = "movie.txt") -> None:
    """Crawl the front page and save every movie's title and link to *output_path*."""
    index_html = fetch_page(BASE_URL)

    # Open the file once, outside the loop: reopening it with mode "w" for
    # every movie would truncate it each time and keep only the last entry.
    with open(output_path, mode="w", encoding="utf-8") as out:
        for link in extract_detail_links(index_html):
            # urljoin handles both relative and root-relative hrefs without
            # producing a double slash.
            detail_html = fetch_page(urljoin(BASE_URL, link))
            movie = extract_movie(detail_html)
            print(movie)
            if movie is None:
                # Page layout not recognised; skip it rather than crash.
                continue

            movie_name, download_url = movie
            out.write("Movie name: " + movie_name + "\n")
            out.write("Download Link: " + download_url + "\n")
            print("a complete")


if __name__ == "__main__":
    main()

 

Guess you like

Origin www.cnblogs.com/tengteng0520/p/11275537.html