A small web-crawler example: Douban Movie Top250

Fetch the Douban Top250 movie list, then send it to your own e-mail address.

Here is the code:

import requests,os,csv,time,smtplib
from email.mime.text import MIMEText
from email.utils import formataddr
from email.header import Header
from email.header import  make_header
from email.mime.multipart import MIMEMultipart

from bs4 import BeautifulSoup

# CSV file that collects the scraped rows; it lives in the current working
# directory. The header row is written only when the file does not exist yet,
# so later runs (which append) do not duplicate it.
file_path = os.path.join(os.getcwd(), 'watercress Top250 movie.csv')
if not os.path.isfile(file_path):
    # The `encoding` argument of open() is Python 3 only. utf-8-sig prepends
    # a BOM to the file.
    with open(file_path, 'w', newline='', encoding='utf-8-sig') as f:
        writer = csv.writer(f)
        writer.writerow(['ranking', 'Movie Name', 'year of release', 'region',
                         'type', 'score', 'recommended language', 'link'])
 # list of movies 
# One human-readable line per movie; joined later to form the e-mail body.
filmlist = []
# The list is paginated through the `start` query parameter: 10 pages with a
# step of 25 entries.
for x in range(10):
    url = 'https://movie.douban.com/top250?start={}&filter='.format(x * 25)

    # Work around the site's anti-crawler check by sending a browser-like
    # User-Agent request header.
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36 OPR/65.0.3467.78 (Edition Baidu)'}
    try:
        # A timeout keeps the script from hanging forever on a dead connection.
        res = requests.get(url, headers=headers, timeout=10)
    except requests.RequestException as err:
        print('Request failed! {0}'.format(err))
        continue

    if res.status_code == 200:
        print('Fetching movie data on page {} ...'.format(x + 1))
        soup = BeautifulSoup(res.text, 'html.parser')
        ol = soup.find('ol', class_='grid_view')
        if ol is None:
            # find() returns None when nothing matches (e.g. a block page).
            print('Movie list not found on page {}'.format(x + 1))
            continue

        # Open the CSV once per page instead of once per row.
        with open(file_path, 'a', newline='', encoding='utf-8-sig') as f:
            writer = csv.writer(f)
            for li in ol.find_all('li'):
                # Ranking
                num = li.find('div', class_='pic').find('em').text

                info = li.find('div', class_='info')
                anchor = info.find('div', class_='hd').find('a')

                # Title: concatenation of every <span> inside the link
                title = ''.join(span.text for span in anchor.find_all('span'))

                # Link
                link = anchor['href']

                # Score
                rating_num = info.find('span', class_='rating_num').text

                # Recommended quote; the span may be missing, in which case
                # find() returns None and an empty string is used instead.
                inq_span = info.find('span', class_='inq')
                inq = inq_span.text if inq_span is not None else ''

                # Release year, region, type: the third child node of the
                # first <p>, with the three fields separated by '/'.
                bd = info.find('div', class_='bd').find('p').contents[2]
                bd = [part.strip() for part in str(bd).split('/')]
                # Pad so that the three indexed fields below always exist.
                bd += [''] * (3 - len(bd))

                # Save to the file
                writer.writerow([num, title, bd[0], bd[1], bd[2], rating_num, inq, link])

                filmlist.append(
                    "{0}. {1}: {2} / {3} / {4}, Rating: {5}, Recommended language: {6}, link: {7}".format(
                        num, title, bd[0], bd[1], bd[2], rating_num, inq, link))

        # Pause between pages to keep the request rate low.
        time.sleep(0.75)
    else:
        print('Request failed!')

# Send mail: the movie list goes into the message body and the CSV file is
# attached.
my_sender = 'xxx'  # sender's email account
my_pass = 'xxx'  # sender's email password
# NOTE(review): the recipient on the original sendmail line was obfuscated
# ("[email protected]"); the address from the To header is reused - confirm.
my_receiver = 'lsjljq@163.com'


def _log(message):
    """Print *message* prefixed with the current local time."""
    print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime()) + message)


try:
    _log(": Send Mail Preparation")

    # Multipart container so the message can carry a body and an attachment.
    msg = MIMEMultipart()
    msg['From'] = formataddr(["LJQ", my_sender])  # (sender nickname, sender account)
    msg['To'] = formataddr(["lsjljq", my_receiver])  # (recipient nickname, recipient account)

    # Mail title
    subject = 'watercress Top250 movie'
    msg['Subject'] = Header(subject, 'utf-8')

    # Message body content: one movie per line
    contenttext = "\n".join(filmlist)
    msg.attach(MIMEText(contenttext, 'plain', 'utf-8'))

    # Attachment 1: the CSV file written earlier. `file_path` is reused so the
    # attachment always refers to the file that was actually written; the
    # context manager closes the handle.
    with open(file_path, 'rb') as attachment_file:
        att1 = MIMEText(attachment_file.read(), 'base64', 'utf-8')
    # Encode the display name as a MIME header so a non-ASCII name is safe.
    # Header.encode() takes no charset argument; the charset comes from
    # make_header().
    attachment_name = make_header([('watercress Top250 movie', 'UTF-8')]).encode()
    # NOTE: item assignment on a message appends a header; it does not replace
    # the Content-Type that MIMEText already set.
    att1["Content-Type"] = 'application/octet-stream; name="{0}"'.format(attachment_name)
    # The filename can be anything; it is the name shown in the mail client.
    att1["Content-Disposition"] = 'attachment; filename="{0}.csv"'.format(attachment_name)
    msg.attach(att1)

    _log(": connecting Mail Server ...")
    # Sender's SMTP server over SSL, port 465. The context manager sends QUIT
    # and closes the connection even when login or sending fails.
    with smtplib.SMTP_SSL("smtp.exmail.qq.com", 465) as server:
        _log(": login ...")
        server.login(my_sender, my_pass)  # (sender account, sender password)
        _log(": sending a message ...")
        # (sender account, list of recipient accounts, serialized message)
        server.sendmail(my_sender, [my_receiver], msg.as_string())
    print('email has been sent successfully!')
except Exception as err:
    # Top-level boundary of the script: report the failure instead of crashing.
    print('send error: {0}'.format(err))

 

Guess you like

Origin www.cnblogs.com/KeenLeung/p/12157923.html