Fetch the Douban (machine-translated here as "watercress") Top 250 movie list, then send it to your own e-mail address.
Here is the code:
import csv
import os
import smtplib
import time
from email.header import Header
from email.header import make_header  # kept from the original import list
from email.mime.application import MIMEApplication
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.utils import formataddr

import requests
from bs4 import BeautifulSoup

# Scrape the Douban Top 250 movie list, store it as CSV, and mail the result.

# Output file. One single path is used for both writing and attaching (the
# mangled source wrote "... movie .csv" but attached "... film .csv").
file_path = os.path.join(os.getcwd(), "watercress Top250 movie.csv")

# Mail settings. Placeholders only: never commit real credentials.
my_sender = "xxx"  # sender's e-mail account
MY_PASS = "xxx"  # sender's e-mail password
# NOTE(review): the envelope recipient was obfuscated in the source; it is
# assumed to be the same address as the "To" header -- confirm.
my_receiver = "lsjljq@163.com"
SMTP_HOST = "smtp.exmail.qq.com"
SMTP_PORT = 465  # implicit-TLS port, as required by SMTP_SSL

# Scraping settings.
LIST_URL = "https://movie.douban.com/top250?start={}&filter="
PAGE_COUNT = 10
PAGE_SIZE = 25
REQUEST_TIMEOUT = 10  # seconds; without it a stalled request blocks forever
REQUEST_DELAY = 0.75  # seconds between pages, to stay polite to the site
# Browser-like request header, used to get past the site's anti-crawler check.
REQUEST_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/78.0.3904.108 Safari/537.36 "
        "OPR/65.0.3467.78 (Baidu Edition)"
    )
}

CSV_HEADER = [
    "ranking",
    "Movie Name",
    "year of release",
    "region",
    "type",
    "score",
    "recommended language",
    "link",
]
# One line of the mail body; the fields are in CSV_HEADER order.
FILM_LINE = (
    "{0}. {1}: {2} / {3} / {4}, Rating: {5}, "
    "Recommended language: {6}, link: {7}"
)
TIME_FORMAT = "%Y-%m-%d %H:%M:%S"


def _log(message):
    """Print *message* prefixed with the current local time."""
    print(time.strftime(TIME_FORMAT, time.localtime()) + ": " + message)


def _split_details(text):
    """Split a "year / region / type" line into exactly three fields.

    The last two "/"-separated parts are taken as region and type; whatever
    precedes them is joined back into the year field, so a line with more
    than three parts does not shift the columns. Missing parts become empty
    strings instead of raising ``IndexError``.
    """
    parts = [part.strip() for part in text.split("/")]
    if len(parts) < 3:
        parts.extend([""] * (3 - len(parts)))
    return " / ".join(parts[:-2]), parts[-2], parts[-1]


def _parse_film(li):
    """Return one CSV row (in CSV_HEADER order) for a ``<li>`` list entry."""
    num = li.find("div", class_="pic").find("em").text
    info = li.find("div", class_="info")
    anchor = info.find("div", class_="hd").find("a")
    # The title is spread over several <span> elements inside the link.
    title = "".join(span.text for span in anchor.find_all("span"))
    link = anchor["href"]
    rating_num = info.find("span", class_="rating_num").text
    # The one-line quote is optional; an entry without it must not crash.
    inq_tag = info.find("span", class_="inq")
    inq = inq_tag.text if inq_tag is not None else ""
    # The details follow the <br> inside the first <p>, i.e. the third child.
    contents = info.find("div", class_="bd").find("p").contents
    details = str(contents[2]) if len(contents) > 2 else ""
    year, region, genre = _split_details(details)
    return [num, title, year, region, genre, rating_num, inq, link]


def fetch_films(csv_path=file_path):
    """Scrape every list page, append the rows to *csv_path*, return mail lines.

    The header row is written only when *csv_path* does not exist yet; rows
    are appended otherwise. A page that cannot be fetched or parsed is
    reported and skipped.

    Returns:
        A list with one formatted text line per scraped movie.
    """
    is_new_file = not os.path.isfile(csv_path)
    films = []
    # utf-8-sig writes a BOM so spreadsheet tools detect the encoding.
    with open(csv_path, "a", newline="", encoding="utf-8-sig") as f:
        writer = csv.writer(f)
        if is_new_file:
            writer.writerow(CSV_HEADER)
        for page in range(PAGE_COUNT):
            url = LIST_URL.format(page * PAGE_SIZE)
            try:
                res = requests.get(
                    url, headers=REQUEST_HEADERS, timeout=REQUEST_TIMEOUT
                )
            except requests.RequestException:
                print("Request failed!")
                continue
            if res.status_code != 200:
                print("Request failed!")
                continue
            print("being fetched on the movie data {} ...".format(page + 1))
            soup = BeautifulSoup(res.text, "html.parser")
            ol = soup.find("ol", class_="grid_view")
            if ol is None:
                # A 200 response without the list container cannot be parsed.
                print("Request failed!")
                continue
            for li in ol.find_all("li"):
                row = _parse_film(li)
                writer.writerow(row)
                films.append(FILM_LINE.format(*row))
            time.sleep(REQUEST_DELAY)
    return films


def send_mail(films, csv_path=file_path):
    """Mail *films* as the text body with *csv_path* attached.

    Any failure (missing file, connection, login, send) is reported on
    stdout instead of being raised, as in the original script.
    """
    try:
        _log("Send Mail Preparation")
        msg = MIMEMultipart()
        # formataddr takes a (display name, address) pair.
        msg["From"] = formataddr(("LJQ", my_sender))
        msg["To"] = formataddr(("lsjljq", my_receiver))
        msg["Subject"] = Header("watercress Top250 movie", "utf-8")
        msg.attach(MIMEText("\n".join(films), "plain", "utf-8"))

        # Attach the CSV as application/octet-stream (base64-encoded).
        attachment_name = os.path.basename(csv_path)
        with open(csv_path, "rb") as f:
            att1 = MIMEApplication(f.read(), Name=attachment_name)
        att1.add_header(
            "Content-Disposition", "attachment", filename=attachment_name
        )
        msg.attach(att1)

        _log("positive connecting Mail Server ...")
        # The context manager closes the connection even if login/send fails.
        with smtplib.SMTP_SSL(SMTP_HOST, SMTP_PORT) as server:
            _log("the login ...")
            server.login(my_sender, MY_PASS)
            _log("sending a message ...")
            server.sendmail(my_sender, [my_receiver], msg.as_string())
        print("email has been sent successfully!")
    except Exception as err:  # top-level boundary: report and carry on
        print("send error:! {0}".format(err))


def main():
    """Scrape the list, then mail it."""
    filmlist = fetch_films()
    send_mail(filmlist)


if __name__ == "__main__":
    main()