# -*- coding: utf-8 -*-
"""Scrape the Douban Top 250 movie ranking and store it in MySQL."""
import requests
from bs4 import BeautifulSoup
from datetime import datetime, timedelta  # NOTE(review): unused in visible code — kept for safety
import pymysql
# Small wrapper class for MySQL database operations.
class MySqlCommand(object):

    def __init__(self):
        """Initialize connection settings with local-server defaults."""
        self.host = "127.0.0.1"  # MySQL server address
        self.port = 3306         # port number
        self.user = "root"       # username
        self.password = ""       # password (empty by default)
        self.db = ""             # database name — must be set before connecting
        self.table = ""          # table name (unused by connectMysql itself)

    def connectMysql(self):
        """Open a connection to MySQL and return (cursor, conn).

        Returns:
            (cursor, conn) on success; implicitly None on failure after
            printing the error. NOTE(review): callers unpack the result,
            so a failed connect will raise TypeError at the call site —
            consider re-raising instead.
        """
        try:
            self.conn = pymysql.connect(host=self.host, port=self.port,
                                        user=self.user, passwd=self.password,
                                        db=self.db, charset='utf8')
            self.cursor = self.conn.cursor()
            return self.cursor, self.conn
        except pymysql.Error as exc:  # was a bare except that hid the cause
            print('connect mysql error:', exc)
# Build the URL of the ranking page beginning at offset `start`.
def get_url(root_url, start):
    """Return the paged ranking URL, e.g. root_url + "?start=25&".

    Args:
        root_url: base URL of the ranking (no query string).
        start: integer offset of the first movie on the page.
    """
    # Reconstructed from the garbled " Start =? " fragment: the Douban
    # paging convention is "?start=<offset>&".
    return root_url + "?start=" + str(start) + "&"
def get_review(page_url):
    """Fetch one ranking page and store each movie's info in MySQL.

    Drops and recreates the `douban` table, then inserts one row per
    movie: rank, title, score, and a one-line description (descs).

    Args:
        page_url: full URL of a Douban Top-250 ranking page.
    """
    cursor, db = MySqlCommand().connectMysql()
    # `rank` is a reserved word in MySQL 8+, so quote it with backticks.
    create_table = ("CREATE TABLE douban("
                    "`rank` varchar(255),"
                    "title varchar(255),"
                    "score varchar(255),"
                    "descs varchar(255))")
    cursor.execute("DROP TABLE IF EXISTS douban")
    cursor.execute(create_table)
    response = requests.get(page_url)  # fixed `reponse` typo
    soup = BeautifulSoup(response.text, 'lxml')
    soup = soup.find("ol", "grid_view")  # narrow to the ranking list
    for tag_li in soup.find_all("li"):
        movie = {}  # renamed from `dict`, which shadowed the builtin
        movie['rank'] = tag_li.find("em").string
        movie['title'] = tag_li.find_all("span", "title")[0].string
        movie['score'] = tag_li.find("span", "rating_num").string
        # Not every movie has a one-line review ("inq" span).
        if tag_li.find("span", "inq"):
            movie['desc'] = tag_li.find("span", "inq").string
        else:
            movie['desc'] = '无评词'
        # Parameterized insert — never build SQL by string concatenation.
        cursor.execute("INSERT INTO douban(`rank`,title,score,descs)"
                       " VALUES(%s,%s,%s,%s)",
                       (movie['rank'], movie['title'],
                        movie['score'], movie['desc']))
    # Commit once after all inserts; closing inside the loop would
    # break the second iteration.
    db.commit()
    db.close()
if __name__ == '__main__':
    # Scrape only the first page (offset 0) of the Top 250 ranking.
    root_url = "https://movie.douban.com/top250"
    start = 0
    get_review(get_url(root_url, start))