Python: crawling the Douban Top 250 movie data into a MySQL database

# -*- coding: utf-8 -*-
"""Crawl the first page of the Douban Top 250 movie list and store it in MySQL."""
from datetime import datetime, timedelta

import pymysql
import requests
from bs4 import BeautifulSoup

# Helper class for database operations.
class MySqlCommand(object):
    """Hold MySQL connection settings and open connections through pymysql.

    ``db`` and ``table`` are empty strings here; presumably they are meant to
    be filled in for the target environment before connecting.
    """

    def __init__(self):
        """Initialise the connection settings with local defaults."""
        self.host = "127.0.0.1"
        self.port = 3306  # port number
        self.user = "root"  # username
        self.password = ""  # password
        self.db = ""  # database name passed to pymysql.connect
        self.table = ""  # table name (not read anywhere in this class)

    def connectMysql(self):
        """Connect to MySQL and return ``(cursor, connection)``.

        The connection and cursor are also stored on the instance as
        ``self.conn`` and ``self.cursor``.

        Raises:
            pymysql.MySQLError: if the connection cannot be established. The
                error is printed and re-raised so callers that unpack the
                result do not fail later with an unrelated ``TypeError``.
        """
        try:
            self.conn = pymysql.connect(host=self.host, port=self.port,
                                        user=self.user, passwd=self.password,
                                        db=self.db, charset='utf8')
        except pymysql.MySQLError as err:
            print('connect mysql error: %s' % err)
            raise
        self.cursor = self.conn.cursor()
        return self.cursor, self.conn


# Build the URL of the ranking page that starts at a given offset.
def get_url(root_url, start):
    """Return ``root_url`` with a ``start`` query parameter appended.

    Args:
        root_url: Base URL of the ranking page, without a query string.
        start: Offset of the first entry to list; converted with ``str``.

    Returns:
        The URL in the form ``<root_url>?start=<start>&``.
    """
    return root_url + "?start=" + str(start) + "&"

def _extract_movie(tag_li):
    """Build a dict (rank, title, score, desc) from one ``<li>`` ranking entry."""
    quote_tag = tag_li.find("span", "inq")
    return {
        'rank': tag_li.find("em").string,
        'title': tag_li.find_all("span", "title")[0].string,
        'score': tag_li.find("span", "rating_num").string,
        # Entries without a one-line quote get a fixed placeholder text.
        'desc': quote_tag.string if quote_tag is not None else '无评词',
    }


def get_review(page_url):
    """Scrape one ranking page and store its movies in the ``douban`` table.

    The page is downloaded and parsed first; only after that succeeds is the
    ``douban`` table dropped, recreated and filled, so a failed download does
    not destroy previously stored data.

    Args:
        page_url: URL of the ranking page to scrape.

    Returns:
        A list of dicts with the keys ``rank``, ``title``, ``score`` and
        ``desc``, one per movie found on the page.

    Raises:
        requests.RequestException: if the page cannot be downloaded or the
            server answers with an error status.
        ValueError: if the page contains no ``<ol class="grid_view">`` list.
        pymysql.MySQLError: if connecting to or writing to MySQL fails.
    """
    # NOTE(review): some sites refuse the default requests User-Agent; if the
    # request is rejected, pass a browser-like ``headers=`` here.
    response = requests.get(page_url, timeout=10)
    response.raise_for_status()

    page = BeautifulSoup(response.text, 'lxml')
    ranking = page.find("ol", "grid_view")
    if ranking is None:
        raise ValueError("no <ol class='grid_view'> found at %s" % page_url)
    movies_list = [_extract_movie(tag_li) for tag_li in ranking.find_all("li")]

    # ``rank`` is a reserved word in MySQL 8.0.2+, so it must be quoted.
    create_table = ("CREATE TABLE douban("
                    "`rank` varchar(255),"
                    "title varchar(255),"
                    "score varchar(255),"
                    "descs varchar(255))")
    insert_row = ("INSERT INTO douban(`rank`,title,score,descs) "
                  "VALUES(%s,%s,%s,%s)")

    cursor, db = MySqlCommand().connectMysql()
    try:
        cursor.execute("DROP TABLE IF EXISTS douban")
        cursor.execute(create_table)
        cursor.executemany(
            insert_row,
            [(movie['rank'], movie['title'], movie['score'], movie['desc'])
             for movie in movies_list])
        db.commit()
    finally:
        # Always release the connection, even when a statement fails.
        db.close()

    return movies_list

if __name__ == '__main__':
    # Script entry point: scrape the ranking page that starts at offset 0.
    root_url = "https://movie.douban.com/top250"
    start = 0
    page_url = get_url(root_url, start)
    movies_list = get_review(page_url)

result:

 

Guess you like

Origin www.cnblogs.com/venvive/p/11360654.html