猫眼电影top100

from urllib import request,parse
from bs4 import BeautifulSoup

class CatEye():
    """Scraper for the Maoyan TOP100 movie board.

    Downloads the board pages, extracts one text line per movie
    (title, cast, release time) and writes all lines to a text file.
    """

    def __init__(self):
        # URL template; the ``offset`` query value is filled in by get_url,
        # e.g. https://maoyan.com/board/4?offset=10
        self.url='https://maoyan.com/board/4?offset={}'
        # Browser-like User-Agent sent with every request.
        self.headers = { 'User-Agent':'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; InfoPath.3)' }

    # Build the page URL
    def get_url(self,page_n):
        """Return the board URL for page index ``page_n``.

        The offset placed in the URL is ``page_n * 10``.
        """
        return self.url.format(page_n*10)

    # Fetch the HTML
    def get_html(self,url,timeout=10):
        """Download ``url`` and return the response body as text.

        Args:
            url: Address to fetch.
            timeout: Seconds to wait before giving up on the connection.

        Returns:
            The decoded page. The charset declared in the response headers
            is used when present, otherwise UTF-8.

        Raises:
            urllib.error.URLError: If the request fails or times out.
        """
        req=request.Request(url,headers=self.headers)
        # The context manager closes the connection even if decoding fails.
        with request.urlopen(req,timeout=timeout) as response:
            charset=response.headers.get_content_charset() or 'utf-8'
            return response.read().decode(charset)

    @staticmethod
    def _text(tag):
        """Return the stripped text of ``tag``, or '' when the tag is missing."""
        return tag.getText().strip() if tag is not None else ''

    # Parse the page
    def get_info(self,html):
        """Extract one summary line per movie from a board page.

        Args:
            html: HTML text of a board page.

        Returns:
            A list of strings, one per ``<dd>`` entry, each made of the
            title, cast and release-time texts. The list is empty when the
            page has no ``<dl class="board-wrapper">`` element; a field
            whose tag is missing is rendered as an empty string.
        """
        # Name the parser explicitly so results do not depend on which
        # optional parser libraries happen to be installed.
        soup=BeautifulSoup(html,'html.parser')
        board=soup.find("dl",attrs={'class':"board-wrapper"})
        if board is None:
            return []

        movie_name_list=[]
        for dd in board.find_all('dd'):
            name_tag=dd.find("p",attrs={'class':'name'})
            link=name_tag.find("a") if name_tag is not None else None
            movie_name=self._text(link)
            actors=self._text(dd.find("p",attrs={'class':'star'}))
            release_time=self._text(dd.find("p",attrs={'class':'releasetime'}))
            movie_name_list.append(movie_name+'      '+actors+'     '+release_time)

        return movie_name_list

    # Save the result
    def save_info(self,filename,info):
        """Write ``info`` to ``filename``, replacing any existing file.

        The file is written as UTF-8 so the Chinese text does not depend on
        the platform default encoding, and with ``newline=''`` so the
        explicit ``'\\r\\n'`` separators are stored unchanged instead of
        being translated a second time.
        """
        with open(filename,'w',encoding='utf-8',newline='') as f:
            f.write(info)

    def runforever(self,pages=10,filename='CatEye_Board'):
        """Scrape ``pages`` board pages and save the result to ``filename``.

        Args:
            pages: Number of consecutive pages to fetch, starting at page 0.
            filename: Path of the output text file.
        """
        chunks=[]
        for i in range(pages):
            html=self.get_html(self.get_url(i))
            movie_list=self.get_info(html)
            # Each page contributes its lines followed by a trailing CRLF.
            chunks.append('\r\n'.join(movie_list)+'\r\n')
        self.save_info(filename,''.join(chunks))


if __name__ == '__main__':
    # Script entry point: build the scraper and run the full crawl.
    CatEye().runforever()

猫眼电影top100链接:

https://maoyan.com/board/4

基本思路和豆瓣TOP250是一样的，需要用到urllib.request和BeautifulSoup（bs4）模块。输出格式有点丑，不要介意哈。

猜你喜欢

转载自www.cnblogs.com/jiaqi77/p/12167293.html