[Beginner] Learning object-oriented Python and XPath by scraping the Maoyan Movies Top 100

Scraping the Maoyan Movies Top 100 with an object-oriented design and XPath

import time
import json
import requests
from lxml import etree


class MaoYanTop100(object):
    """Scraper for the Maoyan movie board at ``https://maoyan.com/board/4``.

    Each page of the board is downloaded, every ``<dd>`` entry is reduced
    to a ``title`` / ``actor`` / ``time`` dict via XPath, and the dicts are
    written to ``maoyan.json`` in the current working directory.
    """

    def __init__(self):
        # Board URL; the page offset is supplied as the ``start`` query
        # parameter by run().
        self.url = 'https://maoyan.com/board/4'
        # Browser-like User-Agent sent with every request.
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.108 Safari/537.36"
        }

    def get_data(self, url, param):
        """Fetch *url* with query parameters *param* and return the body text.

        The final request URL (including the encoded query string) is
        printed so progress is visible on the console.

        Raises:
            requests.exceptions.RequestException: on connection failure or
                when the server does not answer within the timeout.
        """
        # NOTE(review): verify=False disables TLS certificate verification.
        # It is kept to preserve the original behaviour, but it exposes the
        # request to man-in-the-middle tampering -- confirm it is needed.
        response = requests.get(
            url,
            params=param,
            headers=self.headers,
            verify=False,
            timeout=10,  # without a timeout a stalled server blocks forever
        )
        print(response.request.url)
        return response.text

    def parse_content(self, text):
        """Parse the HTML string *text* into an lxml element tree."""
        return etree.HTML(text)

    def parse(self, dom):
        """Yield one ``{'title', 'actor', 'time'}`` dict per ``<dd>`` in *dom*.

        ``actor`` is stripped of surrounding whitespace; ``title`` and
        ``time`` are returned exactly as found. Entries that lack any of
        the three fields are skipped, so a single malformed ``<dd>`` does
        not abort the rest of the page.
        """
        for node in dom.xpath('//dd'):
            titles = node.xpath('.//p/a/@title')
            stars = node.xpath('.//p[@class="star"]/text()')
            release_times = node.xpath('.//p[@class="releasetime"]/text()')
            if not (titles and stars and release_times):
                continue
            yield {
                'title': titles[0],
                'actor': stars[0].strip(),
                'time': release_times[0]
            }

    def save(self, f, item):
        """Write *item* to the open text file *f* as indented JSON.

        Each record is followed by ``',\\n'``, so the resulting file is a
        comma-separated sequence of JSON objects rather than one valid
        JSON document.
        """
        f.write(json.dumps(item, indent=2, ensure_ascii=False) + ',\n')

    def run(self):
        """Prompt for a page count, then scrape and save that many pages.

        Raises:
            ValueError: if the value typed at the prompt is not an integer.
        """
        page = int(input('请输入页码:'))
        # UTF-8 is set explicitly because save() emits non-ASCII characters
        # verbatim (ensure_ascii=False) and the platform default encoding
        # may not be able to represent them.
        with open('maoyan.json', 'w', encoding='utf-8') as f_obj:
            for i in range(page):
                # The offset advances by 10 for each successive page.
                param = {
                    'start': i * 10
                }
                text = self.get_data(self.url, param)
                time.sleep(1)  # pause between requests
                dom = self.parse_content(text)
                for item in self.parse(dom):
                    self.save(f_obj, item)
                print(f'Page {i + 1} data saved')


# Script entry point: run the scraper only when executed directly,
# not when this module is imported.
if __name__ == '__main__':
    maoyan = MaoYanTop100()
    maoyan.run()

 

Guess you like

Source: www.cnblogs.com/liduo0413/p/11513092.html