# Crawling the Maoyan Top 100 films with an object-oriented scraper and XPath
import json
import time

import requests
from lxml import etree


class MaoYanTop100(object):
    """Scrape the Maoyan board page by page and dump each entry to a file.

    The workflow is: download a page (``get_data``), build an lxml tree
    (``parse_content``), extract one dict per ``<dd>`` entry (``parse``)
    and append it to the output file (``save``). ``run`` drives the loop.
    """

    # Seconds to wait for the server before giving up; without a timeout
    # ``requests.get`` may block forever on a stalled connection.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        self.url = 'https://maoyan.com/board/4'
        self.headers = {
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/74.0.3729.108 Safari/537.36"
            )
        }

    def get_data(self, url, param):
        """Fetch ``url`` with query parameters ``param`` and return the body text.

        The final request URL is printed for progress/debugging.

        Raises:
            requests.exceptions.RequestException: on connection failure or
                when the server does not answer within ``REQUEST_TIMEOUT``.
        """
        # NOTE(review): verify=False disables TLS certificate verification
        # (and makes urllib3 emit InsecureRequestWarning). Kept as in the
        # original; drop it unless it is needed for a local proxy.
        response = requests.get(
            url,
            params=param,
            headers=self.headers,
            verify=False,
            timeout=self.REQUEST_TIMEOUT,
        )
        print(response.request.url)
        return response.text

    def parse_content(self, text):
        """Parse an HTML string into an lxml element tree."""
        return etree.HTML(text)

    @staticmethod
    def _first(node, path):
        """Return the first XPath match of ``path`` under ``node``, or None."""
        matches = node.xpath(path)
        return matches[0] if matches else None

    def parse(self, dom):
        """Yield one ``{'title', 'actor', 'time'}`` dict per ``<dd>`` entry.

        Entries missing any of the three fields are skipped instead of
        raising ``IndexError``, so one malformed entry no longer discards
        the rest of the page.
        """
        if dom is None:
            # Nothing parseable was downloaded; yield no items.
            return
        for node in dom.xpath('//dd'):
            title = self._first(node, './/p/a/@title')
            actor = self._first(node, './/p[@class="star"]/text()')
            release = self._first(node, './/p[@class="releasetime"]/text()')
            if title is None or actor is None or release is None:
                continue
            yield {
                'title': title,
                'actor': actor.strip(),
                'time': release,
            }

    def save(self, f, item):
        """Write ``item`` to the open text file ``f`` as indented JSON.

        Each item is followed by ``',\\n'``; the resulting file is a
        comma-separated sequence of objects, not a single JSON document.
        """
        f.write(json.dumps(item, indent=2, ensure_ascii=False) + ',\n')

    def run(self):
        """Ask for a page count, then scrape and save that many pages."""
        page = int(input('请输入页码:'))
        # The original open mode was unreadable in the source; 'w' (start a
        # fresh file on every run) is used here. The encoding is explicit
        # because save() writes non-ASCII text (ensure_ascii=False).
        with open('maoyan.json', 'w', encoding='utf-8') as f_obj:
            for i in range(page):
                # NOTE(review): the query key is kept as 'start'; confirm
                # this is the paging parameter the site actually honours.
                param = {'start': i * 10}
                text = self.get_data(self.url, param)
                time.sleep(1)  # be polite: at most one request per second
                dom = self.parse_content(text)
                for item in self.parse(dom):
                    self.save(f_obj, item)
                print(f'Page {i + 1} data saved')


if __name__ == '__main__':
    maoyan = MaoYanTop100()
    maoyan.run()