借鉴 Scrapy 的三大基本模块:Request、Response 和 Item

request

class Request(object):
    """Plain container describing one HTTP request.

    All constructor arguments are stored unchanged as same-named
    attributes; no validation or normalisation is performed here.
    """

    def __init__(self, url, method='GET', params=None, headers=None,
                 data=None, parse='parse', meta=None):
        """Store the request description.

        Args:
            url: Target URL.
            method: HTTP method name; defaults to ``'GET'``.
            params: Query parameters, or ``None``.
            headers: Request headers, or ``None``.
            data: Request body / form data, or ``None``.
            parse: Stored as-is; defaults to ``'parse'``.
                NOTE(review): presumably the name of the callback that
                handles the response -- confirm against the caller.
            meta: Arbitrary extra data carried with the request, or ``None``.
        """
        self.url = url
        self.method = method
        self.params = params
        self.headers = headers
        self.data = data
        self.parse = parse
        self.meta = meta

response

from lxml import html
import json
import re

class Response(object):
    """Container for a fetched response plus small parsing helpers."""

    def __init__(self, url, status_code, headers, body, meta=None):
        """Store the response fields unchanged as same-named attributes.

        Args:
            url: URL the response belongs to.
            status_code: HTTP status code.
            headers: Response headers.
            body: Response payload (used by the parsing helpers below).
            meta: Arbitrary extra data carried with the response, or ``None``.
        """
        self.url = url
        self.status_code = status_code
        self.headers = headers
        self.body = body
        self.meta = meta

    def xpath(self, rule):
        """Parse ``self.body`` as HTML and return the XPath matches for *rule*.

        Relies on the file-level ``from lxml import html`` import.
        """
        res = html.etree.HTML(self.body)
        return res.xpath(rule)

    @property
    def json(self):
        """Return ``self.body`` decoded as JSON via ``json.loads``."""
        # Inside the method, ``json`` resolves to the module-level import,
        # not to this property.
        return json.loads(self.body)

    def findall(self, rule, body=None):
        """Return ``re.findall(rule, body)``.

        Args:
            rule: Regular-expression pattern.
            body: Text to search. When omitted (``None``), ``self.body``
                is searched instead.
        """
        if body is None:
            body = self.body
        return re.findall(rule, body)

item

class Item(object):
    """Thin wrapper around one piece of extracted data."""

    def __init__(self, data):
        """Keep *data* on the private ``_data`` attribute."""
        self._data = data

    @property
    def data(self):
        """Return the wrapped data object (no setter is defined)."""
        return self._data

猜你喜欢

转载自blog.csdn.net/s2965713873/article/details/82862064