scrapy 自定义扩展的功能

scrapy.telnet里有一个TelnetConsole类

class TelnetConsole(protocol.ServerFactory):
    """Server factory that opens a telnet console while the engine runs.

    Listening starts on the ``engine_started`` signal and stops on the
    ``engine_stopped`` signal.
    """

    def __init__(self, crawler):
        """Read the telnet settings and subscribe to the engine signals.

        Raises ``NotConfigured`` when ``TELNETCONSOLE_ENABLED`` is false or
        when ``TWISTED_CONCH_AVAILABLE`` is falsy.
        """
        settings = crawler.settings
        if not settings.getbool('TELNETCONSOLE_ENABLED') or not TWISTED_CONCH_AVAILABLE:
            raise NotConfigured

        self.crawler = crawler
        self.noisy = False
        # TELNETCONSOLE_PORT is read as a list and every entry converted to int.
        self.portrange = list(map(int, settings.getlist('TELNETCONSOLE_PORT')))
        self.host = settings['TELNETCONSOLE_HOST']

        signal_manager = self.crawler.signals
        signal_manager.connect(self.start_listening, signals.engine_started)
        signal_manager.connect(self.stop_listening, signals.engine_stopped)

    @classmethod
    def from_crawler(cls, crawler):
        """Alternate constructor: build the console from a crawler."""
        console = cls(crawler)
        return console

    def start_listening(self):
        """Open the listening port and log the address that was bound."""
        self.port = listen_tcp(self.portrange, self.host, self)
        address = self.port.getHost()
        log_args = {'host': address.host, 'port': address.port}
        logger.debug("Telnet console listening on %(host)s:%(port)d",
                     log_args,
                     extra={'crawler': self.crawler})

    def stop_listening(self):
        """Close the port opened by ``start_listening``."""
        self.port.stopListening()

    def protocol(self):
        """Build the telnet transport used for a new connection."""
        console_vars = self._get_telnet_vars()
        return telnet.TelnetTransport(
            telnet.TelnetBootstrapProtocol,
            insults.ServerProtocol,
            manhole.Manhole,
            console_vars,
        )

可以自己扩展爬虫在出现哪个信号时做出什么行为。

scrapy.signals 里面有以下信号:

# Signal identifiers. Each name is bound to its own distinct ``object()``
# instance; handlers are connected to these names, e.g.
# ``crawler.signals.connect(handler, signals.engine_started)``.
engine_started = object()
engine_stopped = object()
spider_opened = object()
spider_idle = object()
spider_closed = object()
spider_error = object()
request_scheduled = object()  # a request was put into the scheduler
request_dropped = object()  # a request was dropped
response_received = object()  # a response was received
response_downloaded = object()  # a response was downloaded
item_scraped = object()  # an item was obtained
item_dropped = object()  # an item was dropped

例如我在根目录下创建一个extend.py文件,里面创建一个类MyExtend。注意:扩展需要在settings中通过EXTENSIONS注册(例如 EXTENSIONS = { 'extend.MyExtend': 300, },路径按extend.py的实际位置填写),而不是通过ITEM_PIPELINES = { 'shan.pipelines.ShanPipeline': 300, }这样的管道配置。

from scrapy import signals


class MyExtend:
    """Extension that prints a message when the engine starts and stops."""

    def __init__(self, crawler):
        """Keep the crawler and subscribe to the two engine signals."""
        self.crawler = crawler
        signal_manager = self.crawler.signals
        signal_manager.connect(self.start, signals.engine_started)
        signal_manager.connect(self.stop, signals.engine_stopped)

    @classmethod
    def from_crawler(cls, crawler):
        """Alternate constructor: build the extension from a crawler."""
        extension = cls(crawler)
        return extension

    def start(self):
        """Handler connected to ``signals.engine_started``."""
        print("signals.engine_started")

    def stop(self):
        """Handler connected to ``signals.engine_stopped``."""
        print("signals.engine_stop")

我规定在引擎开启时打印signals.engine_started,引擎关闭时打印signals.engine_stop。

(venv) D:\shan>scrapy crawl chouti --nolog
D:\shan\shan\spiders\chouti.py:9: ScrapyDeprecationWarning: Module `scrapy.dupefilter` is deprecated, use `scrapy.dupefilters` instead
  from scrapy.dupefilter import RFPDupeFilter
D:\shan\shan\spiders\chouti.py:11: ScrapyDeprecationWarning: Module `scrapy.telnet` is deprecated, use `scrapy.extensions.telnet` instead
  from scrapy.telnet import TelnetConsole
signals.engine_started
{"result":{"code":"9999", "message":"", "data":{"complateReg":"0","destJid":"cdu_53923279913"}}}
{"result":{"code":"9999", "message":"推荐成功", "data":{"jid":"cdu_53923279913","likedTime":"1539671818766000","lvCount":"13","nick":"Danbro","uvCount":"30","voteTime":"小于1分钟前"}
}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"9999", "message":"推荐成功", "data":{"jid":"cdu_53923279913","likedTime":"1539671819521000","lvCount":"7","nick":"Danbro","uvCount":"31","voteTime":"小于1分钟前"}}
}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"9999", "message":"推荐成功", "data":{"jid":"cdu_53923279913","likedTime":"1539671819621000","lvCount":"9","nick":"Danbro","uvCount":"35","voteTime":"小于1分钟前"}}
}
{"result":{"code":"9999", "message":"推荐成功", "data":{"jid":"cdu_53923279913","likedTime":"1539671819634000","lvCount":"20","nick":"Danbro","uvCount":"35","voteTime":"小于1分钟前"}
}}
{"result":{"code":"9999", "message":"推荐成功", "data":{"jid":"cdu_53923279913","likedTime":"1539671819614000","lvCount":"124","nick":"Danbro","uvCount":"35","voteTime":"小于1分钟前"
}}}
{"result":{"code":"9999", "message":"推荐成功", "data":{"jid":"cdu_53923279913","likedTime":"1539671819663000","lvCount":"32","nick":"Danbro","uvCount":"35","voteTime":"小于1分钟前"}
}}
signals.engine_stop

开启爬虫时可以看见这两句话。

猜你喜欢

转载自blog.csdn.net/u014248032/article/details/83088358