在 pyspider 中为请求添加头部(headers)

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/u014535666/article/details/83653064

class Handler(BaseHandler):
    """pyspider handler that requests a doutula.com listing page with custom headers.

    The seed request in ``on_start`` carries an explicit set of browser-like
    HTTP headers; the follow-up requests issued by ``index_page`` do not.
    """

    # No project-wide crawl options are set; the headers are supplied
    # per request in on_start instead.
    crawl_config = {}

    @every(minutes=24 * 60)
    def on_start(self):
        """Queue the first article-list page with browser-like request headers.

        The response is routed to ``index_page``. The request is made with
        ``validate_cert=False``.
        """
        browser_headers = {
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh-CN,zh;q=0.8',
            'Cache-Control': 'max-age=0',
            'Connection': 'keep-alive',
            'Host': 'www.doutula.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.90 Safari/537.36 2345Explorer/9.5.0.17997',
        }
        self.crawl(
            'http://www.doutula.com/article/list/?page=1',
            headers=browser_headers,
            callback=self.index_page,
            validate_cert=False,
        )

    @config(age=10 * 24 * 60 * 60)
    def index_page(self, response):
        """Queue every absolute link found on the page for ``detail_page``.

        Only anchors whose ``href`` starts with ``http`` are followed. These
        follow-up requests are issued without the custom headers used in
        ``on_start``.
        """
        absolute_links = response.doc('a[href^="http"]')
        for anchor in absolute_links.items():
            target_url = anchor.attr.href
            self.crawl(target_url, callback=self.detail_page)

    @config(priority=2)
    def detail_page(self, response):
        """Return the page URL and the text of its ``<title>`` element as a dict."""
        page_url = response.url
        page_title = response.doc('title').text()
        return {
            "url": page_url,
            "title": page_title,
        }
   

猜你喜欢

转载自blog.csdn.net/u014535666/article/details/83653064