Scrapy框架使用代理

使用代理抓取https://www.baidu.com/s?wd=ip

# -*- coding: utf-8 -*-
'''
一、在settings.py中开启中间件
DOWNLOADER_MIDDLEWARES = {
   'daili.middlewares.DailiDownloaderMiddleware': 543,
}

二、在middlewares.py中重写DailiDownloaderMiddleware的process_request方法
class DailiDownloaderMiddleware(object):
    def process_request(self, request, spider):
        request.meta['proxy'] = 'http://124.205.155.156:9090'
        return None
'''
import scrapy

class DlSpider(scrapy.Spider):
    """Spider that fetches Baidu's "what is my IP" result page via a proxy.

    The proxy itself is attached to each request by the
    DailiDownloaderMiddleware described in the notes at the top of this
    file; this spider only issues the request and dumps the response
    body so the effective (proxied) IP can be inspected.
    """

    name = 'dl'
    allowed_domains = ['www.baidu.com']
    start_urls = ['https://www.baidu.com/s?wd=ip']

    def parse(self, response):
        """Print the fetched page to stdout and save it to ip.html."""
        page_html = response.text
        # Visual separator so the dump stands out in the crawl log.
        print("*" * 100)
        print(page_html)
        with open('ip.html', 'w', encoding='utf-8') as out_file:
            out_file.write(page_html)

  

猜你喜欢

转载自www.cnblogs.com/huanggaoyu/p/10657225.html