# NOTE(review): the original post's point — a wrong value in `allowed_domains`
# makes Scrapy's offsite filter silently drop every follow-up request; either
# omit the attribute entirely or make sure it exactly matches the target domain.
class WendaSpider(scrapy.Spider):
    """Crawl autohome.com.cn Q&A listing pages and follow each question/answer page."""

    name = 'wenda'
    # allowed_domains = ['autohome.com']  # wrong domain -> offsite filter kills all requests
    start_urls = ['https://wenda.autohome.com.cn/topic/list-0-0-0-0-0-1']

    # NOTE(review): a single class-level item shared by all concurrently-handled
    # responses is a data-overwrite hazard in Scrapy. Kept for compatibility with
    # code outside this view (e.g. parse_answer), but the callbacks below no
    # longer depend on it — scraped fields travel in request meta instead.
    item = QichezhijiaItem()
    n = 1

    def parse(self, response):
        """List page: queue every question detail page, then follow pagination."""
        detail_urls = response.xpath('//ul[@class="question-list"]//h4/a/@href').extract()
        for href in detail_urls:
            url = response.urljoin(href)
            yield scrapy.Request(url, callback=self.parse_info)
            # BUG FIX: original did print(response.urljoin(url)) — joining an
            # already-absolute URL a second time; log the URL once via the
            # spider logger instead of a bare print.
            self.logger.debug(url)
        next_url = response.xpath('//div[@class="athm-page__info"]/a/@href').extract_first()
        if next_url:
            yield scrapy.Request(response.urljoin(next_url), callback=self.parse)

    def parse_info(self, response):
        """Question page: extract title/body/tags, then follow each answer link."""
        titles = response.xpath('//h1[@class="card-title"]/text()').extract_first()
        requests = response.xpath('//div[@class="card-content "]//p/text()').extract_first()
        level = '--'.join(response.xpath('//ul[@class="card-tag-list"]/li/text()').extract())
        answer_urls = response.xpath('//div[@class="text-wrap"]/a[@class="text"]/@href').extract()
        for href in answer_urls:
            # BUG FIX: titles/requests/level were computed and then discarded;
            # forward them through meta so parse_answer can emit a complete item.
            yield scrapy.Request(
                response.urljoin(href),
                callback=self.parse_answer,
                meta={'titles': titles, 'requests': requests, 'level': level},
            )
scrapy 框架中遇到的 bug(持续更新)
猜你喜欢
转载自blog.csdn.net/qq_42709587/article/details/81877249
今日推荐
周排行