关于 XPath 和 CSS 选择器路径的选取:可以在浏览器中按 F12 打开开发者工具,选中目标元素后点击鼠标右键,通过“复制(Copy)”菜单获取该元素的 XPath 或 CSS 选择器路径。
weather/items.py
# -*- coding: utf-8 -*-

# Define here the models for your scraped items
#
# See documentation in:
# https://doc.scrapy.org/en/latest/topics/items.html

import scrapy


class WeatherItem(scrapy.Item):
    """Container for one forecast entry scraped from the weather page.

    Every field is declared as a plain ``scrapy.Field()``; no serializer or
    conversion is configured here, so values are kept exactly as the spider
    assigns them.
    """

    # define the fields for your item here like:
    # name = scrapy.Field()
    date = scrapy.Field()  # date label of the forecast entry
    week = scrapy.Field()  # day-of-week label
    weather = scrapy.Field()  # weather description text
    maxTemperature = scrapy.Field()  # maximum temperature text
    minTemperature = scrapy.Field()  # minimum temperature text


# File: weather/pipelines.py (listing follows below)
# -*- coding: utf-8 -*-

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html

import os.path
import time


class WeatherPipeline(object):
    """Append every scraped weather item to a per-day text file."""

    def process_item(self, item, spider):
        """Write one item as a tab-separated record and pass it on.

        The output file is named after the current local date
        (``YYYYMMDD.txt``) and is opened relative to the current working
        directory in append mode, so repeated runs on the same day
        accumulate records in the same file.

        Args:
            item: Mapping-like object providing the keys ``date``, ``week``,
                ``weather``, ``maxTemperature`` and ``minTemperature``, each
                holding a string.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item``, unchanged, so later pipelines can process it.

        Raises:
            KeyError: If one of the expected keys is missing from ``item``.
        """
        today = time.strftime('%Y%m%d', time.localtime())
        filename = today + '.txt'

        # Build the complete record before touching the file so that a
        # missing field cannot leave a half-written line behind.
        record = '\t'.join((
            item['date'],
            item['week'],
            item['weather'],
            item['maxTemperature'],
            item['minTemperature'],
        )) + '\n\n'

        # Explicit UTF-8: the scraped text is Chinese, and the platform's
        # default encoding is not guaranteed to be able to represent it.
        with open(filename, 'a', encoding='utf-8') as fp:
            fp.write(record)
        return item
weather/spider/WeatherSpider.py
# -*- coding: utf-8 -*-
import scrapy

from weather.items import WeatherItem


class WeatherspiderSpider(scrapy.Spider):
    """Spider that scrapes forecast entries from the tianqi.com Shaoxing page."""

    name = 'WeatherSpider'
    # Only the bare domain name belongs here; the original entry carried a
    # URL path ('www.tianqi.com/shaoxing'), which is not a domain name.
    allowed_domains = ['www.tianqi.com']
    start_urls = ['http://www.tianqi.com/shaoxing/']

    # Item field name -> XPath (relative to a div with class "day7") that
    # yields the text nodes for that field, in page order.
    _FIELD_XPATHS = (
        ('date', './ul[@class="week"]/li/b//text()'),
        ('week', './ul[@class="week"]/li/span//text()'),
        ('weather', './ul[@class="txt txt2"]/li//text()'),
        ('maxTemperature', './div[@class="zxt_shuju"]/ul/li/span//text()'),
        ('minTemperature', './div[@class="zxt_shuju"]/ul/li/b//text()'),
    )

    def parse(self, response):
        """Turn the forecast block of the page into ``WeatherItem`` objects.

        For each field, all matching text nodes are collected into a column;
        the i-th entries of the five columns together form the i-th item.

        Args:
            response: The downloaded page for one of ``start_urls``.

        Returns:
            A list of ``WeatherItem`` objects. When the columns differ in
            length, only as many items as the shortest column allows are
            built and a warning is logged (the original code raised
            ``IndexError`` in that situation).
        """
        field_names = [field for field, _ in self._FIELD_XPATHS]
        columns = {field: [] for field in field_names}

        for sub in response.xpath('//div[@class="day7"]'):
            for field, query in self._FIELD_XPATHS:
                columns[field].extend(sub.xpath(query).extract())

        lengths = {field: len(columns[field]) for field in field_names}
        if len(set(lengths.values())) > 1:
            # Misaligned columns usually mean the page layout changed;
            # report it instead of failing or silently dropping data.
            self.logger.warning(
                'Column lengths differ, truncating to shortest: %s', lengths)

        items = []
        for values in zip(*(columns[field] for field in field_names)):
            item = WeatherItem()
            for field, value in zip(field_names, values):
                item[field] = value
            items.append(item)
        return items


# Author's closing note: a bit lazy today -- full speed ahead tomorrow!