学习随笔 Scrapy项目抓取天气预报

关于 XPath 和 CSS 选择器路径的获取：在浏览器中按 F12 打开开发者工具，选中目标元素后点击鼠标右键，即可复制该元素的 XPath 或 CSS 选择器路径。

weather/items.py

# -*- coding: utf-8 -*-

# Define here the models for your scraped items
#
# See documentation in:
# https://doc.scrapy.org/en/latest/topics/items.html

import scrapy


class WeatherItem(scrapy.Item):
    """One row of scraped weather data.

    All five fields are plain ``scrapy.Field()`` declarations with no
    metadata; the values are whatever the spider assigns to them.
    """
    # Date label of the forecast entry.
    date = scrapy.Field()
    # Day-of-week label of the forecast entry.
    week = scrapy.Field()
    # Weather description text.
    weather = scrapy.Field()
    # Highest temperature for the entry.
    maxTemperature = scrapy.Field()
    # Lowest temperature for the entry.
    minTemperature = scrapy.Field()

weather/pipelines.py

# -*- coding: utf-8 -*-

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html
import time, os.path

class WeatherPipeline(object):
    """Append every scraped weather item to a per-day, tab-separated text file."""

    # Item keys written for each record, in column order.
    _COLUMNS = ('date', 'week', 'weather', 'maxTemperature', 'minTemperature')

    def process_item(self, item, spider):
        """Write one item as a line of tab-separated values and pass it on.

        The output file is named after the current local date
        (``YYYYMMDD.txt``) and is created in the current working directory;
        records are appended, each followed by a blank line.

        Args:
            item: Mapping-like object providing the keys in ``_COLUMNS``;
                every value must be a ``str``.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item``, unchanged, so later pipelines can process it.

        Raises:
            KeyError: If one of the expected fields is missing from ``item``.
            TypeError: If a field value is not a string.
        """
        filename = time.strftime('%Y%m%d', time.localtime()) + '.txt'

        # Build the whole record first so a bad item never leaves a
        # half-written line in the file.
        record = '\t'.join(item[column] for column in self._COLUMNS) + '\n\n'

        # The scraped text is Chinese; pin UTF-8 so the output does not depend
        # on the platform's default encoding.
        with open(filename, 'a', encoding='utf-8') as fp:
            fp.write(record)

        return item

weather/spiders/WeatherSpider.py

# -*- coding: utf-8 -*-
import scrapy
from weather.items import WeatherItem


class WeatherspiderSpider(scrapy.Spider):
    """Scrape the weather table for Shaoxing from www.tianqi.com.

    The start page is parsed once; every ``div.day7`` container on it
    contributes one value per entry for each ``WeatherItem`` field.
    """
    name = 'WeatherSpider'
    # allowed_domains takes bare domain names only; an entry containing a URL
    # path never matches a request's hostname.
    allowed_domains = ['www.tianqi.com']
    start_urls = ['http://www.tianqi.com/shaoxing/']

    # (item field, XPath relative to a div.day7 container) in item order.
    _FIELD_XPATHS = (
        ('date', './ul[@class="week"]/li/b//text()'),
        ('week', './ul[@class="week"]/li/span//text()'),
        ('weather', './ul[@class="txt txt2"]/li//text()'),
        ('maxTemperature', './div[@class="zxt_shuju"]/ul/li/span//text()'),
        ('minTemperature', './div[@class="zxt_shuju"]/ul/li/b//text()'),
    )

    def parse(self, response):
        """Extract the weather entries from the start page.

        Args:
            response: The downloaded page for one of ``start_urls``.

        Returns:
            A list of ``WeatherItem`` objects, one per entry. The i-th item
            combines the i-th text node matched for each field. If the
            fields yield different numbers of nodes, only as many items as
            the shortest field are produced, instead of raising IndexError.
        """
        fields = [field for field, _ in self._FIELD_XPATHS]
        columns = {field: [] for field in fields}

        # Collect each field's text nodes column by column.
        for container in response.xpath('//div[@class="day7"]'):
            for field, path in self._FIELD_XPATHS:
                columns[field].extend(container.xpath(path).extract())

        # Transpose the columns into rows; zip stops at the shortest column.
        items = []
        for row in zip(*(columns[field] for field in fields)):
            item = WeatherItem()
            for field, value in zip(fields, row):
                item[field] = value
            items.append(item)

        return items

今日略懒，明天火力全开！

学习随笔 Scrapy项目抓取天气预报

猜你喜欢