scrapy-items

在 items.py 中定义字段名:

import scrapy


class SpideranythingItem(scrapy.Item):
    """Scrapy item declaring the three fields a scraped record may carry."""

    # Each scrapy.Field() below declares one key of the item.
    title = scrapy.Field()
    position = scrapy.Field()
    pub_date = scrapy.Field()

当爬取到数据时

            # Fill a fresh item from the selector `data`; every XPath below is
            # relative to it (presumably one table row -- confirm against the
            # enclosing loop, which is not shown here).
            item = SpideranythingItem()
            # First text node of the link inside the 1st <td>.
            item['title'] = data.xpath("./td[1]/a/text()").extract_first()
            # First text node of the 2nd <td>.
            item['position'] = data.xpath("./td[2]/text()").extract_first()
            # First text node of the 5th <td>.
            item['pub_date'] = data.xpath("./td[5]/text()").extract_first()

在 pipelines 中将数据存入 MongoDB,需先将 item 转换成 dict

from pymongo import MongoClient
# Module-level client, created when this module is imported. No arguments
# are passed, so the driver's default connection settings are used.
client = MongoClient()
# Handle to the 'hr' collection inside the 'SpiderAnything' database.
collection = client['SpiderAnything']['hr']


class SpideranythingPipeline(object):
    """Item pipeline that stores items produced by the ``hr`` spider in MongoDB."""

    def process_item(self, item, spider):
        """Persist *item* when it comes from the ``hr`` spider, then pass it on.

        Args:
            item: The scraped item; it is copied into a plain ``dict`` before
                being inserted.
            spider: The spider that produced the item; only ``spider.name``
                is read.

        Returns:
            The same ``item`` object, regardless of which spider produced it.
        """
        if spider.name == 'hr':
            print(item)

            # insert_one() replaces Collection.insert(), which was deprecated
            # in PyMongo 3 and removed in PyMongo 4.
            collection.insert_one(dict(item))

        # Return unconditionally: with the return inside the ``if``, items
        # from any other spider came back as None and were lost to every
        # later pipeline stage.
        return item

猜你喜欢

转载自www.cnblogs.com/tangpg/p/10685266.html