Writing Scrapy data to MongoDB

1. Enable the pipeline in settings.py

ITEM_PIPELINES = {
    'tianmao.pipelines.TianmaoPipeline': 300,
}

2. Add the MongoDB configuration to settings.py

# MongoDB
HOST = "127.0.0.1"      # server address
PORT = 27017            # default MongoDB port
USER = "user name"
PWD = "password"
DB = "database name"
TABLE = "collection name"   # MongoDB collections play the role of tables
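
Before running the crawl you can sanity-check these values with a short standalone script (a minimal sketch, assuming pymongo is installed and the placeholders above are replaced with real credentials):

# connectivity check, run outside Scrapy
from pymongo import MongoClient

client = MongoClient('mongodb://%s:%s@%s:%s' % ("user", "password", "127.0.0.1", 27017))
print(client.list_database_names())   # lists the databases this user can see
client.close()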

3. In pipelines.py, import pymongo and write the data to the database

from pymongo import MongoClient
class TianmaoPipeline(object):
    def __init__(self, host, port, user, pwd, db, table):
        self.host = host
        self.port = port
        self.user = user
        self.pwd = pwd
        self.db = db
        self.table = table

    @classmethod
    def from_crawler(cls, crawler):
        # read the MongoDB settings defined in settings.py
        HOST = crawler.settings.get('HOST')
        PORT = crawler.settings.get('PORT')
        USER = crawler.settings.get('USER')
        PWD = crawler.settings.get('PWD')
        DB = crawler.settings.get('DB')
        TABLE = crawler.settings.get('TABLE')
        return cls(HOST, PORT, USER, PWD, DB, TABLE)

    def open_spider(self, spider):
        # open a single MongoDB connection when the spider starts
        self.client = MongoClient('mongodb://%s:%s@%s:%s' % (self.user, self.pwd, self.host, self.port))

    def close_spider(self, spider):
        self.client.close()

    def process_item(self, item, spider):
        # insert_one() replaces the deprecated Collection.save()
        self.client[self.db][self.table].insert_one(dict(item))
        return item
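
For completeness, here is a minimal sketch of the item side (the field names title and price are hypothetical); whatever the spider yields reaches process_item() above as a dict-like object and is inserted into the collection:

# items.py -- illustrative fields only
import scrapy

class TianmaoItem(scrapy.Item):
    title = scrapy.Field()
    price = scrapy.Field()

# in the spider's parse() method:
#     yield TianmaoItem(title='example product', price='9.99')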

 
