1. In settings.py, enable the item pipeline:
# Enable the item pipeline. NOTE: the original snippet had the entry
# commented out (which leaves the pipeline disabled) and, collapsed onto
# one line, the '#' also swallowed the closing brace — a syntax error.
# The number (300) is the pipeline's priority order (0-1000, lower runs first).
ITEM_PIPELINES = {
    'tianmao.pipelines.TianmaoPipeline': 300,
}
2. In settings.py, add the MongoDB connection configuration:
# MongoDB connection settings.
# NOTE: the original values contained embedded spaces (e.g. " 127.0.0.1 "),
# which would produce an invalid connection URI — they are stripped here.
# Replace the placeholder credentials with real values before running.
HOST = '127.0.0.1'    # MongoDB server address
PORT = 27017          # default MongoDB port
USER = 'username'     # placeholder — your MongoDB user
PWD = 'password'      # placeholder — your MongoDB password
DB = 'database_name'  # placeholder — target database
TABLE = 'table_name'  # placeholder — target collection
3. In pipelines.py, import pymongo and write the scraped data to the database:
from pymongo import MongoClient
class TianmaoPipeline(object):
    """Scrapy item pipeline that stores scraped items in MongoDB.

    Connection parameters are pulled from the project settings via
    ``from_crawler``; the client is opened when the spider starts and
    closed when it finishes.
    """

    def __init__(self, host, port, user, pwd, db, table):
        self.host = host
        self.port = port
        self.user = user
        self.pwd = pwd
        self.db = db
        self.table = table

    @classmethod
    def from_crawler(cls, crawler):
        """Alternate constructor: read connection settings from the crawler.

        Expects HOST, PORT, USER, PWD, DB and TABLE to be defined in the
        project's settings.py.
        """
        return cls(
            crawler.settings.get('HOST'),
            crawler.settings.get('PORT'),
            crawler.settings.get('USER'),
            crawler.settings.get('PWD'),
            crawler.settings.get('DB'),
            crawler.settings.get('TABLE'),
        )

    def open_spider(self, spider):
        # NOTE(review): credentials are interpolated raw into the URI; if the
        # user/password can contain reserved characters (':', '@', '%'), they
        # should be percent-escaped with urllib.parse.quote_plus — confirm.
        self.client = MongoClient(
            'mongodb://%s:%s@%s:%s' % (self.user, self.pwd, self.host, self.port)
        )

    def close_spider(self, spider):
        # Release the connection pool when the spider finishes.
        self.client.close()

    def process_item(self, item, spider):
        # Collection.save() was removed in PyMongo 4; insert_one() is the
        # supported way to persist a new document.
        self.client[self.db][self.table].insert_one(dict(item))
        # Scrapy contract: process_item must return the item (or raise
        # DropItem) so later pipelines receive it instead of None.
        return item