import pymysql
class HongxiuPipeline(object):
    """Scrapy item pipeline that persists Hongxiu novel items to MySQL.

    open_spider()/close_spider() each run exactly once, when the spider
    is opened and closed, so a single connection is created per crawl
    and reused by every process_item() call.
    """

    def open_spider(self, spider):
        # One connection per crawl, torn down in close_spider().
        self.connect = pymysql.connect(
            host='localhost',
            user='root',
            port=3306,
            passwd='123456',
            db='hongxiu',
            charset='utf8'  # NOTE(review): 'utf8mb4' handles full Unicode — confirm table charset
        )
        self.cursor = self.connect.cursor()

    def process_item(self, item, spider):
        """Insert one scraped item into the `hx` table and return it.

        Returning the item (the original dropped it by returning None)
        lets any later pipelines in ITEM_PIPELINES keep processing it.
        Raises whatever pymysql raises on a failed insert, after rolling
        the transaction back so the connection stays usable.
        """
        insert_sql = "INSERT INTO hx(title, author, tags, total_word_num, keep_num, click_num, info) VALUES (%s, %s, %s, %s, %s, %s, %s)"
        try:
            # Parameterized query: the driver escapes values (no SQL injection).
            self.cursor.execute(insert_sql, (
                item['title'], item['author'], item['tags'],
                item['total_word_num'], item['keep_num'],
                item['click_num'], item['info'],
            ))
            self.connect.commit()
        except Exception:
            # Undo the failed insert so later items can still commit,
            # then re-raise for Scrapy to log.
            self.connect.rollback()
            raise
        return item

    def close_spider(self, spider):
        # Release cursor and connection once the crawl finishes.
        self.cursor.close()
        self.connect.close()
# Step 4: enable the pipeline in settings.py by adding it to ITEM_PIPELINES:
# Register the pipeline; 300 is its order value (lower-valued pipelines run first).
ITEM_PIPELINES = {"hongxiu.pipelines.HongxiuPipeline": 300}