# 持久化存储管道 — persistent-storage (item) pipelines

import json

import pymysql
from redis import Redis


class HuyaproPipeline(object):
    """Item pipeline that appends each scraped record to a local text file.

    The file handle is opened once per crawl (open_spider) and closed once
    at the end (close_spider), so process_item only ever writes.
    """

    # File handle; populated in open_spider, released in close_spider.
    fp = None

    def open_spider(self, spider):
        # Called exactly once when the spider starts.
        print('i am open_spider()')
        self.fp = open('huyazhibo.txt', 'w', encoding='utf-8')

    def process_item(self, item, spider):
        # `item` is the object handed over by the spider for each record.
        record = f"{item['title']}:{item['author']}:{item['hot']}\n"
        self.fp.write(record)
        print(item['title'], '写入成功!!!')
        # Return the item so any later pipeline in the chain receives it.
        return item

    def close_spider(self, spider):
        # Called exactly once when the spider finishes.
        self.fp.close()
        print('i am close_spider()')


class mysqlPipeLine(object):
    """Item pipeline that inserts each scraped record into MySQL.

    Inserts into table `huya` of database `Spider`. One row per item;
    commits on success, rolls back on failure.
    """

    # Connection, created in open_spider.
    conn = None
    # Cursor, created lazily on the first process_item call.
    cursor = None

    def open_spider(self, spider):
        # NOTE: pymysql wants the MySQL charset name 'utf8' — 'utf-8' errors.
        self.conn = pymysql.Connect(host='127.0.0.1', port=3306, user='root',
                                    password='123', db='Spider',
                                    charset='utf8')
        print(self.conn)

    def process_item(self, item, spider):
        # Parameterized query: the driver escapes the values, so quotes in
        # scraped text can't break the statement (or inject SQL), unlike the
        # previous %-string interpolation.
        sql = 'insert into huya values(%s,%s,%s)'
        self.cursor = self.conn.cursor()
        try:
            self.cursor.execute(sql, (item['title'], item['author'], item['hot']))
            # Commit only when the insert raised no exception.
            self.conn.commit()
        except Exception as e:
            print(e)
            # Undo the partial transaction on any failure.
            self.conn.rollback()
        return item

    def close_spider(self, spider):
        # cursor stays None if no item was ever processed — guard against
        # AttributeError on shutdown.
        if self.cursor is not None:
            self.cursor.close()
        self.conn.close()


class RedisPipeLine(object):
    """Item pipeline that pushes each scraped record onto a Redis list."""

    # Redis client, created in open_spider.
    conn = None

    def open_spider(self, spider):
        self.conn = Redis(host='127.0.0.1', port=6379)

    def process_item(self, item, spider):
        # redis-py only accepts bytes/str/int/float values — pushing the raw
        # item (a dict/Item) raises redis.exceptions.DataError. Serialize to
        # JSON first; ensure_ascii=False keeps Chinese text readable.
        self.conn.lpush('huyaList', json.dumps(dict(item), ensure_ascii=False))
        return item

# Adapted from: https://www.cnblogs.com/zhang-da/p/12350128.html