1. 在 pipelines.py 中定义自己的 pipeline:
from openpyxl import Workbook
class ExcelPipeline(object):
    """Scrapy item pipeline that exports scraped job items to ``job.xlsx``.

    A workbook containing a header row is created when the pipeline is
    instantiated, ``process_item`` appends one row per item, and
    ``close_spider`` writes the workbook to disk once.
    """

    def __init__(self):
        """Create the workbook and write the header row to its active sheet."""
        self.wb = Workbook()
        self.ws = self.wb.active
        # Header row; the column order must match the row built in process_item.
        self.ws.append(['工作名称', '工作地点', '薪资', '公司名称', '工作经验', '学历', '招收人数', '发布时间', '工作信息'])

    def process_item(self, item, spider):
        """Append ``item`` to the worksheet as one row and return it unchanged.

        Args:
            item: Mapping-like scraped item providing the nine job fields
                read below.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item`` object that was passed in.

        A missing field propagates the lookup error raised by ``item[...]``.
        """
        line = [
            item['job_name'],
            item['job_place'],
            item['salary'],
            item['company'],
            item['experience'],
            item['education_level'],
            item['person_num'],
            item['release_time'],
            item['info'],
        ]
        self.ws.append(line)
        # The workbook is deliberately NOT saved here: saving re-serialises
        # every row collected so far, so doing it per item makes the total
        # cost grow quadratically. Rows are persisted in close_spider instead.
        return item

    def close_spider(self, spider):
        """Save the collected rows to ``job.xlsx``.

        Scrapy's item pipeline interface calls this hook when the spider is
        closed, so the file is written once per crawl.
        """
        self.wb.save('job.xlsx')
2. 在 settings.py 中启用自己的 pipeline:
# Enable the Excel export pipeline. The key is the pipeline's import path and
# the value is its integer order (per Scrapy's settings documentation, lower
# values run earlier).
ITEM_PIPELINES = {'Job.pipelines.ExcelPipeline': 1}