# -*- coding: utf-8 -*-

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html
import csv

import xlwt


class GanjiPipeline(object):
    """Scrapy item pipeline that collects scraped items into an .xls workbook.

    The workbook is built in memory: ``__init__`` creates the sheet and its
    header row, ``process_item`` appends one row per item, and
    ``close_spider`` saves the workbook to ``self.file_name``.
    """

    # Header titles, in column order (written to row 0).
    _HEADERS = ("标题", "价格", "面积", "朝向", "地址")

    # Item keys, in the same column order as _HEADERS.
    _FIELDS = ("title", "price", "size", "chaoxiang", "address")

    # Column width multipliers, one per column. Each is multiplied by 256
    # below; xlwt measures column width in 1/256ths of a character width,
    # so these are approximate widths in characters.
    _COLUMN_CHARS = (70, 10, 10, 10, 50)

    def __init__(self):
        """Create the workbook, size the columns and write the header row."""
        # Index of the next row to write; row 0 holds the header.
        self.line = 1
        self.file_name = "赶集.xls"
        self.book = xlwt.Workbook(encoding="utf8")
        self.sheet = self.book.add_sheet("租房数据")

        for col, chars in enumerate(self._COLUMN_CHARS):
            self.sheet.col(col).width = 256 * chars

        # Row style with a larger font height; applied to every row written.
        self.tall_style = xlwt.easyxf('font:height 300')
        self.sheet.row(0).set_style(self.tall_style)

        # enumerate() gives each title its own column index directly, rather
        # than searching the list for every title.
        for col, title in enumerate(self._HEADERS):
            self.sheet.write(0, col, title)

    def process_item(self, item, spider):
        """Append ``item`` as the next row of the sheet.

        Args:
            item: Mapping-like scraped item providing the keys ``title``,
                ``price``, ``size``, ``chaoxiang`` and ``address``.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item``, so that any pipeline configured after this
            one still receives it.

        Raises:
            KeyError: If ``item`` lacks one of the expected keys.
        """
        row = self.line
        for col, key in enumerate(self._FIELDS):
            self.sheet.write(row, col, item[key])
        self.sheet.row(row).set_style(self.tall_style)
        self.line = row + 1
        # Scrapy passes the return value on to the next pipeline component;
        # returning nothing would hand later components None.
        return item

    def close_spider(self, spider):
        """Save the in-memory workbook to ``self.file_name``."""
        self.book.save(self.file_name)

    def optimizeContent(self, res):
        """Return ``res`` with a fixed series of substrings removed/replaced.

        The replacements are applied in order, each on the result of the
        previous one:

        1. ``b'`` is removed.
        2. A literal backslash followed by ``n`` (two characters, not a
           newline character) is removed.
        3. ``'`` is removed.
        4. ``style`` is replaced with ``nouse``.
        5. A literal backslash followed by ``.`` is removed.

        NOTE(review): this looks intended for text produced by ``str()`` of
        a ``bytes`` value -- confirm against callers.

        Args:
            res: The string to clean.

        Returns:
            The cleaned string.
        """
        # Order matters: "b'" has to be stripped before the bare "'" pass,
        # otherwise a stray "b" would be left behind.
        replacements = (
            ("b'", ''),
            ('\\n', ''),
            ("'", ''),
            ('style', 'nouse'),
            ('\\.', ''),
        )
        for old, new in replacements:
            res = res.replace(old, new)
        return res