Exporting Scrapy-crawled data to Excel with xlwt

# -*- coding: utf-8 -*-

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html
import xlwt

class GanjiPipeline(object):

    def __init__(self):
        self.line = 1  # next row to write; row 0 holds the header
        self.file_name = "赶集.xls"  # output workbook, named after the Ganji site
        self.book = xlwt.Workbook(encoding="utf8")
        self.sheet = self.book.add_sheet("租房数据")  # "rental data" worksheet
        # xlwt column widths are measured in 1/256 of a character width
        self.sheet.col(0).width = 256 * 70
        self.sheet.col(1).width = 256 * 10
        self.sheet.col(2).width = 256 * 10
        self.sheet.col(3).width = 256 * 10
        self.sheet.col(4).width = 256 * 50

        # font height is in twips (1/20 of a point), so 300 = 15pt rows
        self.tall_style = xlwt.easyxf('font:height 300')
        first_row = self.sheet.row(0)
        first_row.set_style(self.tall_style)

        # header row: title, price, size, orientation, address
        head = ["标题", "价格", "面积", "朝向", "地址"]
        for col, h in enumerate(head):
            self.sheet.write(0, col, h)



    def process_item(self, item, spider):
        # write one listing per row; self.line is already an int,
        # so the int() casts from the original are unnecessary
        self.sheet.write(self.line, 0, item['title'])
        self.sheet.write(self.line, 1, item['price'])
        self.sheet.write(self.line, 2, item['size'])
        self.sheet.write(self.line, 3, item['chaoxiang'])
        self.sheet.write(self.line, 4, item['address'])
        self.sheet.row(self.line).set_style(self.tall_style)
        self.line += 1
        return item  # pass the item on to any later pipelines

    def close_spider(self, spider):
        # xlwt keeps the workbook in memory; save it once when the spider finishes
        self.book.save(self.file_name)

    def optimizeContent(self, res):
        # helper (not called above): strips artifacts left over from
        # str()-ing a bytes object, e.g. "b'...\n...'"
        res = res.replace("b'", '')
        res = res.replace('\\n', '')
        res = res.replace("'", '')
        res = res.replace('style', 'nouse')  # neutralize inline style attributes
        res = res.replace('\\.', '')  # explicit backslash-dot, was the ambiguous '\.'
        return res
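
The pipeline above reads five fields off each item. For completeness, here is a minimal sketch of the matching items.py; the field names come straight from the pipeline, but the class name GanjiItem is an assumption, so adjust it to your project:

# items.py: field names taken from the pipeline above;
# the class name GanjiItem is an assumption
import scrapy

class GanjiItem(scrapy.Item):
    title = scrapy.Field()      # 标题 (title)
    price = scrapy.Field()      # 价格 (price)
    size = scrapy.Field()       # 面积 (area)
    chaoxiang = scrapy.Field()  # 朝向 (orientation)
    address = scrapy.Field()    # 地址 (address)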

 

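As the boilerplate comment at the top says, the pipeline only runs once it is registered in settings.py. A minimal sketch, assuming the Scrapy project module is named ganji (adjust the dotted path to your own project):

# settings.py: the dotted path "ganji.pipelines" is an assumption
# based on the project name; 300 is an arbitrary pipeline priority
ITEM_PIPELINES = {
    'ganji.pipelines.GanjiPipeline': 300,
}
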
Origin www.cnblogs.com/php-linux/p/12504835.html