Scrapy 中的 items 用于对抓取到的数据进行清洗

import scrapy

from scrapy.loader import ItemLoader

from scrapy.loader.processors import TakeFirst,MapCompose

# 求最低工资和最高工资的平均值
def chage_salary(s_value):
    """Collapse a salary range string into a single averaged value.

    Args:
        s_value: Raw salary text. A range is expected to look like
            ``"10k-15k"``: two bounds separated by ``-``, each ending in a
            one-character unit suffix (assumed to be ``k`` -- the last
            character of every bound is dropped before parsing).

    Returns:
        ``s_value`` unchanged when it contains no ``-`` (e.g. "salary
        negotiable") or consists only of separators; otherwise a string of
        the form ``"<number>k"`` holding the mean of the two bounds, or the
        single/first bound when the text does not contain exactly two.

    Raises:
        ValueError: If a bound, minus its last character, is not a number.
    """
    # No "-" means there is no range to average (salary is negotiable).
    if "-" not in s_value:
        return s_value

    # Drop empty pieces so open-ended ranges such as "10k-" or "-20k" are
    # treated as "only the minimum / only the maximum is known".
    bounds = [part for part in s_value.split("-") if part]
    if not bounds:
        return s_value

    if len(bounds) == 2:
        # Slice off the trailing unit character of each bound before parsing.
        avg_salary = (float(bounds[0][:-1]) + float(bounds[1][:-1])) / 2
    else:
        # Only one usable bound (or an unexpected extra one): use the first.
        avg_salary = float(bounds[0][:-1])

    return "{}k".format(avg_salary)

class JobItemLoder(ItemLoader):
    """ItemLoader whose fields default to the ``TakeFirst`` output processor."""

    # Applied to every field that does not declare its own output processor.
    default_output_processor = TakeFirst()

class SimulatorspiderItem(scrapy.Item):
    """Item declaring the ``salary`` and ``position`` fields."""

    # Each incoming salary value is passed through chage_salary by MapCompose.
    salary = scrapy.Field(input_processor=MapCompose(chage_salary))
    # Declared without any field-specific processors.
    position = scrapy.Field()

猜你喜欢

转载自blog.csdn.net/weixin_44274975/article/details/88358260