# Scrapy + inline_requests: chaining several dependent requests inside one callback

# -*- coding: utf-8 -*-
import json
import time
import scrapy
from inline_requests import inline_requests


class CoscoSpider(scrapy.Spider):
    """Collect vessel particulars from elines.coscoshipping.com.

    The crawl walks a four-level chain of JSON endpoints:

    1. ``findLineGroup``        -> list of continent-level line groups
    2. ``findLines``            -> lines belonging to one group
    3. ``vesselParticulars/search`` -> vessels serving one line
    4. ``findVesselByCode``     -> detail record of one vessel

    Levels 2-4 are fetched sequentially inside a single callback by means of
    the ``inline_requests`` decorator, and one ``dict`` item is yielded per
    vessel.
    """

    name = 'cosco'
    allowed_domains = ['elines.coscoshipping.com']
    start_urls = ['http://elines.coscoshipping.com/ebusiness/vesselParticulars/vesselParticularsByServices']

    # URL templates for the JSON endpoints; the trailing ``{}`` is always the
    # timestamp generated in ``parse``.
    _LINE_GROUP_URL = (
        'http://elines.coscoshipping.com/ebbase/public/general/findLineGroup'
        '?timestamp={}'
    )
    _LINES_URL = (
        'http://elines.coscoshipping.com/ebbase/public/general/findLines'
        '?lineCode={}&timestamp={}'
    )
    _VESSEL_SEARCH_URL = (
        'http://elines.coscoshipping.com/ebbase/public/vesselParticulars/search'
        '?pageSize=3&pageNum=1&state=lines&code={}&timestamp={}'
    )
    _VESSEL_DETAIL_URL = (
        'http://elines.coscoshipping.com/ebbase/public/general/findVesselByCode'
        '?code={}&timestamp={}'
    )

    # (item key, key in the search-result record) pairs, in output order.
    _SUMMARY_FIELDS = (
        ('line_code', 'serviceLoopAbbrv'),
        ('voyage_code', 'vesselCode'),
        ('vessel_Name', 'vesselName'),
        ('lloyds_number', 'lloydsNumber'),
        ('flag', 'flagCountry'),
        ('built_year', 'yearBuilt'),
        ('callSign', 'callSign'),
    )

    # (item key, key in the vessel-detail record) pairs, in output order.
    _DETAIL_FIELDS = (
        ('op_name', 'optName'),              # ship operator
        ('owner', 'owner'),                  # owner
        ('registry_port', 'registryPort'),   # port of registry
        ('class_society', 'classSociety'),   # classification society
        ('weight', 'grossTonnage'),          # gross tonnage
        ('net_tonnage', 'netTonnage'),       # net tonnage
        ('to_teuCap', 'totTeuCap'),          # total TEU capacity
        ('max_speed', 'maxSpeed'),           # maximum speed
    )

    def parse(self, response):
        """Request the list of line groups.

        The generated timestamp is sent as a query parameter and forwarded
        through ``meta['t']`` so every follow-up request reuses the same value.
        """
        # NOTE(review): the factor is 10000, not 1000 (milliseconds); kept
        # unchanged -- confirm what the API actually expects.
        t = int(time.time() * 10000)
        url = self._LINE_GROUP_URL.format(t)
        yield scrapy.Request(url=url, callback=self.parse_zhou_line, meta={'t': t})

    def _content(self, response, expected):
        """Return ``data.content`` from a JSON response, or an empty ``expected()``.

        An empty container is returned when the body is not valid JSON, when
        ``data`` or ``content`` is missing/null, or when ``content`` is not an
        instance of ``expected`` (``list`` or ``dict``). This keeps one bad
        response from raising inside the inline generator, which would abort
        every request still pending in the chain.
        """
        try:
            payload = json.loads(response.text)
        except ValueError:
            self.logger.warning('Non-JSON response from %s', response.url)
            return expected()
        data = payload.get('data') if isinstance(payload, dict) else None
        content = data.get('content') if isinstance(data, dict) else None
        return content if isinstance(content, expected) else expected()

    # Parse the continent-level line groups and everything below them.
    @inline_requests
    def parse_zhou_line(self, response):
        """Walk groups -> lines -> vessels -> vessel detail and yield items.

        Each ``yield scrapy.Request(...)`` without a callback is resolved by
        ``inline_requests`` and evaluates to the downloaded response. All such
        yields stay in this method so the decorator drives a single generator.

        Yields:
            dict: one item per vessel with the summary fields plus a
            one-element ``vessel_info`` list holding the detail fields.
        """
        t = response.meta['t']

        for group in self._content(response, list):
            # Continent / line-group code.
            group_code = group.get('code')
            lines_response = yield scrapy.Request(
                method='GET',
                url=self._LINES_URL.format(group_code, t),
                dont_filter=True,
            )

            for line in self._content(lines_response, list):
                line_code = line.get('code')
                search_response = yield scrapy.Request(
                    url=self._VESSEL_SEARCH_URL.format(line_code, t),
                    dont_filter=True,
                )

                for vessel in self._content(search_response, list):
                    # Summary fields come from the search-result record.
                    item = {key: vessel.get(src) for key, src in self._SUMMARY_FIELDS}

                    detail_response = yield scrapy.Request(
                        method='GET',
                        url=self._VESSEL_DETAIL_URL.format(item['voyage_code'], t),
                        dont_filter=True,
                    )
                    detail = self._content(detail_response, dict)
                    if not detail:
                        # Still emit the item; detail fields will all be None.
                        self.logger.warning(
                            'No vessel detail returned for code %s', item['voyage_code'])

                    item['vessel_info'] = [
                        {key: detail.get(src) for key, src in self._DETAIL_FIELDS}
                    ]
                    yield item

# "You may also like" (related-posts heading left over from the original blog page)

# Reposted from www.cnblogs.com/linpd/p/10316012.html