Crawler case - scraping operation and maintenance work orders

 

Source:

# coding=utf-8
import requests
from lxml import etree


class ChaxunSpdier:
    def __init__(self):
        self.start_url = 'http://111.40.232.237:9000/eoms35/sheet/complaint/complaint.do?method=performQuery'
        self.part_url = 'http://111.40.232.237:9000/eoms35/sheet/complaint/'
        self.headers = {
            'Connection': 'keep-alive',
            'Cookie': 'TSJSESSIONID=0000YvxNFfPYx8EBo8lsKNrKIl6:1bkt8lo7d',  # changes on every session
            'Host': '111.40.232.237:9000',
            'Referer': 'http://111.40.232.237:9000/eoms35/sheet/complaint/complaint.do?method=showQueryPage&type=interface&urlType=complaint&userName=liuhaoce&workSerial=0&isDutyMaster=false&workSerialTime=&startDuty=&endDuty=',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.116 Safari/537.36'}

    def parse_url(self, url):
        formdata = {
            'sheetIdStringExpression': 'like',
            'main.sheetId': '',  # work-order serial number
            'titleStringExpression': 'like',
            'main.title': '',
            'main.status': '',
            'statusChoiceExpression': '0',
            'task.taskName': '',
            'sendRoleIdStringExpression': 'in',
            'main.sendRoleId': '',
            'sendDeptIdStringExpression': 'in',
            'main.sendDeptId': '',
            'sendUserIdStringExpression': 'in',
            'main.sendUserId': '',
            'operateRoleIdStringExpression': 'in',
            'link.operateRoleId': '',
            'operateDeptIdStringExpression': 'in',
            'link.operateDeptId': '',
            'operateUserIdStringExpression': 'in',
            'link.operateUserId': '',
            'toDeptIdStringExpression': 'in',
            'showArea': 'Daqing,China Railcom',  # area the complaints belong to
            'main.toDeptId': '1005,1021',
            'main.complaintType1': '',
            'complaintType1ChoiceExpression': '1010615100202',  # complaint type: home-broadband (jiakuan) business
            'main.complaintType2': '',
            'complaintType2ChoiceExpression': '',
            'main.complaintType': '',
            'main.complaintType4': '',
            'main.complaintType5': '',
            'main.complaintType6': '',
            'main.complaintType7': '',
            'complaintNumStringExpression': '',
            'main.complaintNum': '',
            'parentCorrelationStringExpression': '',
            'main.parentCorrelation': '',
            'customAttributionStringExpression': 'like',
            'main.customAttribution': '',
            'repeatComplaintTimesStringExpression': '>=',
            'main.repeatComplaintTimes': '',
            'complaintDescStringExpression': 'like',
            'main.complaintDesc': '',
            'main.sendTime': '',
            'sendTimeStartDateExpression': '>=',
            'sendTimeStartDate': '2020-02-02 20:13:35',  # query start time
            'sendTimeLogicExpression': 'and',
            'sendTimeEndDateExpression': '<=',
            'sendTimeEndDate': '2020-02-23 20:13:35',  # query end time
            'queryType': 'record'
        } 
        response = requests.post(url, data=formdata, headers=self.headers)
        return response.content

    def get_content_list(self, html_raw):
        html = etree.HTML(html_raw)
        tr_list = html.xpath('//tbody/tr')  # each tr holds one complaint row
        content_list = []
        for content in tr_list:
            item = {}
            zineirong = content.xpath('./td')  # each complaint row is split across several td tags
            item['ticket_subject'] = zineirong[0].xpath('.//text()')[0]
            item['工单流水号'] = zineirong[1].xpath('./a/text()')[0]  # work-order serial number
            # item['处理时限'] = zineirong[3].xpath('./text()')[0]
            detail_link = self.part_url + zineirong[1].xpath('./a/@href')[0]
            detail_dict = self.get_gongdan_detail(detail_link)
            item['xiangqing'] = detail_dict  # work-order detail
            content_list.append(item)
        # link to the next page of the work-order list, if there is one
        next_gongdan_url = self.part_url + html.xpath("//a[text()='下一页']/@href")[0] if len(
            html.xpath("//a[text()='下一页']/@href")) > 0 else None
        return content_list, next_gongdan_url

    def get_gongdan_detail(self, url):
        html_raw = self.parse_url(url)
        html = etree.HTML(html_raw)
        xiangqing_dict = {}
        xiangqing_dict['complaint_content'] = html.xpath('//*[@id="complainttext"]/text()')
        # the dispatch target sits inside an iframe, so this xpath finds nothing (see the sketch below)
        xiangqing_dict['send_to'] = html.xpath('//div[@id="ext-gen47"]/table/tbody/tr[4]/td[4]/text()')
        xiangqing_dict['qita'] = html.xpath('//*[@id="ext-gen47"]/text()')
        return xiangqing_dict
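
    def get_iframe_field(self, url):
        # Not part of the original script: a minimal sketch of one way around the
        # iframe problem noted above in get_gongdan_detail. It assumes the iframe
        # exposes a src attribute holding a relative link on the same site; the
        # xpath used on the iframe document is only an illustrative guess.
        html = etree.HTML(self.parse_url(url))
        iframe_src = html.xpath('//iframe/@src')
        if not iframe_src:
            return []
        # fetch the iframe document itself, then query inside it
        iframe_html = etree.HTML(self.parse_url(self.part_url + iframe_src[0]))
        return iframe_html.xpath('//table/tbody/tr[4]/td[4]/text()')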

    def save_content_list(self, content_list):
        for i, v in enumerate(content_list, start=1):
            print(i, v)

    def run(self):
        next_url = self.start_url  # main work-order query page
        content_total_list = []
        while next_url is not None:
            html_raw = self.parse_url(next_url)  # fetch the raw source of each work-order page
            content_list, next_url = self.get_content_list(html_raw)  # extract the items and the next-page link
            content_total_list += content_list  # collect the items from every page
        self.save_content_list(content_total_list)  # print every work order

if __name__ == '__main__':
    spdier = ChaxunSpdier()
    spdier.run()
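
save_content_list above only prints each work order. If the results need to be kept, the list of dicts it receives can be written to a file instead; the following is a minimal sketch, assuming the keys built in get_content_list ('ticket_subject', '工单流水号', 'xiangqing') and an arbitrary output file name:

import csv
import json


def save_to_csv(content_list, path='gongdan.csv'):
    # Illustrative helper, not part of the original script.
    fieldnames = ['ticket_subject', '工单流水号', 'xiangqing']
    with open(path, 'w', newline='', encoding='utf-8-sig') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        for item in content_list:
            row = dict(item)
            # the detail dict is nested, so serialize it into a single cell
            row['xiangqing'] = json.dumps(row.get('xiangqing', {}), ensure_ascii=False)
            writer.writerow(row)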

 


Origin www.cnblogs.com/iamorz/p/12358379.html