Simple crawler lookup

The script below logs into a funds-management system through a browser, hands the session cookies over to requests, and scrapes a value out of the account-detail table.

# ***** Copyright © SUNING *****
# -*- coding: utf-8 -*-
from Hephaestus.constant import CONFIG_DICT
from Hephaestus.process import main_proc
from Hephaestus.forge import forging
import sn_win
sn_win = forging(sn_win)  # forging() wraps each automation module for the Hephaestus runtime
import sncrawler
sncrawler = forging(sncrawler)
import sndict
sndict = forging(sndict)
import snweb
snweb = forging(snweb)
# third-party imports used below, hoisted out of the function body
from bs4 import BeautifulSoup
from requests import Session
from requests.utils import cookiejar_from_dict


@main_proc("else")
def main(): # 主流程
#打开浏览器
driver = snweb.open_browser(browser_name="chrome",retry=True)
#打开资金管理系统
snweb.open_url(url='***')
#点击
snweb.click_element(locator="xpath=//INPUT[@id='username']",timeout=10,intr=True,index=0)
#获取值
fundAccount = sndict.get_value(dic=CONFIG_DICT,key='fundAccount')
#输入资金管理系统用户名
result = snweb.input_text(locator="xpath=//INPUT[@id='username']",text=fundAccount[0],timeout=10,intr=True,index=0)
#清空文本框
snweb.clear_element(locator="xpath=//INPUT[@id='password']",timeout=10,intr=True,index=0)
#输入资金管理系统密码
result = snweb.input_text(locator="xpath=//INPUT[@id='password']",text=fundAccount[1],timeout=10,intr=True,index=0)
#点击登录
result = snweb.click_element(locator="xpath=//a[@id='loginbtn']",timeout=10,intr=True,index=0)
    # copy the browser session's cookies into a requests.Session
    cookieStr = driver.get_cookies()
    cookies_dict = {cookiedic["name"]: cookiedic["value"] for cookiedic in cookieStr}
    cookies = cookiejar_from_dict(cookies_dict)
    session = Session()
    session.cookies = cookies
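    # driver.get_cookies() follows the Selenium WebDriver API: it returns a list
    # of dicts with 'name'/'value' (and other) keys, so rebuilding them as a
    # CookieJar carries the authenticated session over to plain HTTP requests
    # and lets the POSTs below skip the login form entirely.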

    # build the query form data
    data = sndict.create_dict()
    # bank account number
    sndict.set_value(dic=data, key='bankAcc', value='32001881700052502564')
    # online flag
    sndict.set_value(dic=data, key='isOnline', value='1')
    # detail type
    sndict.set_value(dic=data, key='balanceType', value=0)
    # company code
    sndict.set_value(dic=data, key='corpCode', value='G002')
    # start date
    sndict.set_value(dic=data, key='dateStart', value='2019-05-01')
    # end date
    sndict.set_value(dic=data, key='dateEnd', value='2019-05-31')
    # sort order
    sndict.set_value(dic=data, key='orderByField', value='B.CORP_CODE,A.ROWID')
    # first POST: fetch page 1 to learn the page and record counts
    resp = session.post(url='http://funddc.cnsuning.com/snweb_datacenter/queryAccountDetails.do', data=data)

    # get the response body text
    text = sncrawler.get_req_text(r=resp)
    # regex: find all matches of "共N页" (N pages in total)
    totalpage = sn_win.findallstr(sText=text, pattern=r"共(\d+)页")
    # regex: find all matches of "共N条记录" (N records in total)
    totalnumber = sn_win.findallstr(sText=text, pattern=r'共(\d+)条记录')
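    # Presumably sn_win.findallstr wraps re.findall and returns a list of the
    # captured groups; the standard-library equivalent would be:
    #     import re
    #     totalpage = re.findall(r"共(\d+)页", text)        # e.g. ['12']
    #     totalnumber = re.findall(r"共(\d+)条记录", text)  # e.g. ['573']
    # If so, the POST below sends list values, and totalpage[0] / totalnumber[0]
    # would be the safer form.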
    # page control: jump straight to the reported last page
    sndict.set_value(dic=data, key='pageControlData.changePageNumber', value=totalpage)
    # 50 rows per page
    sndict.set_value(dic=data, key='pageControlData.pageSize', value='50')
    # form action
    sndict.set_value(dic=data, key='formAction', value='list')
    # current page
    sndict.set_value(dic=data, key='pageControlData.currentPage', value='-1')
    # total record count
    sndict.set_value(dic=data, key='pageControlData.resultCount', value=totalnumber)
    # build the request headers
    headDict = sndict.create_dict()
    # Referer header
    sndict.set_value(dic=headDict, key='Referer', value='http://funddc.cnsuning.com/snweb_datacenter/queryAccountDetails.do')
    # Host header
    sndict.set_value(dic=headDict, key='Host', value='funddc.cnsuning.com')
    # form-encoded body
    sndict.set_value(dic=headDict, key='Content-Type', value='application/x-www-form-urlencoded')
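    # The Referer and Host headers are presumably checked server-side before
    # the query is served, so they are set to match a request coming from the
    # site itself; Content-Type matches the form-encoded body requests sends.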
    # second POST: fetch the target page and parse the detail table
    resp = session.post(url='***', data=data, headers=headDict)
    text = sncrawler.get_req_text(r=resp)
    soup = BeautifulSoup(text, 'html.parser')
    # third-to-last row, 25th cell of the detail table; positional indexing
    # like this is brittle if the page layout changes
    table = soup.find('table', attrs={'class': 'list', 'align': 'center'})
    money = table.find_all('tr')[-3].find_all('td')[24].text.strip()
    print(money)



if __name__ == '__main__':
    main()
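
For readers without the Hephaestus wrappers, here is a minimal sketch of the same login-then-scrape pattern using the public libraries the wrappers appear to sit on: Selenium for the browser login, requests for the fast follow-up queries, and BeautifulSoup for parsing. The login URL and the second query URL are elided ('***') in the original post, so they stay placeholders here; everything else mirrors the script above.

import re

from bs4 import BeautifulSoup
from requests import Session
from requests.utils import cookiejar_from_dict
from selenium import webdriver
from selenium.webdriver.common.by import By

QUERY_URL = 'http://funddc.cnsuning.com/snweb_datacenter/queryAccountDetails.do'
LOGIN_URL = '***'   # elided in the original post
RESULT_URL = '***'  # elided in the original post; the Referer suggests QUERY_URL

def fetch_balance(username, password):
    # 1. Log in through a real browser so the server sets the session cookies.
    driver = webdriver.Chrome()
    driver.get(LOGIN_URL)
    driver.find_element(By.ID, 'username').send_keys(username)
    driver.find_element(By.ID, 'password').send_keys(password)
    driver.find_element(By.ID, 'loginbtn').click()

    # 2. Hand the browser's cookies over to a requests.Session.
    session = Session()
    session.cookies = cookiejar_from_dict(
        {c['name']: c['value'] for c in driver.get_cookies()})
    driver.quit()

    # 3. First POST: read the total page/record counts off the first page.
    data = {
        'bankAcc': '32001881700052502564',
        'isOnline': '1',
        'balanceType': '0',
        'corpCode': 'G002',
        'dateStart': '2019-05-01',
        'dateEnd': '2019-05-31',
        'orderByField': 'B.CORP_CODE,A.ROWID',
    }
    text = session.post(QUERY_URL, data=data).text
    total_pages = re.findall(r'共(\d+)页', text)[0]
    total_records = re.findall(r'共(\d+)条记录', text)[0]

    # 4. Second POST: jump to the last page and parse the detail table.
    data.update({
        'pageControlData.changePageNumber': total_pages,
        'pageControlData.pageSize': '50',
        'formAction': 'list',
        'pageControlData.currentPage': '-1',
        'pageControlData.resultCount': total_records,
    })
    headers = {
        'Referer': QUERY_URL,
        'Host': 'funddc.cnsuning.com',
        'Content-Type': 'application/x-www-form-urlencoded',
    }
    text = session.post(RESULT_URL, data=data, headers=headers).text
    soup = BeautifulSoup(text, 'html.parser')
    table = soup.find('table', attrs={'class': 'list', 'align': 'center'})
    return table.find_all('tr')[-3].find_all('td')[24].text.strip()

The browser is used only to obtain an authenticated session; once the cookies are copied, every query runs over plain HTTP, which is much faster and easier to paginate than driving the UI.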


Reposted from www.cnblogs.com/jessitommy/p/11075974.html