Python 爬取 HNIST 教务处,输出成绩和学分

import requests
import js2py
import time
import matplotlib.pyplot as plt # plt 用于显示图片
import matplotlib.image as mpimg # mpimg 用于读取图片
from lxml import etree
def md5_pass(password):
    """Hash *password* with the ``hex_md5`` function from the site's md5.js.

    The script is downloaded from the module-level ``md5_url``, evaluated in
    a js2py context, and its ``hex_md5`` function is called on *password*.

    Args:
        password: The plain-text password to hash.

    Returns:
        Whatever ``hex_md5(password)`` evaluates to in the JS context.

    Raises:
        requests.RequestException: If the script cannot be downloaded, the
            request times out, or the server answers with an error status.
    """
    # Fail fast instead of hanging forever or executing an HTML error page
    # as if it were JavaScript.
    script_response = requests.get(md5_url, timeout=10)
    script_response.raise_for_status()

    # SECURITY NOTE(review): this executes code fetched over plain HTTP.
    # Anyone able to tamper with the response controls what runs here.
    ctx = js2py.EvalJs()  # fresh JS evaluation context
    ctx.execute(script_response.text)  # defines hex_md5 inside the context
    return ctx.hex_md5(password)
def check_contain_chinese(check_str):
    """Tell whether *check_str* has any character in U+4E00..U+9FFF.

    Args:
        check_str: The string to scan, character by character.

    Returns:
        True if at least one character lies in the inclusive range
        U+4E00..U+9FFF, otherwise False (including for an empty string).
    """
    return any('\u4e00' <= char <= '\u9fff' for char in check_str)
def open_captcha():
    """Show the captcha image saved as ``img.png`` without blocking.

    Reads ``img.png`` from the current working directory and displays it in
    a matplotlib window with the axes hidden. ``block=False`` lets the caller
    continue (e.g. to prompt for the captcha text) while the window is open.
    """
    # NOTE(review): the second positional argument (0) is presumably
    # imread's ``format`` parameter; it is kept exactly as in the original.
    # Confirm against the matplotlib docs before changing it.
    captcha_image = mpimg.imread('img.png', 0)
    plt.imshow(captcha_image)
    plt.axis('off')
    plt.show(block=False)
def _clean_cell(text):
    """Return *text* as a string with CR, LF, tab and space characters removed."""
    return (
        str(text)
        .replace('\r', '')
        .replace('\n', '')
        .replace('\t', '')
        .replace(' ', '')
    )


def _is_failing(grade):
    """Return True if the cleaned grade cell *grade* denotes a failed course."""
    if check_contain_chinese(grade):
        # Textual grades: only the literal "fail" marker counts.
        return grade == '不及格'
    try:
        return float(grade) < 60
    except ValueError:
        # Blank or otherwise non-numeric cell: cannot be judged, so it is
        # not reported as failed (previously this crashed the whole run).
        return False


def _group_scores(cells):
    """Group flat table cells into 6-cell rows and collect failed courses.

    Returns a ``(rows, failed)`` tuple. ``rows`` is a list of 6-item lists in
    page order; trailing cells that do not fill a whole row are dropped.
    ``failed`` holds the second cell of every row whose fourth cell is a
    failing grade.
    """
    rows = []
    failed = []
    current = []
    for position, raw in enumerate(cells, start=1):
        cell = _clean_cell(raw)
        current.append(cell)
        if position % 6 == 4 and _is_failing(cell):
            # The 4th cell of a row is the grade; the 2nd is the course name.
            failed.append(current[1])
        if position % 6 == 0:
            rows.append(current)
            current = []
    return rows, failed


def login():
    """Log in interactively and print the score table and summary figures.

    Reads the module-level ``img_url``, ``tar_url``, ``origin_url``,
    ``header``, ``username`` and ``password``. Steps:

    1. Download the captcha to ``img.png``, display it and prompt for it.
    2. POST the credentials and captcha to ``tar_url``.
    3. Follow the link found under the element with id ``1443377`` on the
       returned page and parse the score table from it.
    4. Print every row (last row on the page first, first column skipped),
       the summary label/value pairs, and the list of failed courses.

    Raises:
        RuntimeError: If the page returned after the login POST does not
            contain the expected link to the score page.
    """
    http_session = requests.session()

    # Fetch the captcha within the same session so it matches the login POST.
    img_response = http_session.get(img_url, headers=header, stream=True)
    with open('img.png', 'wb') as f:
        f.write(img_response.content)
    time.sleep(1)
    open_captcha()
    captcha = input("输入验证码  :")

    form = {
        "j_username": username,
        "j_password": password,
        "j_captcha": captcha,
    }
    response = http_session.post(tar_url, data=form, headers=header)
    response.encoding = 'utf-8'
    home_page = etree.HTML(response.text)

    score_links = home_page.xpath('//*[@id="1443377"]/a/@href')
    if not score_links:
        # Without this check a failed login surfaced as a bare IndexError.
        raise RuntimeError("登录失败:未找到成绩页面链接,请检查账号、密码或验证码")
    chenji_url = origin_url + str(score_links[0])

    response2 = http_session.get(chenji_url, headers=header)
    # Decode like the first response so the Chinese grade comparison is not
    # affected by a wrongly guessed charset.
    response2.encoding = 'utf-8'
    score_page = etree.HTML(response2.text)

    chenji_list = score_page.xpath(
        '//*[@class="col-xs-12 jihua"]/table/tbody/tr/td/text()')
    inmb = score_page.xpath(
        '//*[@class="infobox-container"]/div/div[2]/span/text()')
    ism = score_page.xpath(
        '//*[@class="infobox-container"]/div/div/div/text()')

    rows, not_pass = _group_scores(chenji_list)

    # Rows are printed in reverse page order, without their first column.
    for row in reversed(rows):
        for field in row[1:]:
            print(field, end=' | ')
        print('')

    # zip() stops at the shorter list, so mismatched lengths cannot raise.
    for label, value in zip(ism, inmb):
        print(label, value, end='|')
    print('\n不及格的科目有:', not_pass)

if __name__ == "__main__":
    # Fill in real credentials before running; these are placeholders.
    username = "账号"
    password = "密码"
    # Not read by login(), which prompts for its own local captcha value.
    captcha = ""

    # Endpoints on the site; all are read as globals by the functions above.
    origin_url = "http://bkjw.hnist.cn"
    tar_url = origin_url + "/j_spring_security_check"
    img_url = origin_url + "/img/captcha.jpg"
    md5_url = origin_url + "/js/md5/md5.js"

    # The header value previously began with a stray "User-Agent': '" prefix
    # pasted into the string, so a malformed User-Agent was sent.
    header = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.108 Safari/537.36",
    }

    # The login form is sent the hex_md5 result rather than the plain text.
    password = md5_pass(password)
    login()

代码比较简陋,输出的内容也没有进一步拓展,但最难的两部分——验证码处理和密码的 MD5 转换——已经完成了。

接下来全看个人发挥

填写好账号和密码

安装好所需的包(requests、js2py、matplotlib、lxml)之后就能运行了

运行结果:

发布了18 篇原创文章 · 获赞 15 · 访问量 1752

猜你喜欢

转载自blog.csdn.net/kiasszz/article/details/105695419