Python学习笔记第24天

谏言:穷则独善其身,达则兼济天下

爬取高考网数据:

import requests
from bs4 import BeautifulSoup
from pyecharts import options
from pyecharts.charts import Bar
# 文科
def wenkechax(html):
    """Extract the liberal-arts (文科) score table from the page HTML.

    Only the first ``<table>`` in the document is read. Walking it row by
    row, every non-empty ``<th>`` text is collected as a header entry (the
    years) and every ``<td>`` text is collected, whitespace-stripped, as a
    data cell. All rows are kept; nothing is filtered by batch.

    Args:
        html: The page markup as a string.

    Returns:
        A ``(wenke, nianfen)`` tuple: the flat list of ``<td>`` texts and
        the list of non-empty ``<th>`` texts, both in document order.

    Raises:
        IndexError: If the document contains no ``<table>`` element.
    """
    first_table = BeautifulSoup(html, 'html.parser').find_all('table')[0]
    years = []
    cells = []
    for row in first_table.find_all('tr'):
        # Header cells carry the years; blank header cells are skipped.
        years.extend(th.text for th in row.find_all('th') if th.text != '')
        # Data cells are stripped of the surrounding \r\n\t padding.
        cells.extend(td.text.strip() for td in row.find_all('td'))
    return cells, years



# 理科
def likechax(html):
    """Extract the science (理科) score table from the page HTML.

    Only the second ``<table>`` in the document is read. Every ``<td>`` text
    is collected, whitespace-stripped, in document order. All rows are kept;
    nothing is filtered by batch. Header (``<th>``) cells are not collected
    because this function only returns the data cells.

    Args:
        html: The page markup as a string.

    Returns:
        The flat list of ``<td>`` texts from the second table.

    Raises:
        IndexError: If the document contains fewer than two ``<table>``
            elements.
    """
    soup = BeautifulSoup(html, 'html.parser')
    second_table = soup.find_all('table')[1]
    like = []
    for tr in second_table.find_all('tr'):
        # Data cells are stripped of the surrounding \r\n\t padding.
        like.extend(td.text.strip() for td in tr.find_all('td'))
    return like

def gkw():
    """Interactively fetch one province's score-line page and chart it.

    Prompts for the province name in pinyin, downloads
    ``http://www.gaokao.com/<name>/fsx/``, decodes it as GBK, parses the
    liberal-arts and science tables, prints them, and hands them to
    ``keshihua`` for rendering.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Strip stray whitespace so it does not end up inside the URL path.
    diquname = input("请输入你要查询的省份地区拼音: ").strip()
    url = 'http://www.gaokao.com/' + diquname + '/fsx/'
    # A timeout keeps the script from hanging forever on a stalled server.
    response = requests.get(url, timeout=10)
    # Fail here on 4xx/5xx instead of later with an IndexError while parsing
    # an error page that has no tables.
    response.raise_for_status()
    html = response.content.decode("gbk")
    wenke, nianfen = wenkechax(html)
    print("年份", nianfen)
    print("文科", wenke)
    like = likechax(html)
    print("理科", like)
    keshihua(diquname, like, wenke, nianfen)


def keshihua(diquname,like,wenke,nianfen):
    """Render a bar chart of the score lines to an HTML file.

    ``wenke`` and ``like`` are flat lists of table cells in which each row
    is one batch label followed by one score per entry of ``nianfen``. The
    lists are split back into rows and every row becomes one series, named
    from its own label (e.g. "文科一本"). Reading the label from the data
    keeps the series names correct when a province has fewer or different
    batches than 一本/二本/三本/专科; fixed slice positions would otherwise
    plot one batch under another batch's name.

    Args:
        diquname: Province name; used in the chart title and file name.
        like: Flat cell list of the science table.
        wenke: Flat cell list of the liberal-arts table.
        nianfen: Year labels for the x axis.
    """
    # Each table row holds one label cell plus one score per year.
    row_width = len(nianfen) + 1

    c = Bar()
    c.add_xaxis(nianfen)
    # Liberal-arts series first, then science, matching the legend order
    # callers have seen so far.
    for prefix, cells in (("文科", wenke), ("理科", like)):
        for label, scores in _split_batches(cells, row_width):
            c.add_yaxis(prefix + label, scores)

    c.set_global_opts(title_opts=options.TitleOpts(title=diquname+"高考分数线",subtitle="2009-2019"))
    c.render(diquname+"历年高考分数线统计图.html")
    print("图像绘制完成")


def _split_batches(cells, row_width):
    """Split a flat cell list into ``(label, scores)`` rows of ``row_width`` cells."""
    return [
        (cells[start], cells[start + 1:start + row_width])
        for start in range(0, len(cells), row_width)
    ]
    
# Script entry point: start the interactive query as soon as this file runs.
gkw()
请输入你要查询的省份地区拼音: fujian
年份 ['2019', '2018', '2017', '2016', '2015', '2014', '2013', '2012', '2011', '2010', '2009']
文科 ['一本', '550', '551', '489', '501', '549', '561', '513', '557', '564', '557', '582', '二本', '464', '446', '380', '403', '462', '482', '431', '466', '473', '494', '518', '专科', '220', '362', '300', '319', '352', '357', '227', '251', '325', '333', '355']
理科 ['一本', '493', '490', '441', '465', '525', '506', '501', '546', '573', '539', '569', '二本', '393', '378', '333', '352', '410', '408', '401', '435', '460', '472', '500', '专科', '220', '258', '236', '251', '266', '246', '201', '223', '220', '270', '309']
图像绘制完成

 

猜你喜欢

转载自www.cnblogs.com/python-study-notebook/p/12820408.html