谏言:穷则独善其身,达则兼济天下
爬取高考网数据:
import requests
from bs4 import BeautifulSoup
from pyecharts import options
from pyecharts.charts import Bar
# Liberal arts (文科)
def wenkechax(html):
    """Extract the liberal-arts score table from a score-line page.

    Reads the first ``<table>`` in *html* row by row.  Non-empty ``<th>``
    cells are collected as year labels, and every ``<td>`` cell is collected
    into one flat list in document order.  In the sample run recorded with
    this script, each data row contributes a batch label followed by one
    score per year.

    Args:
        html: Decoded HTML text of the page.

    Returns:
        A ``(wenke, nianfen)`` tuple, where ``wenke`` is the flat list of
        whitespace-stripped ``<td>`` texts and ``nianfen`` is the list of
        non-empty, whitespace-stripped ``<th>`` texts.

    Raises:
        IndexError: If the page contains no ``<table>`` element.
    """
    soup = BeautifulSoup(html, 'html.parser')
    tables = soup.find_all('table')
    if not tables:
        # Same exception type that indexing an empty result would raise,
        # but with a message that explains what is missing.
        raise IndexError("no <table> found in page; liberal-arts table is missing")

    wenke = []
    nianfen = []
    for tr in tables[0].find_all('tr'):
        for th in tr.find_all('th'):
            # Strip before testing so whitespace-only header cells are skipped.
            year = th.text.strip()
            if year:
                nianfen.append(year)
        # Cell text carries layout whitespace (\r\n\t...), hence the strip.
        wenke.extend(td.text.strip() for td in tr.find_all('td'))
    return wenke, nianfen
# Science (理科)
def likechax(html):
    """Extract the science score table from a score-line page.

    Reads the second ``<table>`` in *html* and collects every ``<td>`` cell
    into one flat list in document order.  Header (``<th>``) cells are not
    collected here because this function only returns the score cells.

    Args:
        html: Decoded HTML text of the page.

    Returns:
        The flat list of whitespace-stripped ``<td>`` texts.

    Raises:
        IndexError: If the page contains fewer than two ``<table>`` elements.
    """
    soup = BeautifulSoup(html, 'html.parser')
    tables = soup.find_all('table')
    if len(tables) < 2:
        # Same exception type that indexing past the end would raise,
        # but with a message that explains what is missing.
        raise IndexError("fewer than two <table> elements in page; science table is missing")

    like = []
    for tr in tables[1].find_all('tr'):
        # Cell text carries layout whitespace (\r\n\t...), hence the strip.
        like.extend(td.text.strip() for td in tr.find_all('td'))
    return like
def gkw():
    """Prompt for a province, download its score-line page, and chart it.

    Asks the user for the province name in pinyin, fetches
    ``http://www.gaokao.com/<province>/fsx/``, decodes the body as GBK,
    parses the liberal-arts and science tables, prints the parsed lists,
    and passes them to ``keshihua`` for rendering.

    Raises:
        requests.RequestException: If the request times out, fails, or the
            server answers with an HTTP error status.
        UnicodeDecodeError: If the response body is not valid GBK.
        IndexError: If the expected tables are missing from the page.
    """
    # Strip stray whitespace so it does not end up inside the URL path.
    diquname = input("请输入你要查询的省份地区拼音: ").strip()
    url = 'http://www.gaokao.com/' + diquname + '/fsx/'

    # Without a timeout a stalled connection would block forever.
    response = requests.get(url, timeout=10)
    # Fail early on 4xx/5xx instead of parsing an error page as data.
    response.raise_for_status()
    html = response.content.decode("gbk")

    wenke, nianfen = wenkechax(html)
    print("年份", nianfen)
    print("文科", wenke)
    like = likechax(html)
    print("理科", like)
    keshihua(diquname, like, wenke, nianfen)
def _split_batches(flat, n_years):
    """Split a flat ``[label, score, ..., label, score, ...]`` list into rows.

    Each row occupies ``n_years + 1`` consecutive items: one batch label
    followed by one score per year.  Returns a list of ``(label, scores)``
    pairs; a short trailing row keeps whatever scores it has.
    """
    step = n_years + 1
    return [(flat[i], flat[i + 1:i + step]) for i in range(0, len(flat), step)]


def keshihua(diquname, like, wenke, nianfen):
    """Render the score lines as a bar chart and write it to an HTML file.

    The score lists are flat: each admission batch is a label followed by
    one score per entry in *nianfen*.  They are split by that row width and
    each series is named from the label found in the data, so provinces with
    a different number of batches are plotted under the correct names
    (fixed slice positions would mislabel the rows when a batch is absent).

    Args:
        diquname: Province name; used in the chart title and output file name.
        like: Flat list of science labels and scores.
        wenke: Flat list of liberal-arts labels and scores.
        nianfen: Year labels for the x axis; its length sets the row width.

    Side effects:
        Writes ``<diquname>历年高考分数线统计图.html`` in the working directory
        and prints a completion message.
    """
    c = Bar()
    c.add_xaxis(nianfen)
    # Liberal-arts series first, then science, matching the legend order
    # of the fixed-slice version.
    for prefix, flat in (("文科", wenke), ("理科", like)):
        for label, scores in _split_batches(flat, len(nianfen)):
            c.add_yaxis(prefix + label, scores)
    c.set_global_opts(
        title_opts=options.TitleOpts(title=diquname+"高考分数线", subtitle="2009-2019")
    )
    c.render(diquname+"历年高考分数线统计图.html")
    print("图像绘制完成")
# Script entry point: runs the prompt -> fetch -> parse -> chart pipeline.
gkw()
请输入你要查询的省份地区拼音: fujian
年份 ['2019', '2018', '2017', '2016', '2015', '2014', '2013', '2012', '2011', '2010', '2009']
文科 ['一本', '550', '551', '489', '501', '549', '561', '513', '557', '564', '557', '582', '二本', '464', '446', '380', '403', '462', '482', '431', '466', '473', '494', '518', '专科', '220', '362', '300', '319', '352', '357', '227', '251', '325', '333', '355']
理科 ['一本', '493', '490', '441', '465', '525', '506', '501', '546', '573', '539', '569', '二本', '393', '378', '333', '352', '410', '408', '401', '435', '460', '472', '500', '专科', '220', '258', '236', '251', '266', '246', '201', '223', '220', '270', '309']
图像绘制完成