Python collects ranking data of more than 1,000 world universities and creates visual data displays

Preface

QS World University Rankings is an annual world university ranking published by Quacquarelli Symonds (QS), a British international education market consulting company.
Insert image description here

Collect global university ranking data (source code has been packaged, share it with private messages for free and get it yourself)

import requests     # 发送请求
import re
import csv

with open('rank.csv', mode='a', encoding='utf-8', newline='') as f:
    csv_writer = csv.writer(f)
    csv_writer.writerow(['country', 'rank', 'region', 'score_1', 'score_2', 'score_3', 'score_4', 'score_5', 'score_6', 'total_score', 'stars', 'university', 'year'])
def replace(str_):
    str_ = re.findall('<div class="td-wrap"><div class="td-wrap-in">(.*?)</div></div>', str_)[0]
    return str_
url = 'https://www.qschina.cn/sites/default/files/qs-rankings-data/cn/2057712_indicators.txt'
# 1. 发送请求
response = requests.get(url)
# <Response [200]>: 请求成功
# 2. 获取数据
json_data = response.json()     # Python 字典
# 3. 解析数据
# 字典
data_list = json_data['data']
for i in data_list:
    country = i['location']     # 国家/地区
    rank = i['overall_rank']    # 排名
    region = i['region']        # 大洲
    score_1 = replace(i['ind_76'])       # 学术声誉
    score_2 = replace(i['ind_77'])       # 雇主声誉
    score_3 = replace(i['ind_36'])       # 师生比
    score_4 = replace(i['ind_73'])       # 教员引用率
    score_5 = replace(i['ind_18'])       # 国际教师
    score_6 = replace(i['ind_14'])       # 国际学生
    total_score = replace(i['overall'])       # 总分
    stars = i['stars']       # 星级
    uni = i['uni']       # 大学名称
    university = re.findall('<div class="td-wrap"><div class="td-wrap-in"><a href=".*?" class="uni-link">(.*?)</a></div></div>', uni)[0]
    year = "2021"       # 年份
    print(country, rank, region, score_1, score_2, score_3, score_4, score_5, score_6, total_score, stars, university, year)
    with open('rank.csv', mode='a', encoding='utf-8', newline='') as f:
        csv_writer = csv.writer(f)
        csv_writer.writerow([country, rank, region, score_1, score_2, score_3, score_4, score_5, score_6, total_score, stars, university, year])

Insert image description here
Insert image description here

Visual display

Import required modules

from pyecharts.charts import *
from pyecharts import options as opts
from pyecharts.commons.utils import JsCode
from pyecharts.components import Table
import re
import pandas as pd

Import Data

df = pd.read_csv('rank.csv')

# 香港,澳门与中国大陆地区等在榜单中是分开的记录的,这边都归为china
df['loc'] = df['country']
df['country'].replace(['China (Mainland)', 'Hong Kong SAR', 'Taiwan', 'Macau SAR'],'China',inplace=True)

Insert image description here

2021 World University Rankings (QS) TOP 100

bar = (Bar()
       .add_xaxis(university)
       .add_yaxis('', score, category_gap='30%')
       .set_global_opts(title_opts=opts.TitleOpts(title="2021年世界大学排名(QS) TOP 100",
                                                  pos_left="center",
                                                  title_textstyle_opts=opts.TextStyleOpts(font_size=20)),
                        datazoom_opts=opts.DataZoomOpts(range_start=70, range_end=100, orient='vertical'),
                        visualmap_opts=opts.VisualMapOpts(is_show=False, max_=100, min_=60, dimension=0,
                                range_color=['#00FFFF', '#FF7F50']),
                        legend_opts=opts.LegendOpts(is_show=False),
                        xaxis_opts=opts.AxisOpts(is_show=False, is_scale=True),
                        yaxis_opts=opts.AxisOpts(axistick_opts=opts.AxisTickOpts(is_show=False),
                                                 axisline_opts=opts.AxisLineOpts(is_show=False),
                                                 axislabel_opts=opts.LabelOpts(font_size=12)))
       .set_series_opts(label_opts=opts.LabelOpts(is_show=True,
                                                  position='right',
                                                  font_style='italic'),
                        itemstyle_opts={
    
    "normal": {
    
    
                                                    "barBorderRadius": [30, 30, 30, 30],
                                                    'shadowBlur': 10,
                                                    'shadowColor': 'rgba(120, 36, 50, 0.5)',
                                                    'shadowOffsetY': 5,
                                                }
                                       }
).reversal_axis())

grid = (
        Grid(init_opts=opts.InitOpts(theme='purple-passion', width='1000px', height='1200px'))
        .add(bar, grid_opts=opts.GridOpts(pos_right='10%', pos_left='20%'))
    )
grid.render_notebook()

Insert image description here

Chinese Universities in TOP 500

Insert image description here

Distribution of TOP 1000 Universities by Continent

Insert image description here

fmt_js = """function (params) {
    
    return params.name+': '+Number(params.value[2]);}"""

mp = Map()
mp.add(
        "高校数量",
        data_pair,
        "world",
        is_map_symbol_show=False,
        is_roam=False)

mp.set_series_opts(label_opts=opts.LabelOpts(is_show=False),
                          itemstyle_opts={
    
    'normal': {
    
    
                                                'areaColor': '#191970',
                                                'borderColor': '#1773c3',
                                                'shadowColor': '#1773c3',
                                                'shadowBlur': 20,
                                                'opacity': 0.8
                                                    }
                                        })
    
mp.set_global_opts(
        title_opts=opts.TitleOpts(title="TOP 1000高校按国家分布", pos_left='center',
                                  title_textstyle_opts=opts.TextStyleOpts(font_size=18)),
        legend_opts=opts.LegendOpts(is_show=False),
        visualmap_opts=opts.VisualMapOpts(is_show=False, 
                                          max_=100,
                                          is_piecewise=False,
                                          dimension=0,
                                          range_color=['rgba(255,228,225,0.6)', 'rgba(255,0,0,0.9)', 'rgba(255,0,0,1)'])
    )

data_pair = [[x, y] for x, y in data_pair if x in country_list]    
geo = Geo()
    
# 需要先将几个国家的经纬度信息加入到geo中
for k, v in loc.items():
    geo.add_coordinate(k, v[0], v[1])
# 这里将geo的地图透明度配置为0
geo.add_schema(maptype="world", is_roam=False, itemstyle_opts={
    
    'normal': {
    
    'opacity': 0}})
    
geo.add("", data_pair, symbol_size=1)
# 显示标签配置
geo.set_series_opts(
    label_opts=opts.LabelOpts(
            is_show=True,
            position='right',
            color='white',
            font_size=12,
            font_weight='bold',
            formatter=JsCode(fmt_js)),
    )
    
grid = (
        Grid(init_opts=opts.InitOpts(theme='chalk', width='1000px', height='600px'))
        .add(mp, grid_opts=opts.GridOpts(pos_top="12%"))
        .add(geo, grid_opts=opts.GridOpts(pos_bottom="12%"))
    )

grid.render_notebook()

Insert image description here

Continent-Country Distribution

c = (Sunburst(
        init_opts=opts.InitOpts(
            theme='purple-passion',
            width="1000px",
            height="1000px"))
    .add(
        "",
        data_pair=data_pair,
        highlight_policy="ancestor",
        radius=[0, "100%"],
        sort_='null',
        levels=[
            {
    
    },
            {
    
    
                "r0": "20%",
                "r": "48%",
                "itemStyle": {
    
    "borderColor": 'rgb(220,220,220)', "borderWidth": 2}
            },
            {
    
    "r0": "50%", "r": "80%", "label": {
    
    "align": "right"},
                "itemStyle": {
    
    "borderColor": 'rgb(220,220,220)', "borderWidth": 1}}
        ],
    )
    .set_global_opts(
        visualmap_opts=opts.VisualMapOpts(is_show=False, max_=300, min_=0, is_piecewise=False,
                                range_color=['#4285f4', '#34a853', '#fbbc05', '#ea4335', '#ea4335']),
        title_opts=opts.TitleOpts(title="TOP 1000\n\n大学地理分布",
                                               pos_left="center",
                                               pos_top="center",
                                               title_textstyle_opts=opts.TextStyleOpts(font_style='oblique', font_size=20),))
    .set_series_opts(label_opts=opts.LabelOpts(font_size=14, formatter="{b}: {c}"))
)

c.render_notebook()

Insert image description here

Python crawls world university rankings + data visualization

Guess you like

Origin blog.csdn.net/XM67_/article/details/132510769