Visualizing Wikipedia article edit contributions by country

import requests
import re
import json
from bs4 import BeautifulSoup
from urllib.request import urljoin
import collections
from pyecharts import Map
def get_view_history_link(search_word):
    """Return the absolute URL of the history tab of an English Wikipedia article.

    Args:
        search_word: Article title as it appears in the URL path after
            ``/wiki/``, e.g. ``'Python_(programming_language)'``.

    Returns:
        The absolute URL found in the page's ``ca-history`` tab.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        ValueError: If the page contains no ``ca-history`` link.
    """
    root_url = 'https://en.wikipedia.org'
    url = root_url + '/wiki/' + search_word
    # Adjacent literals are concatenated by the parser, so the header value
    # does not pick up source indentation the way a backslash continuation
    # inside the string does.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/64.0.3282.186 Safari/537.36'
    }
    # Without a timeout, requests may wait indefinitely on a stalled server.
    res = requests.get(url, headers=headers, timeout=10)
    res.raise_for_status()
    soup = BeautifulSoup(res.text, 'lxml')
    history_tab = soup.find('li', {'id': 'ca-history'})
    # Search for any descendant anchor instead of assuming a fixed
    # <span><a> nesting, so either ordering of the wrappers is accepted.
    history_anchor = None
    if history_tab is not None:
        history_anchor = history_tab.find('a', href=True)
    if history_anchor is None:
        raise ValueError('no history link found on page: ' + url)
    return urljoin(root_url, history_anchor['href'])
def get_ip(search_word):
    """Collect the IP addresses of anonymous editors from an article's history.

    Fetches the article's history page, follows the last ``mw-numlink``
    paging link when one exists, and gathers the text of every
    ``mw-anonuserlink`` anchor on the resulting page.

    Args:
        search_word: Article title as it appears in the URL path after
            ``/wiki/``.

    Returns:
        A set of strings, one per distinct anonymous-editor link text.

    Raises:
        requests.RequestException: If a request fails, times out, or the
            server answers with an HTTP error status.
    """
    root_url = 'https://en.wikipedia.org'
    # Send the same browser-like User-Agent as get_view_history_link so all
    # requests to the site are made consistently.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/64.0.3282.186 Safari/537.36'
    }
    history_url = get_view_history_link(search_word)
    res = requests.get(history_url, headers=headers, timeout=10)
    res.raise_for_status()
    soup = BeautifulSoup(res.text, 'lxml')

    # NOTE(review): the last "mw-numlink" anchor is presumably the
    # 500-entries-per-page option -- confirm against the live markup.
    num_links = soup.find_all('a', {'class': 'mw-numlink'})
    if num_links:
        page_500_link = urljoin(root_url, num_links[-1]['href'])
        res = requests.get(page_500_link, headers=headers, timeout=10)
        res.raise_for_status()
        soup = BeautifulSoup(res.text, 'lxml')
    # With no paging links, fall back to the editors listed on the page
    # already fetched instead of failing with IndexError.

    return {
        anchor.text
        for anchor in soup.find_all('a', {'class': 'mw-anonuserlink'})
    }
def get_country(ip):
    """Look up the country code for an IP address via the freegeoip JSON API.

    Args:
        ip: IP address string appended to the lookup URL.

    Returns:
        A tuple ``(ip, country_code)``. ``country_code`` is the value of the
        ``country_code`` field of the JSON response, or ``None`` when the
        request fails, the response is not a JSON object, or the field is
        absent.
    """
    # NOTE(review): confirm the freegeoip.net endpoint is still in service.
    try:
        res = requests.get('https://freegeoip.net/json/' + ip, timeout=10)
    except requests.RequestException:
        # Return early: falling through would use an unbound `res`.
        print('无效的ip地址')
        return (ip, None)

    try:
        data_json = json.loads(res.text)
    except ValueError:
        # The body was not JSON (e.g. a plain-text error page).
        print('无效的ip地址')
        return (ip, None)

    if not isinstance(data_json, dict):
        return (ip, None)
    country = data_json.get('country_code')
    return (ip, country)
if __name__ == '__main__':
    # Everything below depends on `results`, so it all lives under the main
    # guard; importing this module no longer touches the network or the
    # filesystem, and no longer fails with NameError.
    ips = get_ip('Python_(programming_language)')
    results = [get_country(ip) for ip in ips]

    # Build the dictionary mapping country abbreviations to the country
    # names used by echarts. In each line, the first run of letters is the
    # key and the remaining runs, joined by spaces, form the value.
    country_ab = {}
    # The abbreviation file can be found with a Baidu search.
    with open(r'C:\Users\CW\Desktop\ab.txt', 'r') as f:
        for line in f:
            if len(line) <= 1:
                continue  # blank line
            words = re.findall('[a-zA-Z]+', line)
            if not words:
                continue  # no letters at all; nothing to use as a key
            country_ab[words[0]] = ' '.join(words[1:])

    countrys = [i[1] for i in results]
    stats = collections.Counter(countrys)
    cut_stats = sorted(stats.items(), key=lambda x: x[1], reverse=True)

    # Skip codes missing from the abbreviation table (including None from
    # failed lookups) rather than raising KeyError; building name/count
    # pairs first keeps `attr` and `value` aligned.
    known = [
        (country_ab[code], count)
        for code, count in cut_stats
        if code in country_ab
    ]
    attr = [name for name, _ in known]
    value = [count for _, count in known]

    word_map = Map("维基词条编辑每个国家贡献", width=800, height=400)
    word_map.add("", attr, value, maptype="world", is_visualmap=True,
                 is_piecewise=True, visual_text_color='#000',
                 is_map_symbol_show=False,
                 pieces=[{"max": 25, "min": 25, "label": "25"},
                         {"max": 24, "min": 10, "label": "24-10"},
                         {"max": 10, "min": 0, "label": "<10"}])
    # Write the chart out (pyecharts' default output path); without this
    # call the map is built but never produced.
    word_map.render()

[Image: picture description not provided]

Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=324842872&siteId=291194637