你是啥成份?

各种编程语言我都很喜欢,但平时用得最多的是哪一种呢?

一个GitHub小爬虫:获取全部repo及其主要语言,并画出饼图。

"""
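What are you made of?

Crawl a GitHub user's repository list and plot the share of each primary
language as a pie chart.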
"""

from collections import Counter
from urllib.parse import urljoin

import matplotlib.pyplot as plt
import numpy as np
import requests
from pyquery import PyQuery as pq


def parse_page(url, timeout=10):
    """Fetch one repository-list page and extract its repositories and links.

    Args:
        url: Absolute URL of the page to fetch.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A ``(repos, links)`` tuple. ``repos`` is a list of dicts with the
        keys ``'name'`` (text of the entry's ``h3``) and ``'language'``
        (text of its ``itemprop='programmingLanguage'`` element, if any).
        ``links`` is the list of pagination URLs found on the page, made
        absolute against ``url`` and de-duplicated in document order.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an error status.
    """
    print(url)
    # Without a timeout a stalled connection would block the crawl forever.
    resp = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page as if it
    # simply contained no repositories.
    resp.raise_for_status()
    html = pq(resp.text)

    repos = [
        {
            'name': repo('h3').text(),
            'language': repo("[itemprop='programmingLanguage']").text(),
        }
        for repo in html("#user-repositories-list li").items()
    ]

    # Collect every pagination link rather than only the first one: the
    # first link is not necessarily the "next" page (presumably it is the
    # "previous" link on later pages -- TODO confirm against live markup).
    # The caller is expected to skip URLs it has already visited.
    links = []
    for anchor in html(".pagination a").items():
        href = anchor.attr('href')
        if not href:
            continue
        absolute = urljoin(url, href)  # tolerate relative hrefs
        if absolute not in links:
            links.append(absolute)
    return repos, links


def analyze(repos):
    """Show a pie chart of how many repositories use each language.

    Args:
        repos: Iterable of dicts with ``'name'`` and ``'language'`` keys.

    Records are de-duplicated by name (the last record seen for a name
    wins) and records whose language is empty are ignored. The three
    largest slices are pulled outwards. The figure is displayed with
    ``plt.show()``; nothing is returned.
    """
    # Keep a single record per repository name.
    by_name = {}
    for repo in repos:
        by_name[repo['name']] = repo

    language_counts = Counter(
        repo['language'] for repo in by_name.values() if repo['language']
    )
    labels = language_counts.keys()
    sizes = np.array(list(language_counts.values()))

    # One offset per slice; 0.1 pushes a slice away from the centre.
    explode = np.zeros_like(sizes, dtype=np.float32)
    # argsort is ascending, so the last three indices are the top three.
    top_three = np.argsort(sizes)[-3:]
    explode[top_three] = 0.1

    # startangle is the angle at which the first slice begins.
    plt.pie(
        sizes,
        explode=explode,
        labels=labels,
        autopct='%1.1f%%',
        shadow=False,
        startangle=90,
    )
    plt.show()


def schedule(user="weiyinfu"):
    """Crawl a GitHub user's repository-list pages and gather all entries.

    Starting from ``https://github.com/<user>?tab=repositories``, each page
    is handed to ``parse_page`` and every link it returns is queued unless
    it has been seen before.

    Args:
        user: GitHub login whose repositories are crawled. Defaults to the
            account this script was originally written for.

    Returns:
        A list of the repository dicts returned by ``parse_page`` for every
        page visited, in visiting order. The same repository may appear
        more than once if it is listed on pages reached through different
        URLs; ``analyze`` de-duplicates by name.
    """
    seed = "https://github.com/" + user + "?tab=repositories"
    pending = [seed]
    # The seed counts as visited so a link pointing back to it is not
    # fetched a second time.
    visited = {seed}
    repos = []
    while pending:
        now = pending.pop()
        repo_list, url_list = parse_page(now)
        for link in url_list:
            if link not in visited:
                visited.add(link)
                pending.append(link)
        repos += repo_list

    return repos


def main():
    """Crawl the repositories, print the raw records, then plot the chart."""
    collected = schedule()
    print(collected)
    analyze(collected)


# Run the crawler only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

猜你喜欢

转载自www.cnblogs.com/weiyinfu/p/9704368.html