# 按浏览量或者下载量大小获取CSDN搜索的文章
# (Fetch CSDN search results and rank them by view count or download count.)



import requests
from bs4 import BeautifulSoup

def getHTMLText(html, timeout=0.2):
    """Download a page and return its decoded text.

    Args:
        html: URL of the page to fetch (despite the name, this is an
            address, not markup).
        timeout: Timeout in seconds handed to ``requests.get``. The default
            of 0.2 matches the value that used to be hard-coded; pass a
            larger value on slow connections.

    Returns:
        The tuple ``(1, text)`` on success, or the integer ``0`` when the
        request fails. Callers must check for the ``0`` sentinel before
        subscripting the result.
    """
    try:
        r = requests.get(html, timeout=timeout)
        # Decode using the encoding detected from the body rather than the
        # one declared in the HTTP headers.
        r.encoding = r.apparent_encoding
        return 1, r.text
    except requests.RequestException:
        # Only network/HTTP-layer failures are turned into the sentinel;
        # KeyboardInterrupt and programming errors are allowed to propagate.
        return 0
        
def search_CSDN_html(key: str, skind: int, page: int) -> str:
    """Build the CSDN search URL for a keyword, result category and page.

    Args:
        key: Search keyword. It is percent-encoded before being placed in
            the query string, so spaces, ``&``, ``#`` and non-ASCII text
            cannot corrupt the URL.
        skind: Index into the category table below (0-4). Per the script's
            own legend: 1 = blog, 2 = download, 3 = forum, 4 = course;
            0 carries no category name.
        page: Page number placed in the ``p`` query parameter.

    Returns:
        The full search URL as a string.

    Raises:
        IndexError: If ``skind`` is outside the category table.
    """
    from urllib.parse import quote

    # Query-string tails, one per result category.
    kind = (
        "&domain=&o=&s=&u=&l=&f=",
        "blog&domain=&o=&s=&u=&l=&f=&rbg=0",
        "doc&domain=&o=&s=all&u=&l=&f=",
        "discuss&domain=&o=simi&s=&u=&l=&f=",
        "course&o=&s=&l=",
    )
    return (
        "https://so.csdn.net/so/search/s.do?p=" + str(page)
        + "&q=" + quote(key, safe="")
        + "&t=" + kind[skind]
    )

def NUmlist(numlist, soup, skind):
    """Append the count parsed from every ``.author-time`` element to *numlist*.

    The number is cut out of each element's text with a fixed slice that
    depends on the result category:

    * ``skind`` 1 or 3: from 3 characters after the start of '浏览' up to
      one character before '次'.
    * ``skind`` 2: from just after '下载' up to the last two characters.
    * ``skind`` 4: from 2 characters after the marker up to the last three
      characters.
    * any other value: nothing is appended and the page is not queried.

    Args:
        numlist: List that is extended in place.
        soup: Parsed page; only its ``select`` method is used.
        skind: Result category selecting the slice rule.

    Returns:
        The same ``numlist`` object that was passed in.

    Raises:
        ValueError: If a sliced fragment is not a valid integer; entries
            appended before the failure stay in ``numlist``.
    """
    if skind in (1, 3):
        def cut(text):
            return text[text.find('浏览') + 3:text.find('次') - 1]
    elif skind == 2:
        def cut(text):
            return text[text.find('下载') + 2:-2]
    elif skind == 4:
        # NOTE(review): the marker is a literal backslash followed by 'n',
        # not a newline character - confirm this is what the page contains.
        def cut(text):
            return text[text.find('\\n') + 2:-3]
    else:
        return numlist

    for entry in soup.select('.author-time'):
        numlist.append(int(cut(entry.text)))
    return numlist

def maxpage(key, skind):
    """Return the number of result pages available for a search.

    Fetches page 1 of the search, reads the total hit count from the text
    between '共' and '条' in the first ``.text`` element, and divides it by
    21 (presumably the number of results per page - TODO confirm).

    Args:
        key: Search keyword.
        skind: Result category passed through to ``search_CSDN_html``.

    Returns:
        The hit count floor-divided by 21, as an int.

    Raises:
        RuntimeError: If the page cannot be fetched or contains no
            ``.text`` element.
        ValueError: If the text between '共' and '条' is not an integer.
    """
    html = search_CSDN_html(key, skind, 1)
    fetched = getHTMLText(html)
    # getHTMLText reports failure with a falsy sentinel instead of raising;
    # subscripting it blindly would surface as an opaque TypeError.
    if not fetched or not fetched[0]:
        raise RuntimeError("failed to fetch CSDN search page: " + html)

    soup = BeautifulSoup(fetched[1], "html.parser")
    matches = soup.select('.text')
    if not matches:
        raise RuntimeError("no '.text' element with the result count in: " + html)

    summary = matches[0].text
    total = int(summary[summary.find('共') + 1:summary.find('条')])
    # Integer floor division; avoids the float round-trip of int(total / 21).
    return total // 21

key = "Python GUI"  # search keyword
skind = 1           # result category: 1 = blog, 2 = download, 3 = forum, 4 = course

print("最大页数为:",maxpage(key, skind))
# int() replaces the original eval(): eval would execute any expression typed
# at the prompt, while only a page number is expected here.
maxP = int(input('最后的页面:'))  # last page to crawl

# Upper bound on tries per page, so a page that keeps failing cannot stall
# the crawl forever.
MAX_ATTEMPTS = 3

numlist = []  # view/download counts, kept parallel to weblist
weblist = []  # result links

for page in range(1, maxP + 1):
    html = search_CSDN_html(key, skind, page)
    for attempt in range(MAX_ATTEMPTS):
        fetched = getHTMLText(html)
        if not fetched or not fetched[0]:
            continue  # request failed; try again
        soup = BeautifulSoup(fetched[1], "html.parser")
        try:
            # Collect into per-page lists first so a parse failure halfway
            # through leaves the global lists untouched (no duplicates on retry).
            page_links = [a['href'] for a in soup.select('.search-link a')]
            page_counts = NUmlist([], soup, skind)
        except (KeyError, ValueError):
            continue  # malformed entry on this page; try again
        # zip stops at the shorter list, so the two global lists always stay
        # the same length even if the page yields unequal numbers of each.
        for count, link in zip(page_counts, page_links):
            numlist.append(count)
            weblist.append(link)
        break
    else:
        print("第", page, "页获取失败,已跳过")
    print(round(page / maxP * 100, 2),'%')

# Order both lists by count, largest first. sorted() is stable, so links with
# equal counts keep the order in which they were crawled.
ranked = sorted(zip(numlist, weblist), key=lambda pair: pair[0], reverse=True)
numlist = [count for count, _ in ranked]
weblist = [link for _, link in ranked]

print(weblist[:10])
# The ten links with the most views (or downloads).


# 猜你喜欢
#
# 转载自 blog.csdn.net/qq_28969139/article/details/80952761
# 今日推荐