Government Work Report Word Cloud

After taking Professor Song Tian's web-crawler and Python programming courses, I combined one of the examples from the programming course with a crawler~

import requests
import os
import wordcloud
import jieba
def DownloadTxt(root, url):
    # Name the local file after the last 10 characters of the URL's file name
    path = root + str(url.split('/')[-1])[-10:]
    try:
        if not os.path.exists(root):
            os.mkdir(root)
        if not os.path.exists(path):
            r = requests.get(url)
            r.raise_for_status()           # raise an exception on HTTP errors
            with open(path, 'wb') as f:    # the with-block closes the file automatically
                f.write(r.content)
            print("File saved successfully")
        else:
            print("File already exists")
        return path
    except Exception:
        print("Download failed")
        return ""
def GetTxt(path):
    try:
        # Read the downloaded text, cut it into words with jieba,
        # and join the words with spaces so WordCloud can count them
        with open(path, "r", encoding="utf-8") as f:
            t = f.read()
        ls = jieba.lcut(t)
        txt = " ".join(ls)
        return txt
    except Exception:
        print("Failed to open the file")
        return ""
def DoWordCloud(txt, root):
    # msyh.ttc (Microsoft YaHei) is needed so Chinese characters render correctly
    w = wordcloud.WordCloud(font_path="msyh.ttc", width=1000, height=700, background_color="white")
    w.generate(txt)
    path = root + "grwordcloud1.png"
    w.to_file(path)
def main():
    url = "https://python123.io/resources/pye/%E6%96%B0%E6%97%B6%E4%BB%A3%E4%B8%AD%E5%9B%BD%E7%89%B9%E8%89%B2%E7%A4%BE%E4%BC%9A%E4%B8%BB%E4%B9%89.txt"
    root = "F://Sophomore//The_Second_Term//Python//Code//"
    path = DownloadTxt(root, url)
    txt = GetTxt(path)
    DoWordCloud(txt, root)

if __name__ == "__main__":
    main()
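
One detail worth spelling out: WordCloud.generate() counts word frequencies by splitting its input on whitespace, and Chinese text has no spaces between words, which is why GetTxt joins the jieba tokens with spaces first. A minimal sketch of that step in isolation (the sample sentence is just an illustration, not taken from the report):

import jieba

sample = "新时代中国特色社会主义思想"
tokens = jieba.lcut(sample)   # cut the sentence into a list of Chinese words
spaced = " ".join(tokens)     # e.g. "新时代 中国 特色 ..." (exact cuts depend on jieba's dictionary)
print(spaced)                 # this space-separated form is what WordCloud.generate() consumes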
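
With the default settings, high-frequency function words (的, 和, 是, ...) tend to dominate the picture. The WordCloud constructor accepts a stopwords set and a max_words limit for exactly this; below is a minimal sketch of how DoWordCloud could be extended, where the stop-word list and the output file name are only illustrative assumptions, not part of the original post (txt and root are the same variables produced in main):

import wordcloud

# Illustrative stop words -- adjust to the text being analyzed
stop_words = {"的", "和", "是", "在", "要", "我们"}
w = wordcloud.WordCloud(
    font_path="msyh.ttc",       # same Chinese-capable font as above
    width=1000, height=700,
    background_color="white",
    stopwords=stop_words,       # words dropped before counting frequencies
    max_words=200,              # keep only the 200 most frequent words
)
w.generate(txt)                             # txt is the space-joined output of GetTxt
w.to_file(root + "grwordcloud_filtered.png")  # hypothetical output name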

Reposted from blog.csdn.net/m0_38015368/article/details/80185155