# Python之词频统计 (word-frequency statistics in Python)

import turtle
# Global state shared by the counting and drawing functions.

# Scale factors applied to the x and y coordinates when drawing.
yScale = 80
xScale = 35
# Word frequencies; filled in by main().
data = []
# Word names; filled in by main() in the same order as data.
dwords = []
# Maps each word to the number of times it has been seen.
wordCounts = {}

# Count the word frequencies of one line: processLine()
def processLine(words, wordCounts):
    """Add every word in ``words`` to the running tally in ``wordCounts``.

    Args:
        words: iterable of words taken from one line of text.
        wordCounts: dict mapping word -> count; updated in place.

    Returns:
        None. The result is the mutation of ``wordCounts``.
    """
    for token in words:
        # A word seen for the first time starts from an implicit count of 0.
        wordCounts[token] = wordCounts.get(token, 0) + 1

# String handling: replacePunctuation(line)
def replacePunctuation(line):
    """Split ``line`` into words, treating some punctuation as whitespace.

    The characters ``:``, ``,``, ``@``, ``#`` and ``!`` are each replaced by
    a space before the line is split on whitespace.

    Args:
        line: the text to split.

    Returns:
        A list of the words found in ``line`` (empty if there are none).
    """
    # One translation table maps every separator character to a space.
    separators_to_space = str.maketrans({mark: ' ' for mark in ':,@#!'})
    return line.translate(separators_to_space).split()

# Bar-chart drawing helpers

# Draw a line (x1, y1) -> (x2, y2)
def drawLine(t, x1, y1, x2, y2):
    """Draw a straight segment from (x1, y1) to (x2, y2) with turtle ``t``.

    Args:
        t: the turtle-like object used for drawing.
        x1, y1: coordinates of the start point.
        x2, y2: coordinates of the end point.
    """
    start, end = (x1, y1), (x2, y2)
    # Move to the start point with the pen lifted so the move leaves no trace.
    t.penup()
    t.goto(*start)
    # Lower the pen and draw to the end point.
    t.pendown()
    t.goto(*end)

# Write text at (x, y)
def drawText(t, x, y, text):
    """Write ``text`` at position (x, y) using turtle ``t``.

    Args:
        t: the turtle-like object used for drawing.
        x, y: coordinates at which the text is written.
        text: the value handed to ``t.write``.
    """
    position = (x, y)
    # Move with the pen lifted, then lower it again before writing.
    t.penup()
    t.goto(*position)
    t.pendown()
    t.write(text)

# Draw a single bar
def drawRectangle(t, x, y):
    """Draw one filled bar whose size and position are given in chart units.

    Args:
        t: the turtle-like object used for drawing.
        x: horizontal position of the bar, multiplied by ``xScale``.
        y: height of the bar, multiplied by ``yScale``.
    """
    centre = x * xScale
    top = y * yScale
    # The bar is 10 units wide, centred on the scaled x position.
    left, right = centre - 5, centre + 5

    t.color('black', 'red')
    t.begin_fill()
    corners = [(left, 0), (left, top), (right, top), (right, 0)]
    # Pair each corner with the next one, wrapping around to close the outline.
    for (from_x, from_y), (to_x, to_y) in zip(corners, corners[1:] + corners[:1]):
        drawLine(t, from_x, from_y, to_x, to_y)
    t.end_fill()
# Draw all the bars
def drawBar(t, counts):
    """Draw the first ``counts`` bars, one per entry of the global ``data``.

    Args:
        t: the turtle-like object used for drawing.
        counts: number of bars to draw.
    """
    # Bars are placed at 1-based slots; data itself is indexed from 0.
    for slot in range(1, counts + 1):
        drawRectangle(t, slot, data[slot - 1])

# Draw the whole chart
def drawGraph(t, counts):
    """Draw the axes, the labels and the bars of the frequency chart.

    Reads the word names from the global ``dwords`` and the frequencies from
    the global ``data``.

    Args:
        t: the turtle-like object used for drawing.
        counts: number of words to plot.
    """
    # Axes: x axis first, then y axis, both starting at the origin.
    for end_x, end_y in ((360, 0), (0, 360)):
        drawLine(t, 0, 0, end_x, end_y)

    # Labels: the word below the axis, its frequency just above the bar top.
    for index in range(counts):
        label_x = (index + 1) * xScale - 4
        drawText(t, label_x, -30, dwords[index])
        frequency = data[index]
        drawText(t, label_x, frequency * yScale + 10, frequency)

    # Bars
    drawBar(t, counts)
def main():
    """Prompt for a text file, count its words and show them as a bar chart.

    Each line of the file is lower-cased, split by ``replacePunctuation`` and
    tallied into the global ``wordCounts``. The (count, word) pairs are then
    sorted in ascending order and appended to the globals ``data``
    (frequencies) and ``dwords`` (words), which ``drawGraph`` reads.

    Raises:
        OSError: if the named file cannot be opened.
    """
    # The file name is used exactly as typed (apart from surrounding
    # whitespace); changing its case would break on case-sensitive
    # filesystems.
    filename = input('please input a filename').strip()

    # Count words over all lines; the with-block guarantees the file is
    # closed even if processing fails.
    with open(filename, 'r') as infile:
        for line in infile:
            words = replacePunctuation(line.lower())
            processLine(words, wordCounts)

    # Sort ascending by count, with ties broken by the word itself.
    items = sorted((count, word) for word, count in wordCounts.items())

    counts = len(items)
    for count, word in items:
        data.append(count)
        dwords.append(word)

    # Window initialisation
    turtle.title('词频显示结果')
    turtle.setup(900, 750, 0, 0)
    t = turtle.Turtle()
    t.hideturtle()
    t.pensize(3)
    t.color('black', 'red')

    # Draw with animation switched off, then switch it back on, which also
    # flushes the pending drawing to the screen. This must happen before
    # done(): done() blocks until the window is closed, after which turtle
    # calls are no longer valid.
    turtle.tracer(False)
    drawGraph(t, counts)
    turtle.tracer(True)
    turtle.done()
# Run the program only when this file is executed as a script.
if __name__=='__main__':
    main()


# 注:运行前需要自行准备待统计的文本文件 (Note: supply your own input text file before running.)

猜你喜欢

转载自blog.csdn.net/weixin_42143003/article/details/89041771
今日推荐