人生苦短，我学Python-006：组合数据类型

    #CalStatisticsV1.py
    def getNum():
        """Read values from standard input until an empty line is entered.

        Each non-empty line is evaluated as a Python expression and the
        result is collected.

        Returns:
            list: The evaluated inputs, in the order they were typed.
        """
        collected = []
        while True:
            entry = input("请输入数字(回车退出): ")
            if entry == "":
                break
            # NOTE(review): eval() executes whatever expression the user
            # types; only acceptable for trusted, interactive use.
            collected.append(eval(entry))
        return collected

    def mean(numbers):
        """Return the arithmetic mean of ``numbers`` as a float.

        Raises:
            ZeroDivisionError: If ``numbers`` is empty.
        """
        total = 0.0
        for value in numbers:
            total += value
        count = len(numbers)
        return total / count

    def dev(numbers, mean):
        """Return the sample standard deviation of ``numbers``.

        The squared deviations from ``mean`` are summed, divided by
        ``len(numbers) - 1`` and the square root of that quotient is returned.

        Args:
            numbers: Sequence of numeric values.
            mean: The value the deviations are measured from.

        Raises:
            ZeroDivisionError: If ``numbers`` has exactly one element.
        """
        squared_sum = 0.0
        for value in numbers:
            squared_sum += (value - mean) ** 2
        degrees_of_freedom = len(numbers) - 1
        return (squared_sum / degrees_of_freedom) ** 0.5

    def median(numbers):
        """Return the median of ``numbers``.

        For an even number of elements the mean of the two middle values is
        returned; for an odd number, the middle value itself.

        Args:
            numbers: Sequence of mutually comparable numeric values. It is
                not modified.

        Raises:
            IndexError: If ``numbers`` is empty.
        """
        # sorted() returns a new list; the result must be kept, otherwise the
        # "middle" elements are taken from the unsorted input.
        ordered = sorted(numbers)
        size = len(ordered)
        mid = size // 2
        if size % 2 == 0:
            return (ordered[mid - 1] + ordered[mid]) / 2
        return ordered[mid]

    # Main program: read the numbers, then report mean, standard deviation
    # and median.
    n = getNum()
    m = mean(n)
    # dev() returns the standard deviation, so it is labelled as such, and it
    # is printed with two decimals ("{:.2}" without "f" means two significant
    # digits and switches to exponent notation for larger values).
    print("平均值:{},标准差:{:.2f},中位数:{}.".format(m, dev(n, m), median(n)))

Hamlet词频统计（含Hamlet原文文本）

    #CalHamletV1.py
    def getText():
        """Return the contents of ``hamlet.txt`` normalised for word counting.

        The text is lower-cased and every character of the punctuation set
        below is replaced by a single space.

        Returns:
            str: The normalised text.
        """
        # Use a context manager so the file handle is closed deterministically.
        with open("hamlet.txt", "r") as f:
            txt = f.read()
        txt = txt.lower()
        # NOTE(review): the set contains the typographic quote ‘ but neither
        # the ASCII apostrophe nor the backtick; possibly a transcription
        # artifact, confirm against the intended punctuation list.
        for ch in '!"#$%&()*+,-./:;<=>?@[\\]^_‘{|}~':
            txt = txt.replace(ch, " ")   # replace special characters by spaces
        return txt

    # Count how often each word occurs in the normalised text.
    hamletTxt = getText()
    words = hamletTxt.split()
    counts = {}
    for word in words:
        counts[word] = counts.get(word, 0) + 1
    # Order the (word, count) pairs by descending count.
    items = list(counts.items())
    items.sort(key=lambda x: x[1], reverse=True)
    # Print the ten most frequent words. The print must be inside the loop,
    # and slicing avoids an IndexError when there are fewer than ten words.
    for word, count in items[:10]:
        print("{0:<10}{1:>5}".format(word, count))

《三国演义》人物出场统计（上）（含《三国演义》原文文本）

    #CalThreeKingdomsV1.py
    import jieba
    # Read the novel; the context manager closes the file handle afterwards.
    with open("threekingdoms.txt", "r", encoding='utf-8') as f:
        txt = f.read()
    words = jieba.lcut(txt)
    # Count every segmented word longer than one character.
    counts = {}
    for word in words:
        if len(word) == 1:
            continue
        counts[word] = counts.get(word, 0) + 1
    # Order the (word, count) pairs by descending count.
    items = list(counts.items())
    items.sort(key=lambda x: x[1], reverse=True)
    # Print the 15 most frequent words; slicing avoids an IndexError when
    # fewer than 15 distinct words were found.
    for word, count in items[:15]:
        print("{0:<10}{1:>5}".format(word, count))

《三国演义》人物出场统计（下）（含《三国演义》原文文本）

    #CalThreeKingdomsV2.py
    import jieba
    # Frequent words that should not appear in the ranking.
    excludes = {"将军","却说","荆州","二人","不可","不能","如此"}
    # Alternative spellings that are counted under one canonical name.
    aliases = {
        "诸葛亮": "孔明", "孔明曰": "孔明",
        "关公": "关羽", "云长": "关羽",
        "玄德": "刘备", "玄德曰": "刘备",
        "孟德": "曹操", "丞相": "曹操",
    }
    # Read the novel; the context manager closes the file handle afterwards.
    with open("threekingdoms.txt", "r", encoding='utf-8') as f:
        txt = f.read()
    words = jieba.lcut(txt)
    # Count every segmented word longer than one character, merging aliases.
    counts = {}
    for word in words:
        if len(word) == 1:
            continue
        rword = aliases.get(word, word)
        counts[rword] = counts.get(rword, 0) + 1
    # pop() with a default tolerates excluded words that never occurred,
    # where "del counts[word]" would raise KeyError.
    for word in excludes:
        counts.pop(word, None)
    # Order the (word, count) pairs by descending count.
    items = list(counts.items())
    items.sort(key=lambda x: x[1], reverse=True)
    # Print the ten most frequent entries; slicing avoids an IndexError when
    # fewer than ten remain.
    for word, count in items[:10]:
        print("{0:<10}{1:>5}".format(word, count))

人生苦短，我学Python-006：组合数据类型

猜你喜欢