Use the wordcloud and jieba Python modules to build a word cloud from QQ message records

First, export the QQ message records

Here Insert Picture Description
Here Insert Picture Description

Then select [Export all message records]

Here Insert Picture Description

Note: when saving, set the file type to txt

Second, process the exported records with the following functions

import re
import wordcloud
from PIL import Image
import numpy as np
import jieba


def delSysDateName(msgLine):
    """Filter out system/metadata lines of an exported QQ chat log.

    A line is dropped entirely (an empty string is returned) when it either
    starts with a ``YYYY-MM-DD H:M:S`` timestamp (the date/nickname line) or
    contains one of the system-notice markers listed below (votes, auto
    replies, join requests, recalled messages, red packets, ...).

    Any other line is handed to ``delUselessInfo`` and that function's result
    is returned.

    :param msgLine: one line of the exported chat log.
    :return: ``""`` for a filtered line, otherwise the cleaned message text.
    """
    # Substrings that mark a line as a system notice rather than chat text.
    system_markers = (
        '参加了投票',
        '自动回复',
        '申请加入',
        '撤回',
        '[QQ红包]',
        '我现在有事不在',
        '对方已',
    )

    # Raw string: "\d" in a normal literal is an invalid escape sequence and
    # triggers a warning on recent Python versions.
    has_timestamp = re.search(
        r"^\d{4}-\d{2}-\d{2} \d{1,2}:\d{1,2}:\d{1,2}", msgLine
    ) is not None

    if has_timestamp or any(marker in msgLine for marker in system_markers):
        return ""

    # Ordinary chat content: continue with the next cleaning step.
    return delUselessInfo(msgLine)


def delUselessInfo(msgLine):
    """Strip @-mentions and non-text placeholders from one chat message.

    Steps, in order:

    1. remove every single-quote character;
    2. remove each ``@name `` mention, i.e. an ``@`` up to and including the
       first space that follows it (a mention with no following space is
       left untouched);
    3. remove the ``[图片]`` (picture) and ``[表情]`` (emoticon) placeholders
       that the txt export writes in place of images;
    4. trim leading/trailing whitespace.

    :param msgLine: one line of chat text.
    :return: the cleaned text, possibly an empty string.
    """
    without_quotes = msgLine.replace("'", "")

    # Non-greedy on purpose: a greedy ``.*`` runs to the LAST space on the
    # line and would delete the message text that follows the mention.
    without_mentions = re.sub(r'@.*? ', "", without_quotes)

    return without_mentions.replace("[图片]", "").replace("[表情]", "").strip()


if __name__ == '__main__':

    # Image used as the word-cloud mask. Pick any picture with clearly
    # separated colours (a five-pointed star, a solid-colour map, ...).
    image = np.array(Image.open("pikaqiu.png"))

    # Only the read needs the file handle; everything else runs after the
    # file has been closed.
    with open(r"全部消息记录.txt", "r", encoding="utf8") as file:
        msg = file.read()

    # Clean the log line by line and keep only lines that still have content
    # after the system notices (votes, red packets, ...) were filtered out.
    cleanedLines = []
    for line in msg.strip().split("\n"):
        replacedStr = delSysDateName(line)
        if replacedStr != "":
            cleanedLines.append(replacedStr)

    # The text fed to the word cloud is the list's string representation.
    usefulMsg = str(cleanedLines)

    # Each entry is a separate stop word. A missing comma between two adjacent
    # literals would silently concatenate them into one string, so that
    # neither word is filtered.
    stopWords = [
        '请使用手机QQ查看',
        '最近联系人\'',
        '全体成员\'',
        '群签到',
        '消息分组',
        '滑呀滑\'',
        '请使用新版手机QQ查看',
        '请使用最新版手机QQ查看',
        r'xa0',
        '多处登录上线通知\'',
        '多处登录下线通知\'',
    ]

    # Raw string so the backslashes of the Windows path are never read as
    # escape sequences.
    w = wordcloud.WordCloud(
        font_path=r"C:\Windows\Fonts\STKAITI.TTF",
        background_color="white",
        mask=image,
        stopwords=stopWords,
    )

    # NOTE(review): joining the jieba tokens with "" appears to rebuild the
    # unsegmented text, so the segmentation likely has no effect here.
    # Switching to " ".join would change how the text is tokenised and the
    # quote-suffixed stop words above would need revisiting - confirm intent.
    w.generate("".join(jieba.lcut(usefulMsg)))

    w.to_file("msg_cloud.png")
Published 131 original articles · won praise 81 · views 60000 +

Guess you like

Origin blog.csdn.net/weixin_43469047/article/details/103950326