Python高亮文本中的关键词

版权声明:本文为博主原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。
本文链接: https://blog.csdn.net/Yellow_python/article/details/100516921

print

全部高亮

w = '比赛'  # keyword to highlight
t = '比赛开始没多久就结束了比赛,现在没有比赛'  # sample sentence; the keyword occurs three times

def replace_color(text, word):
    """Return *text* with every occurrence of *word* wrapped in ANSI red.

    Matches are located from right to left and never overlap, so for
    ``replace_color('aaa', 'aa')`` it is the right-hand pair that is coloured.
    The search always runs over the untouched input, so escape codes that
    were already inserted can never become part of a later match.

    Args:
        text: The string to search.
        word: The keyword to highlight.

    Returns:
        A new string with each match surrounded by the red / reset escape
        sequences. *text* is returned unchanged when *word* is empty.
    """
    if not word:
        # An empty keyword "matches" at every index; colouring it would only
        # scatter escape codes between the characters.
        return text

    new_word = '\033[031m' + word + '\033[0m'  # red
    len_w = len(word)

    # Collect the pieces back to front, then reverse once: linear work instead
    # of rebuilding the whole string for every match.
    pieces = []
    end = len(text)
    while True:
        start = text.rfind(word, 0, end)
        if start < 0:
            break
        pieces.append(text[start + len_w:end])  # plain text right of the match
        pieces.append(new_word)
        end = start  # the next match must end at or before this one starts
    pieces.append(text[:end])
    pieces.reverse()
    return ''.join(pieces)

print(t)  # the sentence as-is
print(replace_color(t, w))  # the sentence after replace_color has processed keyword w

单个高亮

from jieba import tokenize

text = '我用小米手机订购了一袋小米'  # sample sentence; the substring '小米' appears twice
entity = '小米'  # only tokens equal to this string get highlighted

def replace_color(word):
    """Wrap *word* in the ANSI colour-33 (yellow) and reset escape sequences."""
    return ''.join(('\033[033m', word, '\033[0m'))


def replace_word(sentence, word, head, tail):
    """Return *sentence* with the slice ``[head:tail]`` swapped for *word*."""
    before = sentence[:head]
    after = sentence[tail:]
    return before + word + after

# For every token equal to `entity`, print one copy of the whole sentence in
# which only that token (located by its head/tail offsets) is coloured.
for word, head, tail in tokenize(text):
    if word != entity:
        continue
    word = replace_color(word)
    print(replace_word(text, word, head, tail))

MySQL 高亮查询

def highlight(self, field, table, keyword, n=99):
    """Print each matching row's text with *keyword* wrapped in ANSI red.

    Builds a ``SELECT ... WHERE INSTR(...) > 0`` statement, hands it to
    ``self.fetchone`` and prints the first column of every row returned,
    colouring occurrences of *keyword* from right to left.

    Args:
        field: Column name; interpolated into the SELECT and INSTR clauses.
        table: Table name; interpolated into the FROM clause.
        keyword: Substring searched for in SQL and highlighted in the output.
        n: Forwarded unchanged as the second argument of ``self.fetchone``
            (presumably a row limit -- confirm against that method).
    """
    # NOTE(review): the statement is assembled with plain string formatting,
    # so a keyword containing a quote alters the SQL. Confirm the inputs are
    # trusted, or switch to driver-level parameters if fetchone supports them.
    sql = "SELECT %s FROM %s WHERE INSTR(%s,'%s')>0;" % (field, table, field, keyword)
    for row in self.fetchone(sql, n):
        text = row[0]
        highlight_word = '\033[031m' + keyword + '\033[0m'  # red
        width = len(keyword)
        # Walk candidate start offsets from the right so that an insertion
        # never shifts the offsets that are still to be examined.
        for pos in reversed(range(len(text) - width + 1)):
            if text[pos:pos + width] != keyword:
                continue
            text = text[:pos] + highlight_word + text[pos + width:]
        print(text)

HTML

  • py文件
def replace_html_tag(text, word):
    """Return *text* with every occurrence of *word* wrapped in a red <font> tag.

    Matches are located from right to left and never overlap. The search
    always runs over the untouched input, so markup that was already inserted
    can never be mistaken for part of a later match (previously a keyword such
    as ``'a<'`` could match across the start of an inserted tag and corrupt
    the output).

    Args:
        text: The string to search.
        word: The keyword to highlight.

    Returns:
        A new string with each match replaced by
        ``<font color="red">word</font>``. *text* is returned unchanged when
        *word* is empty. No HTML escaping is applied to either argument.
    """
    if not word:
        # An empty keyword "matches" at every index; tagging it would only
        # scatter empty <font> elements between the characters.
        return text

    new_word = '<font color="red">' + word + '</font>'
    len_w = len(word)

    # Collect the pieces back to front, then reverse once: linear work instead
    # of rebuilding the whole string for every match.
    pieces = []
    end = len(text)
    while True:
        start = text.rfind(word, 0, end)
        if start < 0:
            break
        pieces.append(text[start + len_w:end])  # plain text right of the match
        pieces.append(new_word)
        end = start  # the next match must end at or before this one starts
    pieces.append(text[:end])
    pieces.reverse()
    return ''.join(pieces)


def save_html(ls_of_ls, prefix):
    """Write *ls_of_ls* as a bordered HTML table to ``<prefix>.html`` (UTF-8).

    Each inner iterable becomes one ``<tr>`` and each of its items one
    ``<td>``. Items are formatted into the cell verbatim, with no HTML
    escaping, so a cell may carry its own markup.

    Args:
        ls_of_ls: Iterable of rows, each an iterable of cell values.
        prefix: Output path without the ``.html`` extension.
    """
    cell_template = '<td><font size="4">{}</font></td>'
    with open(prefix + '.html', 'w', encoding='utf-8') as out:
        out.write('<html><head><meta charset="UTF-8"></head><body><table border="1">\n')
        for row in ls_of_ls:
            out.write('<tr>')
            out.writelines(cell_template.format(cell) for cell in row)
            out.write('</tr>\n')
        out.write('</table></body></html>')


texts = [
    '深扣菊花舔指笑',
    '菊花菊花一闪闪',
    '接天莲叶无穷碧',
    '硬日菊花别样红',
]
word = '菊花'

# One table row per sentence: the keyword, then the sentence with the keyword
# tagged; the table is saved as "<word>.html".
ls_of_ls = [[word, replace_html_tag(text, word)] for text in texts]
save_html(ls_of_ls, word)
  • 生成的 HTML 代码
<html><head><meta charset="UTF-8"></head><body><table border="1">
<tr><td><font size="4">菊花</font></td><td><font size="4">深扣<font color="red">菊花</font>舔指笑</font></td></tr>
<tr><td><font size="4">菊花</font></td><td><font size="4"><font color="red">菊花</font><font color="red">菊花</font>一闪闪</font></td></tr>
<tr><td><font size="4">菊花</font></td><td><font size="4">接天莲叶无穷碧</font></td></tr>
<tr><td><font size="4">菊花</font></td><td><font size="4">硬日<font color="red">菊花</font>别样红</font></td></tr>
</table></body></html>
  • HTML 展示
菊花 深扣菊花舔指笑
菊花 菊花菊花一闪闪
菊花 接天莲叶无穷碧
菊花 硬日菊花别样红

存 Excel

from pandas import DataFrame
from jieba import tokenize
from xlwings import App

def replace_word(word):
    """Surround *word* with the full-width brackets 【 and 】."""
    return ''.join(('【', word, '】'))


def replace_sentence(sentence, word, head, tail):
    """Return *sentence* with the slice ``[head:tail]`` swapped for *word*."""
    before = sentence[:head]
    after = sentence[tail:]
    return before + word + after


def ner(text, entities=('小米', '苹果')):
    """Yield one row for every recognised entity token found in *text*.

    *text* is split into clauses on ``','``; each clause is tokenised with
    ``tokenize`` and every token that is a member of *entities* produces a row.

    Args:
        text: The input string.
        entities: Iterable of token strings treated as entities. Defaults to
            the two words that were previously hard-coded, so existing
            callers behave exactly as before. Pass a collection of strings,
            not a single string (a bare string would be read character by
            character).

    Yields:
        ``[text, phrase, word]`` where *phrase* is the clause containing the
        token, with that one token wrapped by ``replace_word``, and *word* is
        the matched token itself.
    """
    entity_set = frozenset(entities)  # O(1) membership test per token
    for sentence in text.split(','):  # split into clauses
        for word, head, tail in tokenize(sentence):  # token plus its offsets
            if word not in entity_set:
                continue
            yield [
                text,
                replace_sentence(sentence, replace_word(word), head, tail),
                word,
            ]


def lss2excel(ls_of_ls, columns, fname):
    """Save the rows in *ls_of_ls* to the Excel file *fname*.

    The rows are loaded into a pandas ``DataFrame`` whose column labels are
    *columns*, and written without the index column.
    """
    frame = DataFrame(ls_of_ls, columns=columns)
    frame.to_excel(fname, index=False)


def merge_cells(fname):
    """Merge runs of equal adjacent values in column A of every sheet of *fname*.

    Opens the workbook in a hidden Excel instance through xlwings, sets the
    column width and font size of the region around A1, merges each run of
    identical consecutive values in column A (starting at row 2) into one
    wrapped cell, then saves the workbook in place.

    Errors raised while formatting or merging are printed in red and the
    workbook is still saved (best effort, as before). Errors from opening or
    saving propagate to the caller, but the Excel instance is now always
    shut down first instead of being left running.

    Args:
        fname: Path of the workbook to modify.
    """
    # Start Excel hidden and without an empty workbook
    app = App(add_book=False, visible=False)
    try:
        # Suppress Excel alert dialogs while cells are being merged
        app.display_alerts = False
        book = app.books.open(fname)
        try:
            for sheet in book.sheets:
                # Contiguous block of cells around A1
                current_region = sheet.cells(1, 1).current_region
                current_region.column_width = 16
                current_region.api.Font.Size = 9
                # Row number of the region's last cell
                last_row_index = current_region.last_cell.row
                if last_row_index <= 2:
                    # Same as before: sheets with at most one row below row 1
                    # are formatted but left unmerged.
                    continue
                # Single forward pass. `start` is the first row of the current
                # run; the scan reads one row past the region so that the
                # final run is closed as well. Unlike the former while/for
                # pair, this cannot loop forever when that extra row happens
                # to compare equal to the last run.
                start = 2
                for row in range(3, last_row_index + 2):
                    if sheet.cells(start, 1).value != sheet.cells(row, 1).value:
                        cells = sheet.range('A{}:A{}'.format(start, row - 1)).api
                        cells.MergeCells = True  # merge the run
                        cells.WrapText = True  # wrap long text
                        start = row
        except Exception as e:
            print('\033[031m{}\033[0m'.format(e))
        # Re-enable alerts, then save
        app.display_alerts = True
        book.save()
    finally:
        # Always shut Excel down, even when open() or save() raised
        app.quit()


fname = '手机.xlsx'
fields = ['text', 'phrase', 'word']
texts = [
    '买小米机,送了袋小米和苹果',
    '诺基亚',
    '买华为送苹果',
]

# Gather the rows produced by ner() for every text, write them to the
# workbook, then merge repeated values in its first column.
ls_of_ls = []
for text in texts:
    ls_of_ls.extend(ner(text))
lss2excel(ls_of_ls, fields, fname)
merge_cells(fname)

猜你喜欢

转载自blog.csdn.net/Yellow_python/article/details/100516921