版权声明:本文为博主原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。
全部高亮
# Keyword to highlight.
w = '比赛'
# Sample sentence in which the keyword occurs three times.
t = '比赛开始没多久就结束了比赛,现在没有比赛'
def replace_color(text, word):
    """Return *text* with every occurrence of *word* wrapped in red ANSI codes.

    Occurrences are located on the untouched input, scanning from the right
    and never overlapping, so escape sequences that were already inserted can
    never be mistaken for part of a later match.

    Args:
        text: The string to search.
        word: The substring to highlight.

    Returns:
        The highlighted string; *text* unchanged when *word* is empty or does
        not occur.
    """
    if not word:
        # An empty needle matches at every index; there is nothing to colour.
        return text
    new_word = '\033[031m' + word + '\033[0m'  # red
    len_w = len(word)
    pieces = []  # collected right-to-left, reversed before joining
    end = len(text)
    start = text.rfind(word, 0, end)
    while start != -1:
        pieces.append(text[start + len_w:end])
        pieces.append(new_word)
        # Continue strictly to the left of this match (no overlaps).
        end = start
        start = text.rfind(word, 0, end)
    pieces.append(text[:end])
    return ''.join(reversed(pieces))
# Show the raw sentence first, then the same sentence with every keyword occurrence in red.
print(t)
print(replace_color(t, w))
单个高亮
from jieba import tokenize
text = '我用小米手机订购了一袋小米'
entity = '小米'


def replace_color(word):
    """Wrap *word* in ANSI escape codes (colour parameter 33, yellow)."""
    return '\033[033m' + word + '\033[0m'


def replace_word(sentence, word, head, tail):
    """Return *sentence* with the slice ``[head:tail]`` replaced by *word*."""
    return sentence[:head] + word + sentence[tail:]


# Each item from tokenize() is unpacked as (token, start offset, end offset).
# Every token equal to the entity produces its own output line in which only
# that single occurrence is coloured.
for word, head, tail in tokenize(text):
    if word == entity:
        word = replace_color(word)
        print(replace_word(text, word, head, tail))
MySQL 高亮查询
def highlight(self, field, table, keyword, n=99):
    """Print the values of *field* that contain *keyword*, with the keyword in red.

    Builds a ``SELECT ... WHERE INSTR(...) > 0`` statement, passes it together
    with *n* to ``self.fetchone`` and prints the first column of every row it
    yields, each occurrence of *keyword* wrapped in red ANSI escape codes.

    Args:
        field: Column name to select and search.
        table: Table name to query.
        keyword: Substring to look for and highlight.
        n: Forwarded unchanged as the second argument of ``self.fetchone``
            (presumably a row limit -- TODO confirm against that helper).
    """
    # NOTE(review): the statement is assembled by string interpolation, so
    # field/table/keyword must come from trusted input. Switch to a
    # parameterised query if self.fetchone supports bound parameters.
    sql = "SELECT %s FROM %s WHERE INSTR(%s,'%s')>0;" % (field, table, field, keyword)
    highlight_word = '\033[031m' + keyword + '\033[0m'  # red
    len_w = len(keyword)
    for row in self.fetchone(sql, n):
        text = row[0]
        if keyword:
            # Locate matches on the untouched text, right-to-left and without
            # overlap, so inserted escape codes are never matched again.
            pieces = []
            end = len(text)
            start = text.rfind(keyword, 0, end)
            while start != -1:
                pieces.append(text[start + len_w:end])
                pieces.append(highlight_word)
                end = start
                start = text.rfind(keyword, 0, end)
            pieces.append(text[:end])
            text = ''.join(reversed(pieces))
        print(text)
存html
- py文件
def replace_html_tag(text, word):
    """Return *text* with every occurrence of *word* wrapped in a red ``<font>`` tag.

    Matches are searched on the untouched input, from the right and without
    overlap, so the inserted markup itself is never matched again (the naive
    in-place rescan corrupts e.g. ``'aa<f'`` when highlighting ``'a<f'``).
    Neither *text* nor *word* is HTML-escaped.

    Args:
        text: The string to search.
        word: The substring to highlight.

    Returns:
        The marked-up string; *text* unchanged when *word* is empty or does
        not occur.
    """
    if not word:
        # An empty needle matches at every index; there is nothing to mark.
        return text
    new_word = '<font color="red">' + word + '</font>'
    len_w = len(word)
    pieces = []  # collected right-to-left, reversed before joining
    end = len(text)
    start = text.rfind(word, 0, end)
    while start != -1:
        pieces.append(text[start + len_w:end])
        pieces.append(new_word)
        # Continue strictly to the left of this match (no overlaps).
        end = start
        start = text.rfind(word, 0, end)
    pieces.append(text[:end])
    return ''.join(reversed(pieces))
def save_html(ls_of_ls, prefix):
    """Write a list of rows as a bordered HTML table to ``<prefix>.html``.

    Cell values are formatted with ``str.format`` and inserted verbatim (no
    HTML escaping), so markup inside a value is emitted as markup.

    Args:
        ls_of_ls: Iterable of rows; each row is an iterable of cell values.
        prefix: Output path without the ``.html`` suffix.
    """
    fname = prefix + '.html'
    cell = '<td><font size="4">{}</font></td>'
    with open(fname, 'w', encoding='utf-8') as f:
        f.write('<html><head><meta charset="UTF-8"></head><body><table border="1">\n')
        for ls in ls_of_ls:
            # One write per table row instead of one per cell.
            f.write('<tr>' + ''.join(cell.format(i) for i in ls) + '</tr>\n')
        f.write('</table></body></html>')
texts = ['深扣菊花舔指笑', '菊花菊花一闪闪', '接天莲叶无穷碧', '硬日菊花别样红']
word = '菊花'
# One table row per text: the keyword, then the text with the keyword marked in red.
ls_of_ls = [[word, replace_html_tag(text, word)] for text in texts]
# The keyword doubles as the output file name prefix.
save_html(ls_of_ls, word)
- 生成的html代码
<html><head><meta charset="UTF-8"></head><body><table border="1">
<tr><td><font size="4">菊花</font></td><td><font size="4">深扣<font color="red">菊花</font>舔指笑</font></td></tr>
<tr><td><font size="4">菊花</font></td><td><font size="4"><font color="red">菊花</font><font color="red">菊花</font>一闪闪</font></td></tr>
<tr><td><font size="4">菊花</font></td><td><font size="4">接天莲叶无穷碧</font></td></tr>
<tr><td><font size="4">菊花</font></td><td><font size="4">硬日<font color="red">菊花</font>别样红</font></td></tr>
</table></body></html>
- html展示
菊花 | 深扣菊花舔指笑 |
菊花 | 菊花菊花一闪闪 |
菊花 | 接天莲叶无穷碧 |
菊花 | 硬日菊花别样红 |
存excel
from pandas import DataFrame
from jieba import tokenize
from xlwings import App
def replace_word(word):
    """Return *word* enclosed in lenticular brackets to mark it as an entity."""
    return '【' + word + '】'


def replace_sentence(sentence, word, head, tail):
    """Return *sentence* with the slice ``[head:tail]`` replaced by *word*."""
    return sentence[:head] + word + sentence[tail:]
def ner(text):
    """Yield one ``[text, phrase, word]`` row for every entity token in *text*.

    *text* is cut into clauses at commas, each clause is tokenised, and every
    token that belongs to the fixed entity set produces a row holding the
    full input text, the clause with that token bracketed, and the token.

    Args:
        text: The input string to scan.

    Yields:
        Three-element lists ``[text, phrase, word]``.
    """
    entities = {'小米', '苹果'}
    # Split into clauses; both the full-width and the ASCII comma count as
    # separators (the sample data in this file uses the ASCII one).
    for sentence in text.replace(',', ',').split(','):
        # Tokenise; each item is unpacked as (token, start, end) within the clause.
        for word, head, tail in tokenize(sentence):
            if word in entities:  # entity lookup
                yield [
                    text,
                    replace_sentence(sentence, replace_word(word), head, tail),
                    word,
                ]
def lss2excel(ls_of_ls, columns, fname):
    """Write rows to an Excel file via a pandas ``DataFrame``.

    Args:
        ls_of_ls: List of rows, each a list of cell values.
        columns: Column headers, one per cell in a row.
        fname: Path of the Excel file to write.
    """
    # index=False keeps the DataFrame's row index out of the sheet.
    DataFrame(ls_of_ls, columns=columns).to_excel(fname, index=False)
def merge_cells(fname):
    """Merge runs of equal values in column A of every sheet of a workbook.

    Opens *fname* in a hidden Excel instance, and for each sheet sets the
    column width and font size of the region around A1, then merges (and
    wraps) each run of consecutive equal cells in column A starting at row 2.
    Errors raised while formatting are printed in red and the workbook is
    still saved. The Excel instance is always shut down, even when opening
    or saving fails.

    Args:
        fname: Path of the workbook to modify in place.
    """
    # Start a hidden Excel instance without an empty workbook.
    app = App(add_book=False, visible=False)
    try:
        # Silence alerts while cells are merged.
        app.display_alerts = False
        book = app.books.open(fname)
        try:
            for sheet in book.sheets:
                # Contiguous data region around A1.
                current_region = sheet.cells(1, 1).current_region
                current_region.column_width = 16
                # Font size is set through the native object behind .api
                # (presumably the Windows COM backend -- TODO confirm).
                current_region.api.Font.Size = 9
                # Row of the region's last cell.
                last_row_index = current_region.last_cell.row
                if last_row_index <= 2:
                    # Fewer than two rows below row 1: nothing to merge.
                    continue
                # Single pass: `start` is the first row of the current run.
                # The scan goes one row past the region so the final run is
                # closed by the cell below it.
                start = 2
                for row in range(3, last_row_index + 2):
                    if sheet.cells(start, 1).value != sheet.cells(row, 1).value:
                        cells = sheet.range('A{}:A{}'.format(start, row - 1)).api
                        cells.MergeCells = True  # merge the run
                        cells.WrapText = True  # wrap long text
                        start = row
        except Exception as e:
            # Best effort: report the problem and still save what was done.
            print('\033[031m{}\033[0m'.format(e))
        # Re-enable alerts before saving.
        app.display_alerts = True
        book.save()
    finally:
        # Never leave a hidden Excel process behind.
        app.quit()
# Sample inputs; only texts containing an entity token produce rows.
texts = ['买小米机,送了袋小米和苹果', '诺基亚', '买华为送苹果']
fields = ['text', 'phrase', 'word']
fname = '手机.xlsx'
# Flatten the rows yielded by ner() for every sample text into one list.
ls_of_ls = [row for sample in texts for row in ner(sample)]
# Write the sheet, then merge the repeated values in its first column.
lss2excel(ls_of_ls, fields, fname)
merge_cells(fname)