from PIL import Image, ImageDraw, ImageFont, ImageFilter
import random
import glob
import numpy as np
import os
import cv2
from nespaper_semantics import seg_str
'''
1. Randomly pick a run of characters from the text corpus
2. Render them onto a background image
3. Apply the augmentation functions at random
'''
# Randomly select a contiguous run of `quantity` characters from the corpus.
def sto_choice_from_info_str(info_str, quantity):
    """Return a random contiguous substring of length `quantity` from info_str.

    Raises ValueError when the corpus is shorter than the requested length.
    """
    if len(info_str) < quantity:
        raise ValueError('info_str is shorter than the requested quantity')
    # randint's upper bound is inclusive; len - quantity is the last valid
    # start index.  (The original used len-(quantity+1), which could never
    # pick the final substring and crashed when len(info_str) == quantity.)
    start = random.randint(0, len(info_str) - quantity)
    return info_str[start:start + quantity]
def random_word_color():
    """Pick a near-black font colour and jitter each channel by 0-2."""
    base = random.choice([[10, 10, 10], [5, 5, 5], [0, 0, 0]])
    jitter = np.array([random.randint(0, 2) for _ in range(3)])
    return tuple((np.array(base) + jitter).tolist())
# Produce one background patch: pick a random file from bground_path and
# crop a width x height window out of it at a random position.
def create_an_image(bground_path, width, height):
    """Return a random width x height crop (BGR ndarray) of a random background image."""
    candidates = os.listdir(bground_path)
    chosen = random.choice(candidates)
    img = cv2.imread(os.path.join(bground_path, chosen))
    # Pick the crop origin so the window stays inside the image.
    left = random.randint(0, img.shape[1] - width)
    top = random.randint(0, img.shape[0] - height)
    return img[top:top + height, left:left + width, :]
# Pick the top-left coordinate where the text will be pasted, keeping enough
# margin so the (vertical) text run stays inside the background crop.
def random_x_y(bground_size, font_size):
    """Return a random (x, y) paste origin for the given background shape and font size."""
    height, width, _ = bground_size
    print('bground_size:', bground_size)
    print('font_size:', font_size)
    # Hand-tuned margins: roughly half of the free horizontal space for x,
    # and seven glyph heights reserved at the bottom for the vertical run.
    # 10 chars: subtract 140; 9: 100; 8: 80; 7: 40; 6: 20; <=5: nothing.
    max_x = int((width - font_size) / 2)
    x = random.randint(3, max_x)
    y = random.randint(10, height - font_size * 7)
    return x, y
def random_font_size():
    """Return a font size drawn uniformly from 22-25 px."""
    return random.randint(22, 25)
def random_font(font_path):
    """Return the full path of a font file chosen at random from font_path."""
    available = os.listdir(font_path)
    picked = random.choice(available)
    return os.path.join(font_path, picked)
# Sprinkle light-grey speckles over the image to imitate scanner noise.
def add_white_noise(image):
    """Overwrite 10-100 random pixels (or 2x2 patches) with grey value 180, in place.

    Returns the same (mutated) array for convenience.
    """
    rows, cols, _ = image.shape
    speck_count = random.randint(10, 100)
    for _ in range(speck_count):
        r = np.random.randint(2, rows)
        c = np.random.randint(2, cols)
        # Coin flip: half the time paint a small patch, otherwise one pixel.
        if random.getrandbits(1):
            image[r - 1:r + 1, c - 1:c + 1, :] = 180
        else:
            image[r, c, :] = 180
    return image
def main(infostr, save_path, num, words_length, width, height, back_path, font_path):
    """Render one fake vertical-text sample and write <name>.jpg plus <name>.txt.

    infostr: corpus string to sample characters from.
    save_path: output directory for the image/label pair.
    num: sample index used in the file name.
    words_length: number of characters to sample and draw.
    width, height: size of the background crop in pixels.
    back_path / font_path: directories holding background images and fonts.
    """
    # Randomly grab `words_length` consecutive characters from the corpus.
    random_word_ori = sto_choice_from_info_str(infostr, words_length)
    print('random_word_ori:', random_word_ori)
    # Drop punctuation and stack the remaining characters vertically,
    # one character per line.
    random_word = ''.join([i + '\n' for i in random_word_ori if i not in [':', '(', ')', '「', '」']])
    print('random_word:\n', random_word)
    # Background crop of the requested size (BGR ndarray from cv2).
    raw_image = create_an_image(back_path, width, height)
    font_size = random_font_size()
    font_name = random_font(font_path)
    print('font_name:', font_name)
    font_color = random_word_color()
    # Random paste origin that keeps the text inside the background.
    draw_x, draw_y = random_x_y(raw_image.shape, font_size)
    # cv2 arrays are BGR; PIL expects RGB, so reverse the channel axis.
    raw_image = Image.fromarray(raw_image[..., ::-1])
    font = ImageFont.truetype(font_name, font_size)
    draw = ImageDraw.Draw(raw_image)
    draw.text((draw_x, draw_y), random_word, fill=font_color, font=font)
    # Tiny rotation to simulate an imperfect scan.
    raw_image = raw_image.rotate(0.3)
    # Convert back RGB -> BGR before handing the array to cv2.imwrite
    # (the original saved the RGB array directly, swapping the R/B channels
    # in the output file); the grey noise is channel-symmetric so adding it
    # after the conversion is equivalent.
    image = add_white_noise(np.array(raw_image)[..., ::-1].copy())
    # Save the image and a sibling .txt file with the ground-truth text.
    img_name = os.path.join(save_path, 'fake_%d_%d.jpg' % (words_length, num))
    cv2.imwrite(img_name, image)
    with open(img_name.replace('.jpg', '.txt'), 'w', encoding='utf-8') as file:
        # Single write instead of a side-effect list comprehension.
        file.write(random_word_ori)
def make_fake_data(total):
    """Generate fake vertical-text training samples under data_set/.

    total: number of loop iterations (see NOTE below -- only the first
    iteration currently renders an image, matching the original behaviour).
    """
    # Directory of background images.
    back_path = './background'
    # Directory of font files.
    font_path = './font'
    # Corpus produced by make_fake_word_library().
    info_str = seg_str
    print('len(info_str):', len(info_str))
    # Number of characters per generated sample.
    words_length = 3
    output_path = 'data_set/fake_one_' + str(words_length)
    # makedirs also creates the missing 'data_set' parent directory
    # (os.mkdir raised FileNotFoundError when the parent did not exist).
    if not os.path.exists(output_path):
        os.makedirs(output_path)
    # Strip height grows with the character count; width is fixed at 32 px.
    width = 32
    if words_length < 6:
        height = 200
    elif words_length < 7:
        height = 220
    elif words_length < 8:
        height = 240
    elif words_length < 9:
        height = 280
    elif words_length < 10:
        height = 300
    else:
        height = 340
    for num in range(total):
        # NOTE(review): this guard renders only the very first sample and
        # looks like a debugging leftover -- remove it to actually generate
        # `total` images.  Kept to preserve existing behaviour.
        if num < 1:
            main(info_str, output_path, num, words_length, width, height, back_path, font_path)
        if num % 1000 == 0:
            print('[%d/%d]' % (num, total))
# Build the semantic corpus from the annotated newspaper label files.
def make_fake_word_library():
    """Concatenate the text labels of train/val label files into one corpus string.

    Each label-file line looks like '<image path> <text>'; the token after the
    last space is collected.  Returns the concatenated corpus string (the
    original only printed it, which made the result unusable by callers).
    """
    import pandas as pd
    train_path = './label/train.txt'
    val_path = './label/val.txt'
    train_names = np.array(pd.read_csv(train_path, header=None))
    val_names = np.array(pd.read_csv(val_path, header=None))
    semantics_list = []
    for row in (*train_names, *val_names):
        # The label text is the token after the last space on the line.
        semantics_list.append(row[0].split(' ')[-1])
    print(len(semantics_list))
    print(semantics_list[:2])
    semantics_str = ''.join(semantics_list)
    print(len(semantics_str))
    print(semantics_str)
    return semantics_str
if __name__ == '__main__':
    # Generate fake samples rendered from the newspaper corpus.
    make_fake_data(total=1000)
    # make_fake_word_library()
# background:
# Font: https://download.csdn.net/download/fanzonghao/11866723
# Output: