"""Text -> narrated slideshow video pipeline.

Reads 'html.parp.txt', splits it into sentences, (optionally) synthesizes one
mp3 per sentence via Baidu AipSpeech, overlays two logos and the sentence text
onto a rotating set of JPG images, and writes an AVI whose playing time is
calibrated against the total mp3 audio length by recursively re-rendering with
an adjusted per-frame delay factor.
"""
from aip import AipSpeech

import glob
import math
import os
import time

import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont

# NOTE(review): hard-coded API credentials — move to environment/config;
# these are exposed to anyone who can read this file.
bd_k_l = ['11059852', '5Kk01GtG2fjCwpzEkwdn0mjw', 'bp6Wyx377Elq7RsCQZzTBgGUFzLm8G2A']
APP_ID, API_KEY, SECRET_KEY = bd_k_l


def gen_bd_mp3(uid, str_):
    """Synthesize Chinese text `str_` to '<mp3_dir>semHAND<uid>.mp3'.

    Baidu's `synthesis` returns raw mp3 bytes on success and a dict on error
    (see Baidu's error-code table), so a non-dict result is written to disk.
    """
    # NOTE(review): the second assignment wins; the effective path contains
    # literal spaces ('D: \ mymp3 \') — almost certainly a typo for
    # 'D:\\mymp3\\'. Kept as-is to preserve behavior — TODO confirm.
    mp3_dir = 'C:\\Users\\sas\\PycharmProjects\\produce_video\\mymp3\\'
    mp3_dir = 'D: \\ mymp3 \\'
    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
    result = client.synthesis(str_, 'zh', 1, {
        'vol': 5,
    })
    if not isinstance(result, dict):
        f_w = '{}{}{}{}'.format(mp3_dir, 'semHAND', uid, '.mp3')
        with open(f_w, 'wb') as f:
            f.write(result)


# was "import them" in the original — a typo; `os` is what is actually used.
os_sep = os.sep
this_file_abspath = os.path.abspath(__file__)
this_file_dirname, this_file_name = os.path.dirname(this_file_abspath), this_file_abspath.split(os_sep)[-1]

uid_d = []
f, uid_n = 'html.parp.txt', 0
you_l = ['。', '\n']  # unused in the visible code; kept for fidelity

# Read the whole source text. (The original accumulated line-by-line with
# string formatting, which is quadratic; read() produces the identical string.)
with open(f, 'r', encoding='utf-8') as fr:
    whole_s = fr.read()

# Sentence-delimiter characters; can be adjusted to actual needs.
# Cut() compares single characters, so the multi-character entries
# ('...', '、、、') can never match — kept for fidelity with the original.
cutlist = ['\n', '\t', '。', ';', '?', '.', '?', '...', '、、、', '!', '!']


def FindToken(cutlist, char):
    """Return True if `char` is a sentence-delimiter character."""
    return char in cutlist


def Cut(cutlist, lines):
    """Split the character sequence `lines` into sentences.

    Each delimiter character ends the current sentence (delimiter included).
    Characters after the last delimiter are dropped, exactly as in the
    original implementation. Returns the list of sentences.
    """
    l = []     # finished sentences
    line = []  # characters of the sentence currently being built
    for ch in lines:
        line.append(ch)
        if FindToken(cutlist, ch):
            l.append(''.join(line))
            line = []
    return l


sentence_l = Cut(list(cutlist), list(whole_s))

# Drop pure-newline "sentences" and strip embedded newlines from the rest.
sentence_l_noblank = [s.replace('\n', '') for s in sentence_l if s != '\n']

'''
Segment natural sentences with correct punctuation
'''
'''
Generate sounds in units of sentences
'''


def gen_bd_mp3(uid, str_):  # noqa: F811 — intentionally redefines the version above
    """Per-sentence variant of gen_bd_mp3: writes '<mp3_dir>oneSen<uid>.mp3'."""
    mp3_dir = 'C:\\Users\\sas\\PycharmProjects\\produce_video\\mymp3\\'
    mp3_dir = 'D: \\ mymp3 \\'
    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
    result = client.synthesis(str_, 'zh', 1, {
        'vol': 5,
    })
    if not isinstance(result, dict):
        f_w = '{}{}{}{}'.format(mp3_dir, 'oneSen', uid, '.mp3')
        with open(f_w, 'wb') as f:
            f.write(result)


# Map each expected per-sentence mp3 path to its sentence text.
uid_d = {}
uid, uid_n = 43020157, 0
uid_d[uid] = {}
uid_d[uid]['sen_d'], uid_d[uid]['img_l'] = {}, []
for sentence in sentence_l_noblank:
    uid_sen = '{}{}{}'.format(uid, '_', uid_n)
    # gen_bd_mp3(uid_sen, sentence)  # synthesis disabled; mp3s assumed to exist
    uid_n += 1
    mp3_dir = 'D: \\ mymp3 \\'
    f_w = '{}{}{}{}'.format(mp3_dir, 'oneSen', uid_sen, '.mp3')
    uid_d[uid]['sen_d'][f_w] = sentence

# Collect slideshow images; paths containing 'logo' are logos, and among
# those, 'uid' in the path marks the per-user logo vs. the site logo.
f_img_d = '{}{}{}{}{}'.format(this_file_dirname, os_sep, 'mypng', os_sep, '*.jpg')
imgs = glob.glob(f_img_d)
uid_d[uid]['img_l'] = []
for i in imgs:
    if 'logo' in i:
        if 'uid' in i:
            uid_d[uid]['img_logo_uid'] = i
        else:
            uid_d[uid]['img_logo_our'] = i
    else:
        uid_d[uid]['img_l'].append(i)

for i in uid_d[uid]:
    print(i)

logo_f, logo_f_uid, imgs = uid_d[uid]['img_logo_our'], uid_d[uid]['img_logo_uid'], uid_d[uid]['img_l']

# Tally (width, height) frequencies across the slideshow images to find the
# modal frame size.
img_size_d = {}
for i in imgs:
    img = cv2.imread(i)
    w_h_s = '{},{}'.format(img.shape[1], img.shape[0])
    img_size_d[w_h_s] = img_size_d.get(w_h_s, 0) + 1
# take mode
mode_img_size_wh = [int(v) for v in sorted(img_size_d.items(), key=lambda mytuple: mytuple[1], reverse=True)[0][0].split(',')]
# NOTE(review): the computed mode is immediately overridden by a constant;
# 1208 looks like a typo for 1280 — TODO confirm before changing.
mode_img_size_wh = [1208, 720]

import imageio

# NOTE(review): ffmpeg.download() is removed in modern imageio; the
# `imageio-ffmpeg` package is the current mechanism.
imageio.plugins.ffmpeg.download()
from moviepy.editor import VideoFileClip

import mutagen.id3
from mutagen.easyid3 import EasyID3
from mutagen.mp3 import MP3


def resize_rescale_pilimg(img_f, w_h_tuple=(mode_img_size_wh[0], mode_img_size_wh[1]), mid_factor=1):
    """Resize `img_f` to `w_h_tuple` (scaled by `mid_factor`) and save as PNG.

    Returns the path of the saved PNG under the sibling
    'resize_rescale_pilimg' directory, which is assumed to already exist —
    TODO confirm (Image.save does not create directories).
    """
    img_n, img_type = img_f.split('.')[-2], img_f.split('.')[-1]
    # NOTE(review): the format string has 3 placeholders but 5 arguments; the
    # last two are silently ignored (str.format allows extras). Kept as-is to
    # preserve the produced path.
    img_n_resize_rescale_pilimg_dir = '{}{}{}'.format(
        os_sep.join(img_n.split(os_sep)[:-1]), 'resize_rescale_pilimg', os_sep,
        img_n.split(os_sep)[-1], os_sep)
    img_n_resize_rescale_pilimg = '{}{}{}'.format(img_n_resize_rescale_pilimg_dir, img_n.split(os_sep)[-1], '.PNG')
    img_type = 'PNG'
    img_f_new = img_n_resize_rescale_pilimg
    mid_icon = Image.open(img_f)
    mid_icon_w, mid_icon_h = w_h_tuple[0] * mid_factor, w_h_tuple[1] * mid_factor
    mid_icon = mid_icon.resize((mid_icon_w, mid_icon_h), Image.ANTIALIAS)
    mid_icon.save(img_n_resize_rescale_pilimg, img_type)
    return img_f_new


# Unused, but loading it verifies simhei.ttf is available at import time.
myfont = ImageFont.truetype("simhei.ttf", encoding="utf-8")

# Characters per subtitle row, derived from the frame width.
br_step = math.floor((mode_img_size_wh[0]) * 0.0185)


def gen_video(os_delay_factor=0.046, bear_error_second=0.05):
    """Render the slideshow AVI and recursively calibrate its duration.

    For each sentence: pick the next image (cycling), resize it to the modal
    frame size if needed, blend the site logo (top-left) and user logo
    (top-right) via threshold masks, draw the wrapped sentence with PIL (which
    supports Chinese glyphs), then write that frame repeatedly for a wall-clock
    interval proportional to the sentence mp3's length. After rendering, if the
    video's duration differs from the total audio length by more than
    `bear_error_second`, scale `os_delay_factor` proportionally and re-render.

    :param os_delay_factor: wall-clock seconds of frame-writing per second of
        audio; calibrated across recursive calls.
    :param bear_error_second: tolerated |video - audio| duration gap.
    """
    audio_spend = 0
    f_v = '{}{}{}{}'.format('D:\\myv\\', uid, int(time.time()), '.avi')
    fps, fourcc = 15, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')
    videoWriter = cv2.VideoWriter(f_v, fourcc, fps, (mode_img_size_wh[0], mode_img_size_wh[1]))
    img_seq = 0
    for f_mp3 in uid_d[uid]['sen_d']:
        screen_str = uid_d[uid]['sen_d'][f_mp3]
        # Wrap the sentence into rows of br_step characters.
        br_step_times = math.ceil(len(screen_str) / br_step)
        screen_str_l = [screen_str[row * br_step:(row + 1) * br_step] for row in range(br_step_times)]
        screen_str = '\n'.join(screen_str_l)

        # Cycle through the slideshow images.
        imgname = uid_d[uid]['img_l'][img_seq % len(uid_d[uid]['img_l'])]
        img_seq += 1
        frame = cv2.imread(imgname)
        if (frame.shape[1], frame.shape[0]) != (mode_img_size_wh[0], mode_img_size_wh[1]):
            imgname = resize_rescale_pilimg(imgname)
            frame = cv2.imread(imgname)

        # --- blend the site logo into the top-left corner ---
        img1 = cv2.imread(imgname)
        img2 = cv2.imread(logo_f)
        rows, cols, channels = img2.shape
        roi = img1[0:rows, 0:cols]  # region of img1 the logo will occupy
        # Binarize the logo and build the inverse mask: near-white pixels
        # (>175) become background, the rest is the logo foreground.
        img2gray = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
        ret, mask = cv2.threshold(img2gray, 175, 255, cv2.THRESH_BINARY)
        mask_inv = cv2.bitwise_not(mask)
        img1_bg = cv2.bitwise_and(roi, roi, mask=mask)        # background from the frame
        img2_fg = cv2.bitwise_and(img2, img2, mask=mask_inv)  # foreground from the logo
        dst = cv2.add(img1_bg, img2_fg)
        img1[0:rows, 0:cols] = dst

        # --- blend the per-user logo into the top-right corner (same mask trick) ---
        img3 = cv2.imread(logo_f_uid)
        rows, cols, channels = img3.shape
        rows1, cols1, channels1 = img1.shape
        roi = img1[0:rows, cols1 - cols:cols1]
        img3gray = cv2.cvtColor(img3, cv2.COLOR_BGR2GRAY)
        ret, mask = cv2.threshold(img3gray, 175, 255, cv2.THRESH_BINARY)
        mask_inv = cv2.bitwise_not(mask)
        img1_bg = cv2.bitwise_and(roi, roi, mask=mask)
        img3_fg = cv2.bitwise_and(img3, img3, mask=mask_inv)  # AND operation
        dst = cv2.add(img1_bg, img3_fg)
        img1[0:rows, cols1 - cols:cols1] = dst
        frame = img1

        # --- draw the wrapped sentence via PIL (cv2.putText can't do Chinese) ---
        frame_cv2 = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_pil = Image.fromarray(frame_cv2)
        font_size = math.floor((mode_img_size_wh[0]) * 0.040)
        font = ImageFont.truetype("simhei.ttf", font_size, encoding="utf-8")
        f_x, f_y = math.floor((mode_img_size_wh[0]) * 0.06), math.floor(
            mode_img_size_wh[1] * 0.85) - br_step_times * font_size
        ImageDraw.Draw(frame_pil).text((f_x, f_y), screen_str, (255, 0, 0), font)
        frame_cv2 = cv2.cvtColor(np.array(frame_pil), cv2.COLOR_RGB2BGR)
        img = frame_cv2

        # Read this sentence's mp3 duration from its ID3/MPEG info.
        EasyID3.valid_keys["comment"] = "COMM::'XXX'"
        id3info = MP3(f_mp3, ID3=EasyID3)
        t_spend = id3info.info.length
        audio_spend += t_spend
        print(audio_spend)

        # Write this frame repeatedly for a wall-clock interval proportional
        # to the audio duration (the calibration knob is os_delay_factor).
        myinterval = t_spend * os_delay_factor
        print(myinterval, '---------------', screen_str)
        this_time = time.time()
        while time.time() - this_time < myinterval:
            videoWriter.write(img)

    videoWriter.release()
    time.sleep(1)
    print(f_v)

    # Compare rendered duration to total audio; re-render with a scaled
    # delay factor until within tolerance.
    video_playtime = VideoFileClip(f_v).duration
    print(math.fabs(video_playtime - audio_spend))
    if math.fabs(video_playtime - audio_spend) > bear_error_second:
        os_delay_factor *= audio_spend / video_playtime
        gen_video(os_delay_factor, bear_error_second=0.05)
    else:
        # NOTE(review): os._exit skips all cleanup/atexit handlers; sys.exit
        # would be gentler — kept to preserve the original behavior.
        os._exit(123)


gen_video(os_delay_factor=0.001, bear_error_second=0.05)
ddd = 9
Note: the calibration loop above assumes the system behaves linearly — i.e. that scaling os_delay_factor by audio_spend / video_playtime scales the resulting video duration by the same proportion.