nonlinear

from aip import AipSpeech

bd_k_l = ['11059852', '5Kk01GtG2fjCwpzEkwdn0mjw', 'bp6Wyx377Elq7RsCQZzTBgGUFzLm8G2A']
APP_ID, API_KEY, SECRET_KEY = bd_k_l


def gen_bd_mp3(uid, str_):
    mp3_dir = 'C:\\Users\\sas\\PycharmProjects\\produce_video\\mymp3\\'
    mp3_dir = 'D:\\mymp3\\'
    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
    result = client.synthesis(str_, 'zh', 1, {
        'vol': 5,
    })
    # If synthesis succeeds, the audio binary is returned; on failure a dict is returned (see the Baidu error codes)
    if not isinstance(result, dict):
        f_w = '{}{}{}{}'.format(mp3_dir, 'semHAND', uid, '.mp3')
        #  with open('auido.b.mp3', 'wb') as f:
        with open(f_w, 'wb') as f:
            f.write(result)
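
For reference, a minimal sketch of calling the synthesizer directly and checking which case came back. It assumes the usual behaviour of the baidu-aip SDK noted in the comment above (audio bytes on success, an error dict on failure); the exact fields of the error dict depend on the SDK version, and the names demo_client / demo.mp3 are made up for illustration.

demo_client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
result = demo_client.synthesis('你好，世界', 'zh', 1, {'vol': 5})
if isinstance(result, dict):
    # failure: the SDK returned a dict describing the error
    print('synthesis failed:', result)
else:
    # success: raw MP3 bytes, safe to write to a file
    with open('demo.mp3', 'wb') as f:
        f.write(result)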


import os

os_sep = os.sep
this_file_abspath = os.path.abspath(__file__)
this_file_dirname, this_file_name = os.path.dirname(this_file_abspath), os.path.abspath(__file__).split(os_sep)[-1]

uid_d = []
f, uid_n = 'html.parp.txt', 0
you_l = ['。', '\n']

whole_s = ''
with open(f, 'r', encoding='utf-8') as fr:
    for i in fr:
        whole_s = '{}{}'.format(whole_s, i)

# Define the delimiter characters used to split sentences; adjust them as needed
# cutlist = "。!?".decode('utf-8')

cutlist = ['\n', '\t', '。', ';', '?', '.', ';', '?', '...', '、、、', ':', '!', '!']
cutlist = ['\n', '\t', '。', ';', '?', '.', '?', '...', '、、、', '!', '!']


# cutlist = [ '。', ';', '?', '.', ';', '?', '...', '、、、',':',':',',']
# cutlist = [ '。', ';', '?', '.', ';', '?', '...', '、、、',':',',','、']


# Return True if the character is one of the sentence delimiters, otherwise False
def FindToken(cutlist, char):
    return char in cutlist


# Core sentence-splitting function
def Cut(cutlist, lines):  # cutlist: delimiter characters; lines: the text to split, as a list of characters
    l = []  # list of finished sentences; this is the return value
    line = []  # buffer for the characters of the sentence currently being built

    for i in lines:  # examine each character in turn
        if FindToken(cutlist, i):  # the character is a delimiter
            line.append(i)  # keep the delimiter at the end of the sentence
            l.append(''.join(line))  # move the buffered sentence into the result list
            line = []  # clear the buffer for the next sentence
        else:  # not a delimiter: just buffer the character
            line.append(i)
    return l
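
A quick sanity check of the splitter on a made-up string (the expected output is shown as a comment). Note that because Cut compares one character at a time, multi-character entries in cutlist such as '...' and '、、、' can never actually match.

demo = Cut(list(cutlist), list('今天天气不错。出去走走。'))
print(demo)  # ['今天天气不错。', '出去走走。']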


sentence_l = Cut(list(cutlist), list(whole_s))
sentence_l_noblank = []
for i in sentence_l:
    if i != '\n':
        sentence_l_noblank.append(i.replace('\n', ''))

'''
Segment natural sentences with correct punctuation
'''

'''
Generate sounds in units of sentences
'''

from aip import AipSpeech

bd_k_l = ['11059852', '5Kk01GtG2fjCwpzEkwdn0mjw', 'bp6Wyx377Elq7RsCQZzTBgGUFzLm8G2A']
APP_ID, API_KEY, SECRET_KEY = bd_k_l


def gen_bd_mp3(uid, str_):
    mp3_dir = 'C:\\Users\\sas\\PycharmProjects\\produce_video\\mymp3\\'
    mp3_dir = 'D:\\mymp3\\'
    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
    result = client.synthesis(str_, 'zh', 1, {
        'vol': 5,
    })
    # If synthesis succeeds, the audio binary is returned; on failure a dict is returned (see the Baidu error codes)
    if not isinstance(result, dict):
        f_w = '{}{}{}{}'.format(mp3_dir, 'oneSen', uid, '.mp3')
        #  with open('auido.b.mp3', 'wb') as f:
        with open(f_w, 'wb') as f:
            f.write(result)


uid_d = {}
uid, uid_n = 43020157, 0
uid_d[uid] = {}
uid_d[uid]['sen_d'], uid_d[uid]['img_l'] = {}, []
for i in sentence_l_noblank:
    uid_sen = '{}{}{}'.format(uid, '_', uid_n)
    # gen_bd_mp3(uid_sen, i)
    uid_n += 1
    mp3_dir = 'D:\\mymp3\\'
    f_w = '{}{}{}{}'.format(mp3_dir, 'oneSen', uid_sen, '.mp3')
    uid_d[uid]['sen_d'][f_w] = i

import glob

f_img_d = '{}{}{}{}{}'.format(this_file_dirname, os_sep, 'mypng', os_sep, '*.jpg')
imgs = glob.glob(f_img_d)
uid_d[uid]['img_l'] = []
for i in imgs:
    if 'logo' in i:
        if 'uid' in i:
            #      print(i)
            uid_d[uid]['img_logo_uid'] = i
        else:
            uid_d[uid]['img_logo_our'] = i
    else:
        uid_d[uid]['img_l'].append(i)

for i in uid_d[uid]:
    print(i)

import os, time, glob
import cv2

os_sep = os.sep
this_file_abspath = os.path.abspath(__file__)
this_file_dirname, this_file_name = os.path.dirname(this_file_abspath), os.path.abspath(__file__).split(os_sep)[-1]

logo_f, logo_f_uid, imgs = uid_d[uid]['img_logo_our'], uid_d[uid]['img_logo_uid'], uid_d[uid]['img_l']
img_size_d = {}
for i in imgs:
    img = cv2.imread(i)
    w_h_s = '{},{}'.format(img.shape[1], img.shape[0])
    if w_h_s not in img_size_d:
        img_size_d[w_h_s] = 1
    else:
        img_size_d[w_h_s] += 1

# take the most common (mode) image size
mode_img_size_wh = [int(i) for i in
                    sorted(img_size_d.items(), key=lambda mytuple: mytuple[1], reverse=True)[0][0].split(',')]
mode_img_size_wh = [1208, 720]
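
The sorted(...) one-liner picks the most frequent '(w,h)' key from img_size_d before it is replaced by the hard-coded [1208, 720]. An equivalent, arguably clearer form using collections.Counter (a sketch, not part of the original script):

from collections import Counter

most_common_wh, _count = Counter(img_size_d).most_common(1)[0]
mode_img_size_wh_alt = [int(i) for i in most_common_wh.split(',')]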
os_sep = os.sep

import imageio

imageio.plugins.ffmpeg.download()
from moviepy.editor import VideoFileClip
# f_mp3 = 'g3dbG3g3uidnoBRBlankLine.06.mp3'
import mutagen.id3
from mutagen.easyid3 import EasyID3
from mutagen.mp3 import MP3
# EasyID3.valid_keys["comment"] = "COMM::'XXX'"
# id3info = MP3(f_mp3, ID3=EasyID3)
# t_spend = id3info.info.length
import cv2
import glob


def resize_rescale_pilimg(img_f, w_h_tuple=(mode_img_size_wh[0], mode_img_size_wh[1]), mid_factor=1):
    img_n, img_type = img_f.split('.')[-2], img_f.split('.')[-1]
    # output directory: the image directory's path with 'resize_rescale_pilimg' appended (created below if missing)
    img_n_resize_rescale_pilimg_dir = '{}{}{}'.format(os_sep.join(img_n.split(os_sep)[:-1]), 'resize_rescale_pilimg',
                                                      os_sep)
    os.makedirs(img_n_resize_rescale_pilimg_dir, exist_ok=True)  # make sure the output directory exists
    img_n_resize_rescale_pilimg = '{}{}{}'.format(img_n_resize_rescale_pilimg_dir, img_n.split(os_sep)[-1], '.PNG')
    img_type = 'PNG'
    img_f_new = img_n_resize_rescale_pilimg
    mid_icon = Image.open(img_f)
    mid_icon_w, mid_icon_h = w_h_tuple[0] * mid_factor, w_h_tuple[1] * mid_factor
    mid_icon = mid_icon.resize((mid_icon_w, mid_icon_h), Image.ANTIALIAS)
    mid_icon.save(img_n_resize_rescale_pilimg, img_type)
    return img_f_new


from PIL import Image, ImageDraw, ImageFont

myfont = ImageFont.truetype("simhei.ttf", encoding="utf-8")
import cv2
import numpy as np
import math

br_step = math.floor((mode_img_size_wh[0]) * 0.0185)
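
For the hard-coded width of 1208 this gives br_step = floor(1208 * 0.0185) = 22, so the on-screen subtitle text is wrapped every 22 characters; by the same logic the font size chosen inside gen_video is floor(1208 * 0.040) = 48 px.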


def gen_video(os_delay_factor=0.046, bear_error_second=0.05):
    audio_spend = 0
    f_v = '{}{}{}{}'.format('D:\\myv\\', uid, int(time.time()), '.avi')
    fps, fourcc = 15, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')
    videoWriter = cv2.VideoWriter(f_v, fourcc, fps, (mode_img_size_wh[0], mode_img_size_wh[1]))
    img_seq = 0
    for f_mp3 in uid_d[uid]['sen_d']:
        screen_str = uid_d[uid]['sen_d'][f_mp3]
        screen_str_l = []
        br_step_times = math.ceil(len(screen_str) / br_step)

        for i in range(br_step_times):
            myrow = screen_str[i * br_step:(i + 1) * br_step]
            screen_str_l.append(myrow)
        screen_str = '\n'.join(screen_str_l)


        imgname = uid_d[uid]['img_l'][img_seq % len(uid_d[uid]['img_l'])]
        img_seq += 1
        frame = cv2.imread(imgname)
        if (frame.shape[1], frame.shape[0]) != (mode_img_size_wh[0], mode_img_size_wh[1]):
            imgname = resize_rescale_pilimg(imgname)
            frame = cv2.imread(imgname)
        else:
            pass

        img1 = cv2.imread(imgname)  # load the background image
        img2 = cv2.imread(logo_f)  # our logo
        rows, cols, channels = img2.shape
        roi = img1[0:rows, 0:cols]  # top-left region of img1, the same size as the logo
        img2gray = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)  # grayscale copy of the logo for thresholding
        ret, mask = cv2.threshold(img2gray, 175, 255, cv2.THRESH_BINARY)  # binarize: bright pixels -> 255, dark -> 0
        mask_inv = cv2.bitwise_not(mask)  # inverted mask: black becomes white, white becomes black
        img1_bg = cv2.bitwise_and(roi, roi, mask=mask)  # keep the background where the logo is light
        img2_fg = cv2.bitwise_and(img2, img2, mask=mask_inv)  # keep the logo pixels where the logo is dark
        dst = cv2.add(img1_bg, img2_fg)  # combine background and logo
        img1[0:rows, 0:cols] = dst  # write the blended region back into the image

        img3 = cv2.imread(logo_f_uid)  # uploader's logo
        rows, cols, channels = img3.shape
        rows1, cols1, channels1 = img1.shape
        roi = img1[0:rows, cols1 - cols:cols1]  # top-right region of img1, the same size as this logo
        img3gray = cv2.cvtColor(img3, cv2.COLOR_BGR2GRAY)  # grayscale copy for thresholding
        ret, mask = cv2.threshold(img3gray, 175, 255, cv2.THRESH_BINARY)  # binarize: bright pixels -> 255, dark -> 0
        mask_inv = cv2.bitwise_not(mask)  # inverted mask
        img1_bg = cv2.bitwise_and(roi, roi, mask=mask)  # keep the background where this logo is light
        img3_fg = cv2.bitwise_and(img3, img3, mask=mask_inv)  # keep the logo pixels where it is dark
        dst = cv2.add(img1_bg, img3_fg)  # combine background and logo
        img1[0:rows, cols1 - cols:cols1] = dst  # write the blended region back into the top-right corner

        frame = img1
        frame_cv2 = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_pil = Image.fromarray(frame_cv2) # Convert to PIL image format
        font_size = math.floor((mode_img_size_wh[0]) * 0.040)
        font = ImageFont.truetype("simhei.ttf", font_size, encoding="utf-8")
        f_x, f_y = math.floor((mode_img_size_wh[0]) * 0.06), math.floor(
            mode_img_size_wh[1] * 0.85) - br_step_times * font_size
        ImageDraw.Draw(frame_pil).text((f_x, f_y), screen_str, (255, 0, 0), font)
        frame_cv2 = cv2.cvtColor(np.array(frame_pil), cv2.COLOR_RGB2BGR)
        img = frame_cv2

        EasyID3.valid_keys["comment"] = "COMM::'XXX'"
        id3info = MP3(f_mp3, ID3=EasyID3)
        t_spend = id3info.info.length

        audio_spend += t_spend
        print(audio_spend)
        myinterval = t_spend * os_delay_factor
        print(myinterval, '---------------', screen_str)
        this_time = time.time()
        while time.time() - this_time < myinterval:
            videoWriter.write(img)
    videoWriter.release()
    time.sleep(1)

    print(f_v)
    video_playtime = VideoFileClip(f_v).duration
    print(math.fabs(video_playtime - audio_spend))
    if math.fabs(video_playtime - audio_spend) > bear_error_second:
        os_delay_factor *= audio_spend / video_playtime
        gen_video(os_delay_factor, bear_error_second)
    else:
        os._exit(123)


gen_video(os_delay_factor=0.001, bear_error_second=0.05)
ddd = 9

The premise of the above code is that the system behaves linearly. Because the frame-writing loop in gen_video runs for a wall-clock interval (t_spend * os_delay_factor) per sentence, the length of the finished video depends on how fast the machine happens to write frames. The calibration step assumes that the video duration scales proportionally with os_delay_factor, so it multiplies the factor by audio_spend / video_playtime and renders again; in reality the relationship is not exactly linear (hence the title, nonlinear), which is why several passes may be needed before the error drops below bear_error_second.
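
A minimal sketch of that proportional correction, with a made-up render function standing in for the real frame-writing pass (illustrative only, not part of the original script):

def calibrate(render, target_seconds, factor=0.001, tolerance=0.05, max_rounds=10):
    for _ in range(max_rounds):
        produced_seconds = render(factor)  # one full render pass with the current factor
        if abs(produced_seconds - target_seconds) <= tolerance:
            return factor  # close enough: keep this factor
        factor *= target_seconds / produced_seconds  # linear-model correction, as in gen_video
    return factor

If the machine writes frames at a roughly constant rate the loop converges quickly; if the rate drifts (disk cache, other processes), the linear assumption breaks down and more rounds are needed.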