20230811 Use python3 to batch merge Chinese and English subtitles in SRT format under WIN11

20230811 Use python3 to batch merge Chinese and English subtitles in SRT format under WIN11
2023/8/11 8:35


Background: merge the Simplified Chinese subtitles translated by Google with the English SRT subtitles that Silhouette/RP2023 produces directly through voice recognition, so that the result is a single bilingual (Chinese + English) subtitle file.
Since the UTF-8 encoding problem has already been solved, this article does not cover merging ANSI/GBK-encoded files.
Interested readers can adapt the scripts below for that case.


1. Review: J:\! ! ! ! Documentation 20230625\en2cn\20230809 Use python3 to batch convert TXT files to SRT files under WIN10
utf8txt2srt3all.py

# coding=utf-8
import os

def renumber_srt_lines(lines):
    """Replace the cue index lines of SRT-style text with 1, 2, 3, ...

    The first line, and every line that follows a blank separator line, is
    treated as a cue index and replaced by the running counter; every other
    line is copied unchanged.

    Args:
        lines: Lines of the subtitle text, as returned by ``readlines()``.

    Returns:
        A new list of lines with sequential cue indexes.
    """
    renumbered = []
    xuhao = 1  # running cue index ("xuhao" = serial number)
    expect_index = True
    for line in lines:
        if expect_index:
            # The original index line is dropped and regenerated.
            renumbered.append(str(xuhao) + '\n')
            expect_index = False
            continue
        # A whitespace-only line also counts as a separator; a final
        # one-character line without a newline must not.
        if not line.strip():
            expect_index = True
            xuhao += 1
        renumbered.append(line)
    return renumbered


def convert_txt_to_srt(directory=None):
    """Convert every ``.txt`` file in *directory* to renumbered SRT files.

    For ``name.txt`` this writes ``name.cn.srt`` and ``name.cn+en.srt``,
    both holding the renumbered content of the text file.

    Args:
        directory: Folder to process; defaults to the current directory.
    """
    if directory is None:
        directory = os.getcwd()

    for file in os.listdir(directory):
        if not file.endswith('.txt'):
            continue
        # Only swap the extension; str.replace would also rewrite a '.txt'
        # that appears in the middle of the file name.
        stem = file[:-len('.txt')]

        # Read the input before creating any output, so a failed read does
        # not leave empty result files behind.
        with open(os.path.join(directory, file), "r", encoding="UTF-8") as f:
            lines = f.readlines()
        renumbered = renumber_srt_lines(lines)

        for suffix in ('.cn.srt', '.cn+en.srt'):
            target = os.path.join(directory, stem + suffix)
            with open(target, "w", encoding="UTF-8") as out:
                out.writelines(renumbered)


if __name__ == "__main__":
    convert_txt_to_srt()


2. Preface: an accidental discovery.
For SRT subtitles, the player appears to re-sort the cues by their timeline!
In other words, you can put the first subtitle at the end of the file without any problem!

# coding=utf-8
import os

def renumber_cues(lines, number=1, expect_index=True):
    """Replace cue index lines with a running counter.

    A line is treated as a cue index when it is the first line (if
    *expect_index* is true) or when it follows a blank separator line.

    Args:
        lines: Subtitle lines as returned by ``readlines()``.
        number: Index to assign to the next cue.
        expect_index: Whether the first line of *lines* is a cue index.

    Returns:
        A tuple ``(new_lines, number, expect_index)`` so that a second call
        can continue the numbering where this one stopped.
    """
    out = []
    for line in lines:
        if expect_index:
            out.append(str(number) + '\n')
            expect_index = False
            continue
        if not line.strip():
            # Blank separator: the next line starts a new cue.
            expect_index = True
            number += 1
        out.append(line)
    return out, number, expect_index


def append_english_after_chinese(cn_lines, en_lines):
    """Build the Chinese-only and the "Chinese then English" cue lists.

    All Chinese cues come first, followed by all English cues with the
    numbering continued.

    Args:
        cn_lines: Lines of the translated (Chinese) subtitle text.
        en_lines: Lines of the original English SRT file.

    Returns:
        A tuple ``(cn_only, combined)`` of line lists.
    """
    cn_only, number, expect_index = renumber_cues(cn_lines)
    combined = list(cn_only)
    if not expect_index:
        # The Chinese text did not end with a blank line. Close its last cue
        # explicitly; otherwise the first English index would be copied
        # verbatim and glued onto the last Chinese cue.
        if not combined[-1].endswith('\n'):
            combined[-1] += '\n'
        combined.append('\n')
        number += 1
    combined.extend(renumber_cues(en_lines, number, True)[0])
    return cn_only, combined


def build_concatenated_subtitles(directory=None):
    """Process every ``.txt`` file in *directory* (default: current directory).

    For ``name.txt`` (Chinese) and ``name.srt`` (English) this writes
    ``name.cn.srt`` (renumbered Chinese) and ``name.cn+en.srt`` (Chinese cues
    followed by English cues).

    Raises:
        FileNotFoundError: If the matching ``name.srt`` file does not exist.
    """
    if directory is None:
        directory = os.getcwd()

    for file in os.listdir(directory):
        if not file.endswith('.txt'):
            continue
        # Only swap the extension, not every '.txt' inside the name.
        stem = file[:-len('.txt')]

        # Read both inputs before creating any output file.
        with open(os.path.join(directory, file), "r", encoding="UTF-8") as f:
            lines = f.readlines()
        print(lines)  # debug output kept from the original script

        with open(os.path.join(directory, stem + '.srt'), "r", encoding="UTF-8") as f3_en:
            lines_en = f3_en.readlines()
        print(lines_en)  # debug output kept from the original script

        cn_only, combined = append_english_after_chinese(lines, lines_en)

        with open(os.path.join(directory, stem + '.cn.srt'), "w", encoding="UTF-8") as f2:
            f2.writelines(cn_only)
        with open(os.path.join(directory, stem + '.cn+en.srt'), "w", encoding="UTF-8") as f3:
            f3.writelines(combined)


if __name__ == "__main__":
    build_concatenated_subtitles()


Result:
1
00:02:12,766 --> 00:02:16,099
Tyler Bob and Marian Tyler are with me

2
00:02:16,100 --> 00:02:16,900
one second


1138
01:23:55,966 --> 01:23:58,133
I love you

1139
01:24:13,700 --> 01:24:15,133
for me

1140
00:02:12,766 --> 00:02:16,099
Taylor Bob and Marianne Taylor here with me

1141
00:02:16,100 --> 00:02:16,900
one second


2277
01:23:55,966 --> 01:23:58,133
i love you

2278
01:24:13,700 --> 01:24:15,133
for me


3. Now it is necessary to merge Chinese and English subtitles.
English subtitles: August 7.srt
1
00:02:12,766 --> 00:02:16,099
Taylor Bob and Marianne Taylor here with me

2
00:02:16,100 --> 00:02:16,900
one second


Chinese subtitles: August 7.cn.srt
1
00:02:12,766 --> 00:02:16,099
Taylor Bob and Marian Taylor are with me

2
00:02:16,100 --> 00:02:16,900
one second


The combined result:
1
00:02:12,766 --> 00:02:16,099
Taylor Bob and Marian Taylor are with me
Taylor Bob and Marianne Taylor here with me

2
00:02:16,100 --> 00:02:16,900
one second
one second

 

 


Problem points:
1. The Simplified Chinese SRT subtitles returned by Google Translate have formatting problems (the serial numbers in particular) and need to be corrected first! This is done by: utf8txt2srt3all.py
2. When adding the English subtitles, three kinds of lines from the English file must be dropped: the blank line, the serial number line, and the timeline line!


cn5en6utf8txt2srt3all.py

# coding=utf-8
import os

# States of the per-line scanner.
EXPECT_INDEX = 1  # next line is a cue serial number
EXPECT_TIME = 2   # next line is the cue timeline
IN_TEXT = 3       # inside the text lines of a cue


def ensure_newline(line):
    """Return *line* terminated by a newline; an empty string stays empty."""
    if line and not line.endswith('\n'):
        return line + '\n'
    return line


def merge_cn_en_lines(cn_lines, en_lines):
    """Merge Chinese and English subtitle lines, Chinese text first.

    The two inputs are walked in lockstep, one English line per Chinese line.
    Serial numbers are regenerated, timelines and blank separators are taken
    from the Chinese input, and each Chinese text line is followed by the
    English line at the same position.

    Args:
        cn_lines: Lines of the translated (Chinese) subtitle text.
        en_lines: Lines of the original English SRT file; may be shorter.

    Returns:
        A tuple ``(cn_only, cn_en)``: the renumbered Chinese lines and the
        merged bilingual lines.
    """
    cn_only = []
    cn_en = []
    state = EXPECT_INDEX
    xuhao = 1  # running cue index ("xuhao" = serial number)
    en_iter = iter(en_lines)

    for line in cn_lines:
        # Always consume one English line to stay aligned; '' once exhausted.
        line_en = next(en_iter, '')

        if state == EXPECT_INDEX:
            index_line = str(xuhao) + '\n'
            cn_only.append(index_line)
            cn_en.append(index_line)
            state = EXPECT_TIME
            continue

        if not line.strip():
            # Blank separator: the next line starts a new cue.
            state = EXPECT_INDEX
            xuhao += 1

        cn_only.append(line)
        if state == IN_TEXT:
            # Terminate both lines so a last line without a newline cannot
            # fuse the Chinese and English text together.
            cn_en.append(ensure_newline(line))
            if line_en:
                cn_en.append(ensure_newline(line_en))
        else:
            cn_en.append(line)
            if state == EXPECT_TIME:
                # The timeline has been written; text lines follow.
                state = IN_TEXT

    return cn_only, cn_en


def merge_subtitles(directory=None):
    """Process every ``.txt`` file in *directory* (default: current directory).

    For ``name.txt`` (Chinese) and ``name.srt`` (English) this writes
    ``name.cn.srt`` (renumbered Chinese) and ``name.cn+en.srt`` (bilingual,
    Chinese line above English line).

    Raises:
        FileNotFoundError: If the matching ``name.srt`` file does not exist.
    """
    if directory is None:
        directory = os.getcwd()

    for file in os.listdir(directory):
        if not file.endswith('.txt'):
            continue
        # Only swap the extension, not every '.txt' inside the name.
        stem = file[:-len('.txt')]

        # Read both inputs (and close them) before creating any output file.
        with open(os.path.join(directory, file), "r", encoding="UTF-8") as f:
            lines = f.readlines()
        with open(os.path.join(directory, stem + '.srt'), "r", encoding="UTF-8") as f3_en:
            lines_en = f3_en.readlines()

        cn_only, cn_en = merge_cn_en_lines(lines, lines_en)

        with open(os.path.join(directory, stem + '.cn.srt'), "w", encoding="UTF-8") as f2:
            f2.writelines(cn_only)
        with open(os.path.join(directory, stem + '.cn+en.srt'), "w", encoding="UTF-8") as f3:
            f3.writelines(cn_en)


if __name__ == "__main__":
    merge_subtitles()

 

 

 


4. The third method:
English first, Chinese second!
J:\! ! ! ! Document organization 20230625\en2cn\20230811 Use python3 to batch merge Chinese and English subtitles in SRT format under WIN11\utf8i TXT\en6cn5utf8txt2srt3all.py

# coding=utf-8
import os

# States of the per-line scanner.
EXPECT_INDEX = 1  # next line is a cue serial number
EXPECT_TIME = 2   # next line is the cue timeline
IN_TEXT = 3       # inside the text lines of a cue


def ensure_newline(line):
    """Return *line* terminated by a newline; an empty string stays empty."""
    if line and not line.endswith('\n'):
        return line + '\n'
    return line


def merge_bilingual_lines(cn_lines, en_lines):
    """Build Chinese-only, Chinese-first and English-first subtitle lines.

    The two inputs are walked in lockstep, one English line per Chinese line.

    * ``cn_only``: the Chinese lines with regenerated serial numbers.
    * ``cn_en``: like ``cn_only``, with each Chinese text line followed by
      the English line at the same position.
    * ``en_cn``: every English line as read (serial numbers are not
      regenerated), with each English text line followed by the Chinese text
      line at the same position.

    Args:
        cn_lines: Lines of the translated (Chinese) subtitle text.
        en_lines: Lines of the original English SRT file; may be shorter.

    Returns:
        A tuple ``(cn_only, cn_en, en_cn)`` of line lists.
    """
    cn_only = []
    cn_en = []
    en_cn = []
    state = EXPECT_INDEX
    xuhao = 1  # running cue index ("xuhao" = serial number)
    en_iter = iter(en_lines)

    for line in cn_lines:
        # Always consume one English line to stay aligned; '' once exhausted.
        line_en = next(en_iter, '')

        if state == EXPECT_INDEX:
            index_line = str(xuhao) + '\n'
            cn_only.append(index_line)
            cn_en.append(index_line)
            if line_en:
                en_cn.append(line_en)
            state = EXPECT_TIME
            continue

        if not line.strip():
            # Blank separator: the next line starts a new cue.
            state = EXPECT_INDEX
            xuhao += 1

        cn_only.append(line)
        if state == IN_TEXT:
            # Terminate both lines so a last line without a newline cannot
            # fuse the two languages together.
            cn_en.append(ensure_newline(line))
            if line_en:
                cn_en.append(ensure_newline(line_en))
                en_cn.append(ensure_newline(line_en))
            en_cn.append(ensure_newline(line))
        else:
            cn_en.append(line)
            if line_en:
                en_cn.append(line_en)
            if state == EXPECT_TIME:
                # The timeline has been written; text lines follow.
                state = IN_TEXT

    return cn_only, cn_en, en_cn


def merge_bilingual_subtitles(directory=None):
    """Process every ``.txt`` file in *directory* (default: current directory).

    For ``name.txt`` (Chinese) and ``name.srt`` (English) this writes
    ``name.cn.srt``, ``name.cn+en.srt`` (Chinese above English) and
    ``name.en+cn.srt`` (English above Chinese).

    Raises:
        FileNotFoundError: If the matching ``name.srt`` file does not exist.
    """
    if directory is None:
        directory = os.getcwd()

    for file in os.listdir(directory):
        if not file.endswith('.txt'):
            continue
        # Only swap the extension, not every '.txt' inside the name.
        stem = file[:-len('.txt')]

        # Read both inputs (and close them) before creating any output file.
        with open(os.path.join(directory, file), "r", encoding="UTF-8") as f:
            lines = f.readlines()
        with open(os.path.join(directory, stem + '.srt'), "r", encoding="UTF-8") as f3_en:
            lines_en = f3_en.readlines()

        cn_only, cn_en, en_cn = merge_bilingual_lines(lines, lines_en)

        outputs = (
            ('.cn.srt', cn_only),
            ('.cn+en.srt', cn_en),
            ('.en+cn.srt', en_cn),
        )
        for suffix, content in outputs:
            # 'with' guarantees every output is flushed and closed, including
            # the English-first file.
            with open(os.path.join(directory, stem + suffix), "w", encoding="UTF-8") as out:
                out.writelines(content)


if __name__ == "__main__":
    merge_bilingual_subtitles()


 

 

 

Guess you like

Origin blog.csdn.net/wb4916/article/details/132227216