20230811 Use python3 to batch merge Chinese and English subtitles in SRT format under WIN11
2023/8/11 8:35
Origin: Merge the Simplified Chinese subtitles (translated by Google) with the English SRT subtitles (obtained directly through voice recognition in Silhouette/RP2023) into bilingual Chinese-and-English subtitles!
Since the UTF-8 encoding problem has already been solved, this article does not cover merging ANSI/GBK-encoded files!
Interested readers can adapt the scripts below for those encodings themselves!
1. Review: J:\! ! ! ! Documentation 20230625\en2cn\20230809 Use python3 to batch convert TXT files to SRT files under WIN10
utf8txt2srt3all.py
# coding=utf-8
"""Batch-convert translated ``.txt`` subtitle files into renumbered SRT files.

For every ``*.txt`` file in the current working directory two UTF-8 files
with identical content are written next to it: ``<name>.cn.srt`` and
``<name>.cn+en.srt``.  The first line of every cue (the serial-number line)
is discarded and replaced by a freshly generated sequential number; every
other line is copied unchanged.
"""
import os

TXT_SUFFIX = ".txt"


def replace_suffix(txt_name, new_suffix):
    """Return *txt_name* with its trailing ``.txt`` swapped for *new_suffix*.

    Only the final suffix is replaced, so a name such as ``a.txt.b.txt``
    keeps the ``.txt`` in the middle of the name.
    """
    return txt_name[:-len(TXT_SUFFIX)] + new_suffix


def renumber_cues(lines):
    """Return a copy of *lines* with every cue's first line renumbered.

    The first line of the input, and every line that follows a blank line,
    is treated as a serial-number line: it is dropped and replaced by the
    next number in the sequence 1, 2, 3, ...  A line counts as blank when it
    contains nothing but whitespace.
    """
    renumbered = []
    serial = 1
    expect_serial = True
    for line in lines:
        if expect_serial:
            # The original serial line is not trusted; write our own number.
            renumbered.append(str(serial) + "\n")
            expect_serial = False
            continue
        if not line.strip():
            # A blank line ends the cue; the next line is a serial line.
            expect_serial = True
            serial += 1
        renumbered.append(line)
    return renumbered


def convert_file(txt_name):
    """Write the renumbered ``.cn.srt`` and ``.cn+en.srt`` files for *txt_name*."""
    # Read the input before creating any output, so an unreadable input
    # does not leave empty output files behind.
    with open(txt_name, "r", encoding="UTF-8") as source:
        renumbered = renumber_cues(source.readlines())
    for suffix in (".cn.srt", ".cn+en.srt"):
        with open(replace_suffix(txt_name, suffix), "w", encoding="UTF-8") as target:
            target.writelines(renumbered)


def main():
    """Convert every ``.txt`` file found in the current working directory."""
    for name in os.listdir(os.getcwd()):
        if name.endswith(TXT_SUFFIX):
            convert_file(name)


if __name__ == "__main__":
    main()
2. Preface:
An accidental discovery: for SRT subtitles, the player seems to re-sort the cues by their timeline, so their order inside the file does not matter!
In other words, you can put the first subtitle at the end of the file without any problem!
# coding=utf-8
"""Experiment: append the English cues after the Chinese cues in one SRT file.

For every ``<name>.txt`` file in the current working directory (the
translated Chinese subtitles) the matching ``<name>.srt`` file (the English
subtitles) is read as well.  Two UTF-8 files are written:

* ``<name>.cn.srt``    - the Chinese cues, renumbered from 1;
* ``<name>.cn+en.srt`` - the renumbered Chinese cues followed by the English
  cues, whose numbering continues where the Chinese numbering stopped.
"""
import os

TXT_SUFFIX = ".txt"


def replace_suffix(txt_name, new_suffix):
    """Return *txt_name* with its trailing ``.txt`` swapped for *new_suffix*."""
    return txt_name[:-len(TXT_SUFFIX)] + new_suffix


def renumber_cues(lines, start=1):
    """Renumber the cues in *lines*, counting from *start*.

    The first line, and every line that follows a blank (whitespace-only)
    line, is treated as a serial-number line and replaced by the next
    sequential number.  All other lines are copied unchanged.

    Returns a ``(renumbered_lines, next_serial)`` tuple, where *next_serial*
    is the number the following cue would receive.
    """
    renumbered = []
    serial = start
    expect_serial = True
    for line in lines:
        if expect_serial:
            renumbered.append(str(serial) + "\n")
            expect_serial = False
            continue
        if not line.strip():
            expect_serial = True
            serial += 1
        renumbered.append(line)
    if renumbered and not expect_serial:
        # The input ended inside a cue, so its number is already used up.
        serial += 1
    return renumbered, serial


def terminate_last_cue(lines):
    """Return a copy of *lines* that is guaranteed to end with a blank line.

    Without this, text appended afterwards would become part of the last
    cue when the input file does not end with a blank separator line.
    """
    closed = list(lines)
    if closed and closed[-1].strip():
        if not closed[-1].endswith("\n"):
            closed[-1] += "\n"
        closed.append("\n")
    return closed


def concatenate_cues(cn_lines, en_lines):
    """Return ``(cn_out, combined_out)`` for the two subtitle line lists.

    *cn_out* is the renumbered Chinese file.  *combined_out* is *cn_out*
    followed by the English cues, numbered as a continuation of the Chinese
    ones.  Each file is parsed with fresh state, so the first English line
    is always treated as a serial line.
    """
    cn_out, next_serial = renumber_cues(cn_lines)
    en_out, _ = renumber_cues(en_lines, next_serial)
    return cn_out, terminate_last_cue(cn_out) + en_out


def convert_file(txt_name):
    """Write ``.cn.srt`` and ``.cn+en.srt`` for the Chinese file *txt_name*."""
    # Read both inputs before creating any output file.
    with open(txt_name, "r", encoding="UTF-8") as cn_file:
        cn_lines = cn_file.readlines()
    print(cn_lines)  # debug output, kept from the original experiment
    with open(replace_suffix(txt_name, ".srt"), "r", encoding="UTF-8") as en_file:
        en_lines = en_file.readlines()
    print(en_lines)  # debug output, kept from the original experiment

    cn_out, combined_out = concatenate_cues(cn_lines, en_lines)
    with open(replace_suffix(txt_name, ".cn.srt"), "w", encoding="UTF-8") as target:
        target.writelines(cn_out)
    with open(replace_suffix(txt_name, ".cn+en.srt"), "w", encoding="UTF-8") as target:
        target.writelines(combined_out)


def main():
    """Process every ``.txt`` file found in the current working directory."""
    for name in os.listdir(os.getcwd()):
        if name.endswith(TXT_SUFFIX):
            convert_file(name)


if __name__ == "__main__":
    main()
Result:
1
00:02:12,766 --> 00:02:16,099
Tyler Bob and Marian Tyler are with me
2
00:02:16,100 --> 00:02:16,900
one second
1138
01:23:55,966 --> 01:23:58,133
I love you
1139
01:24:13,700 --> 01:24:15,133
for me
1140
00:02:12,766 --> 00:02:16,099
Taylor Bob and Marianne Taylor here with me
1141
00:02:16,100 --> 00:02:16,900
one second
2277
01:23:55,966 --> 01:23:58,133
i love you
2278
01:24:13,700 --> 01:24:15,133
for me
3. Now it is necessary to merge Chinese and English subtitles.
English subtitles: August 7.srt
1
00:02:12,766 --> 00:02:16,099
Taylor Bob and Marianne Taylor here with me
2
00:02:16,100 --> 00:02:16,900
one second
Chinese subtitles: August 7.cn.srt
1
00:02:12,766 --> 00:02:16,099
Taylor Bob and Marian Taylor are with me
2
00:02:16,100 --> 00:02:16,900
one second
The combined result:
1
00:02:12,766 --> 00:02:16,099
Taylor Bob and Marian Taylor are with me
Taylor Bob and Marianne Taylor here with me
2
00:02:16,100 --> 00:02:16,900
one second
one second
Problem points:
1. The Simplified Chinese SRT subtitles produced by Google Translate are malformed and must be corrected first (the serial numbers are regenerated)! Needs correction: utf8txt2srt3all.py
2. Append the English subtitle text, but skip three kinds of lines from the English file: the blank line, the serial-number line, and the timeline line!
cn5en6utf8txt2srt3all.py
# coding=utf-8
"""Batch-merge Chinese and English SRT subtitles, Chinese line first.

For every ``<name>.txt`` file in the current working directory (the
translated Chinese subtitles) the matching ``<name>.srt`` file (the English
subtitles) is read in lockstep, one English line per Chinese line.  Two
UTF-8 files are written:

* ``<name>.cn.srt``    - the Chinese cues with regenerated serial numbers;
* ``<name>.cn+en.srt`` - the same cues, with the English line from the same
  position appended after each Chinese text line.

The merge assumes both files have the same line structure (same number of
lines per cue); no timestamps are compared.
"""
import os

TXT_SUFFIX = ".txt"

# Parser states: which kind of line is expected next within a cue.
EXPECT_SERIAL = 1
EXPECT_TIMELINE = 2
IN_TEXT = 3


def replace_suffix(txt_name, new_suffix):
    """Return *txt_name* with its trailing ``.txt`` swapped for *new_suffix*."""
    return txt_name[:-len(TXT_SUFFIX)] + new_suffix


def merge_cn_en(cn_lines, en_lines):
    """Return ``(cn_out, cn_en_out)`` built from the two line lists.

    The Chinese lines drive the loop; the English line at the same index is
    consumed alongside each one (an empty string once the English lines run
    out).  For each cue:

    * the serial line is replaced by a regenerated number in both outputs
      and the English serial line is dropped;
    * the timeline line is copied from the Chinese file and the English
      timeline is dropped;
    * each Chinese text line is copied, and in *cn_en_out* it is followed by
      the English line from the same position;
    * a blank (whitespace-only) Chinese line ends the cue; it is copied and
      the English line at that position is dropped.
    """
    cn_out = []
    cn_en_out = []
    english = iter(en_lines)
    state = EXPECT_SERIAL
    serial = 1
    for cn_line in cn_lines:
        en_line = next(english, "")
        if state == EXPECT_SERIAL:
            number_line = str(serial) + "\n"
            cn_out.append(number_line)
            cn_en_out.append(number_line)
            state = EXPECT_TIMELINE
            continue

        if not cn_line.strip():
            # Blank separator: the next line starts a new cue.
            state = EXPECT_SERIAL
            serial += 1
        cn_out.append(cn_line)
        cn_en_out.append(cn_line)

        if state == EXPECT_TIMELINE:
            # The line just copied was the timeline; text lines follow.
            state = IN_TEXT
        elif state == IN_TEXT and en_line:
            if not cn_line.endswith("\n"):
                # Keep the English text on its own line when the Chinese
                # file's last line has no trailing newline.
                cn_en_out[-1] = cn_line + "\n"
            cn_en_out.append(en_line)
    return cn_out, cn_en_out


def convert_file(txt_name):
    """Write ``.cn.srt`` and ``.cn+en.srt`` for the Chinese file *txt_name*."""
    # Read both inputs first: a missing English file then raises before any
    # output file is created or truncated, and no handle is left open.
    with open(txt_name, "r", encoding="UTF-8") as cn_file:
        cn_lines = cn_file.readlines()
    with open(replace_suffix(txt_name, ".srt"), "r", encoding="UTF-8") as en_file:
        en_lines = en_file.readlines()

    cn_out, cn_en_out = merge_cn_en(cn_lines, en_lines)
    with open(replace_suffix(txt_name, ".cn.srt"), "w", encoding="UTF-8") as target:
        target.writelines(cn_out)
    with open(replace_suffix(txt_name, ".cn+en.srt"), "w", encoding="UTF-8") as target:
        target.writelines(cn_en_out)


def main():
    """Process every ``.txt`` file found in the current working directory."""
    for name in os.listdir(os.getcwd()):
        if name.endswith(TXT_SUFFIX):
            convert_file(name)


if __name__ == "__main__":
    main()
4. The third method:
English first, Chinese second!
J:\! ! ! ! Document organization 20230625\en2cn\20230811 Use python3 to batch merge Chinese and English subtitles in SRT format under WIN11\utf8i TXT\en6cn5utf8txt2srt3all.py
# coding=utf-8
"""Batch-merge Chinese and English SRT subtitles in both orders.

For every ``<name>.txt`` file in the current working directory (the
translated Chinese subtitles) the matching ``<name>.srt`` file (the English
subtitles) is read in lockstep, one English line per Chinese line.  Three
UTF-8 files are written:

* ``<name>.cn.srt``    - the Chinese cues with regenerated serial numbers;
* ``<name>.cn+en.srt`` - Chinese text first, English text second;
* ``<name>.en+cn.srt`` - English text first, Chinese text second.

The merge assumes both files have the same line structure (same number of
lines per cue); no timestamps are compared.
"""
import os

TXT_SUFFIX = ".txt"

# Parser states: which kind of line is expected next within a cue.
EXPECT_SERIAL = 1
EXPECT_TIMELINE = 2
IN_TEXT = 3


def replace_suffix(txt_name, new_suffix):
    """Return *txt_name* with its trailing ``.txt`` swapped for *new_suffix*."""
    return txt_name[:-len(TXT_SUFFIX)] + new_suffix


def merge_bilingual(cn_lines, en_lines):
    """Return ``(cn_out, cn_en_out, en_cn_out)`` built from the two line lists.

    The Chinese lines drive the loop; the English line at the same index is
    consumed alongside each one (an empty string once the English lines run
    out).

    * *cn_out* and *cn_en_out* get regenerated serial numbers and the
      Chinese timeline; *cn_en_out* additionally gets the English line after
      every Chinese text line.
    * *en_cn_out* receives every consumed English line unchanged (including
      the English file's own serial and timeline lines), plus the Chinese
      line after every English text line.
    """
    cn_out = []
    cn_en_out = []
    en_cn_out = []
    english = iter(en_lines)
    state = EXPECT_SERIAL
    serial = 1
    for cn_line in cn_lines:
        en_line = next(english, "")
        if en_line:
            en_cn_out.append(en_line)

        if state == EXPECT_SERIAL:
            number_line = str(serial) + "\n"
            cn_out.append(number_line)
            cn_en_out.append(number_line)
            state = EXPECT_TIMELINE
            continue

        if not cn_line.strip():
            # Blank separator: the next line starts a new cue.
            state = EXPECT_SERIAL
            serial += 1
        cn_out.append(cn_line)
        cn_en_out.append(cn_line)

        if state == EXPECT_TIMELINE:
            # The line just copied was the timeline; text lines follow.
            state = IN_TEXT
        elif state == IN_TEXT:
            if en_line:
                if not cn_line.endswith("\n"):
                    # Keep the English text on its own line.
                    cn_en_out[-1] = cn_line + "\n"
                cn_en_out.append(en_line)
            if en_cn_out and not en_cn_out[-1].endswith("\n"):
                # Keep the Chinese text on its own line.
                en_cn_out[-1] += "\n"
            en_cn_out.append(cn_line)
    return cn_out, cn_en_out, en_cn_out


def convert_file(txt_name):
    """Write the three merged subtitle files for the Chinese file *txt_name*."""
    # Read both inputs first: a missing English file then raises before any
    # output file is created or truncated, and no handle is left open.
    with open(txt_name, "r", encoding="UTF-8") as cn_file:
        cn_lines = cn_file.readlines()
    with open(replace_suffix(txt_name, ".srt"), "r", encoding="UTF-8") as en_file:
        en_lines = en_file.readlines()

    outputs = zip((".cn.srt", ".cn+en.srt", ".en+cn.srt"),
                  merge_bilingual(cn_lines, en_lines))
    for suffix, out_lines in outputs:
        # The context manager also closes the .en+cn.srt file, which the
        # original script left open.
        with open(replace_suffix(txt_name, suffix), "w", encoding="UTF-8") as target:
            target.writelines(out_lines)


def main():
    """Process every ``.txt`` file found in the current working directory."""
    for name in os.listdir(os.getcwd()):
        if name.endswith(TXT_SUFFIX):
            convert_file(name)


if __name__ == "__main__":
    main()