When downloading movie subtitles, you often find that the subtitles are split into two files, cd1.srt and cd2.srt, even though the movie itself is a single file, which is awkward. To solve this problem, I wrote the following code to merge multiple subtitle files into one.
Read subtitle file
def read_srt(path, encoding=None):
    """Return the entire text of the subtitle file at *path*.

    Args:
        path: Path of the file to read.
        encoding: Text encoding passed to ``open``. The default ``None``
            keeps the platform's default encoding (the previous
            behaviour). Pass e.g. ``"utf-8"``, or ``"utf-8-sig"`` to also
            drop a leading byte-order mark.

    Returns:
        The file content as a single string.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the content does not match the encoding.
    """
    with open(path, encoding=encoding) as f:
        return f.read()
small test
# Quick check: read and print each sample file in turn. After the loop,
# `content` holds the text of the last file read ('2.srt').
for srt_name in ('1.srt', '2.srt'):
    content = read_srt(srt_name)
    print(content)
As you can see, merging subtitles involves two things: the contents of the files must be concatenated, and the entry numbers must be renumbered, because the numbering in 2.srt also starts from 1.
Splitting the subtitles into entries
def get_sequences(content):
    """Split subtitle text into entries, each a list of its non-blank lines.

    Entries are separated by one or more blank lines. A line holding only
    whitespace counts as blank, and ``\\r\\n`` / ``\\r`` line endings are
    treated like ``\\n``, so files with stray spaces on separator lines or
    Windows line endings are still split correctly.

    Args:
        content: Full text of a subtitle file.

    Returns:
        A list of entries in file order; each entry is a list of strings
        (its lines, without line terminators). Empty entries are dropped,
        so empty or all-blank input yields ``[]``.
    """
    sequences = []
    current = []
    # Normalise line endings first so a trailing '\r' never ends up inside
    # a line or hides an otherwise blank separator line.
    normalized = content.replace('\r\n', '\n').replace('\r', '\n')
    for line in normalized.split('\n'):
        if line.strip():
            current.append(line)
        elif current:
            # A blank line closes the entry collected so far.
            sequences.append(current)
            current = []
    if current:
        # The last entry need not be followed by a blank line.
        sequences.append(current)
    return sequences
small test
# Quick check: split the most recently read subtitle text (`content`) into entries.
sequences = get_sequences(content)
# Bare expression: it does nothing when run as a script; presumably meant to
# display the value in an interactive session -- confirm.
sequences
Renumbering the subtitle entries
def change_sequences(sequences, start_index):
    """Renumber subtitle entries in place.

    The first element of each entry is overwritten with consecutive
    numbers, as strings, beginning at *start_index*.

    Args:
        sequences: List of entries; each entry is a non-empty list whose
            first element is the entry number.
        start_index: Number assigned to the first entry.

    Returns:
        None. The entries inside *sequences* are modified directly.
    """
    for offset, entry in enumerate(sequences):
        entry[0] = str(start_index + offset)
Generate new subtitles
def save_srt(names, output='result.srt'):
    """Merge several subtitle files into one consecutively numbered file.

    Each file in *names* is read with ``read_srt``, split with
    ``get_sequences`` and renumbered with ``change_sequences`` so that
    numbering continues across files. The merged text is printed and
    written to *output*.

    Args:
        names: Paths of the subtitle files to merge, in playback order.
        output: Path of the merged file. Defaults to ``'result.srt'``.
            An existing file is overwritten, so running the merge again
            does not duplicate entries.

    Returns:
        None.
    """
    merged = []
    start_index = 1
    for name in names:
        sequences = get_sequences(read_srt(name))
        change_sequences(sequences, start_index)
        # Advance cumulatively. Resetting to len(sequences) + 1 is only
        # right after the first file and misnumbers a third or later file.
        start_index += len(sequences)
        merged += sequences
    new_content = '\n\n'.join('\n'.join(entry) for entry in merged)
    print(new_content)
    # 'w' rather than 'a': the output must hold exactly one merged copy.
    with open(output, 'w') as f:
        f.write(new_content)
# Merge 1.srt and 2.srt: save_srt prints the merged text and writes it to result.srt.
save_srt(['1.srt', '2.srt'])