# SRT subtitle time stitching

import os
import re

from mutagen.mp3 import MP3





def get_extention(my_fn):
    '''
    Split a backslash-separated path into (folder, stem, extension).

    Example:
        my_fn='E:\\0000\\hgf\\2018-1.2-14_105614.png'
        returns ('E:\\0000\\hgf\\', '2018-1.2-14_105614', '.png')

    The folder part keeps its trailing backslash and is '' when my_fn has no
    backslash. The extension keeps its leading dot and is '' when the file
    name has no dot. Only the last backslash and the last dot are used as
    split points; forward slashes are not treated as separators.
    '''
    # rfind gives -1 when there is no backslash, so the cut lands at 0 and
    # the folder part comes out empty.
    cut = my_fn.rfind('\\') + 1
    folder = my_fn[:cut]
    base_name = my_fn[cut:]

    dot_at = base_name.rfind('.')
    if dot_at < 0:
        # No dot at all: the whole base name is the stem.
        return (folder, base_name, '')

    return (folder, base_name[:dot_at], base_name[dot_at:])






class SrtConcator(object):
    '''
    Stitch several SRT subtitle files into a single SRT file.

    Every SRT is paired with the MP3 of the same base name inside
    ConvertedMp3Folder. When concatenating, the timestamps of each SRT are
    shifted by the summed durations of the MP3 files that precede it, so the
    offsets come from the audio lengths and timing errors do not accumulate
    from one file to the next.

    Usage:

    SrtList = [
    'D1T01 - Wuthering Heights.srt',
    'D1T02 - Wuthering Heights.srt',
    'D1T03 - Wuthering Heights.srt',
    'D1T04 - Wuthering Heights.srt',
    'D1T05 - Wuthering Heights.srt',
    'D1T06 - Wuthering Heights.srt',
    'D1T07 - Wuthering Heights.srt',
    ]
    A = SrtConcator(SrtList=SrtList, ConvertedMp3Folder='mp3_converted\\')
    A.concat('out.srt')

    Attributes:
        SrtList: the SRT paths as given by the caller.
        ConvertedMp3Folder: prefix prepended to '<base name>.mp3'.
        SRT_to_MP3: list of (srt, mp3) pairs whose MP3 file exists.
        SRT_no_MP3: list of SRT paths for which no MP3 was found.
    '''

    # One SRT timestamp: hours (one or more digits), minutes, seconds and
    # milliseconds. Both ',' (SRT) and '.' are accepted before the
    # milliseconds.
    _TIMESTAMP_RE = re.compile(r'([0-9]+):([0-9]{2}):([0-9]{2})[,.]([0-9]{3})')

    def __init__(self, SrtList, ConvertedMp3Folder):
        '''
        Pair every SRT in SrtList with its MP3 in ConvertedMp3Folder.

        ConvertedMp3Folder is used as a plain string prefix, so it must end
        with a path separator. SRT files without a matching MP3 do not raise;
        they are recorded in SRT_no_MP3 and are skipped by concat().
        '''
        self.SrtList = SrtList
        self.ConvertedMp3Folder = ConvertedMp3Folder
        self.SRT_to_MP3 = []
        self.SRT_no_MP3 = []

        for srt in SrtList:
            # get_extention returns (folder, stem, extension); only the stem
            # is needed to build the MP3 name.
            stem = get_extention(srt)[1]
            mp3 = f'{self.ConvertedMp3Folder}{stem}.mp3'
            print(mp3)
            if os.path.exists(mp3):
                self.SRT_to_MP3.append((srt, mp3))
            else:  # no matching MP3 was found
                self.SRT_no_MP3.append(srt)

    def floatseconds2srthms(self, floatseconds=361.123):
        '''
        Format a number of seconds as an SRT timestamp 'HH:MM:SS,mmm'.

        Example: 361.123 -> '00:06:01,123'.

        The value is rounded to whole milliseconds first and then split with
        integer arithmetic, so float error cannot drop a millisecond and
        rounding up carries into the seconds, minutes and hours.

        Raises:
            ValueError: if floatseconds is negative.
        '''
        if floatseconds < 0:
            raise ValueError(
                f'SRT timestamps cannot be negative: {floatseconds!r}')

        total_ms = int(round(floatseconds * 1000))
        total_seconds, milliseconds = divmod(total_ms, 1000)
        total_minutes, seconds = divmod(total_seconds, 60)
        hours, minutes = divmod(total_minutes, 60)

        return f'{hours:02d}:{minutes:02d}:{seconds:02d},{milliseconds:03d}'

    def srthms2floatseconds(self, srthms='02:03:20,235'):
        '''
        Convert the first SRT timestamp found in srthms to float seconds.

        Example: '02:03:20,235' -> 7400.235.

        Raises:
            ValueError: if srthms contains no timestamp.
        '''
        match = self._TIMESTAMP_RE.search(srthms)
        if match is None:
            raise ValueError(f'not an SRT timestamp: {srthms!r}')

        hours, minutes, seconds, millis = match.groups()
        return (float(hours) * 3600
                + float(minutes) * 60
                + float(f'{seconds}.{millis}'))

    def _parse_timing_line(self, line):
        '''Return (start, end) in seconds for a 'start --> end' line, else None.'''
        start_text, arrow, end_text = line.partition('-->')
        if not arrow:
            return None
        try:
            return (self.srthms2floatseconds(srthms=start_text),
                    self.srthms2floatseconds(srthms=end_text))
        except ValueError:
            return None

    def createSRTdata(self, srt='01－A Puma at Large.srt'):
        '''
        Parse an SRT file into a list of [(start, end), text] items.

        start and end are float seconds. text holds every text line of the
        cue (up to the blank separator line) and always ends with a newline.
        The cue counter lines of the file are not kept.

        Raises:
            ValueError: if a line that starts a cue contains '-->' but no
                parsable pair of timestamps.
        '''
        with open(srt, 'r', encoding='utf8') as f:
            print([srt])
            lines = f.readlines()

        srt_data = []
        total = len(lines)
        idx = 0
        while idx < total:
            line = lines[idx]
            idx += 1
            if '-->' not in line:
                continue  # cue counter or blank separator

            times = self._parse_timing_line(line)
            if times is None:
                raise ValueError(
                    f'{srt}: malformed timing on line {idx}: {line!r}')

            text_lines = []
            while idx < total and lines[idx].strip():
                if self._parse_timing_line(lines[idx]) is not None:
                    # The next cue starts without a blank separator; the line
                    # just before its timing line is that cue's counter.
                    if text_lines and text_lines[-1].strip().isdigit():
                        text_lines.pop()
                    break
                text_lines.append(lines[idx])
                idx += 1

            content = ''.join(text_lines)
            if not content.endswith('\n'):
                # The last line of a file may lack a newline; without one the
                # next cue would be glued to this text when written out.
                content += '\n'
            srt_data.append([times, content])
        return srt_data

    def concat(self, srtFileName):
        '''
        Write all paired SRT files, time-shifted and renumbered, to srtFileName.

        Each SRT is shifted by the total length of the MP3 files before it.
        Only the pairs in SRT_to_MP3 are processed; SRT files listed in
        SRT_no_MP3 are left out. The output is written as UTF-8.
        '''
        # Read every MP3 length up front so an unreadable MP3 fails before
        # any SRT is parsed or the output file is created.
        tracks = [(srt, MP3(mp3).info.length) for srt, mp3 in self.SRT_to_MP3]

        shifted = []
        duration = 0.000
        for n, (srt, length) in enumerate(tracks):
            print(duration, n)
            print([srt])
            for (start, end), content in self.createSRTdata(srt):
                shifted.append((
                    self.floatseconds2srthms(floatseconds=start + duration),
                    self.floatseconds2srthms(floatseconds=end + duration),
                    content,
                ))

            print(f'== done ==>{srt}\n\n\n')

            duration = duration + length

        with open(srtFileName, 'w', encoding='utf8') as f:
            for number, (start, end, content) in enumerate(shifted, 1):
                f.write(f'{number}\n')
                f.write(f'{start} --> {end}\n')
                f.write(content)
                f.write('\n')  # blank line that separates cues
# SRT subtitle time stitching

# Guess you like