python : 新概念英语 课文转为html

txt2htm.py
# -*- coding: utf-8 -*-
import os, sys
import glob

# Command-line handling: exactly one argument is expected, a glob pattern
# selecting the text files to convert (e.g. "*.txt").
# NOTE: if the shell expands the pattern into several file names before the
# script sees it, the argument count is no longer 2 and the usage text is
# printed instead; quote the pattern in that case.
if len(sys.argv) == 2:
    pattern = sys.argv[1]
else:
    # Single-argument print(...) emits the same text under Python 2 and 3.
    print('usage: txt2htm.py *.txt ')
    print('generate ?????.htm ')
    sys.exit(1)

# Line prefixes that open a new section: a horizontal rule is emitted
# before the line itself.
section_prefixes = ("New Word", "New word", "Notes on", "参考译文")
# Line prefixes that mark the end of the lesson text: conversion of the
# current file stops there and the rest of the input is ignored.
stop_prefixes = ("在线收听地址", "document.onclick")

# Convert every file matched by `pattern` into an HTML page written next to
# it, with the same base name and a ".htm" extension.
for f1 in glob.glob(pattern):
    print(f1)
    fn, ext = os.path.splitext(f1)
    if ext != '.txt':
        # Any non-.txt match aborts the whole run with exit status 4.
        print('Error: %s is not txt file ' % f1)
        sys.exit(4)

    # Page header; the base file name is shown in the <title>.
    headline = """<!DOCTYPE html>
<html>
  <head>
  <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
    <title> 新概念英语 %s </title>
  </head>
<body>
""" % (fn)
    # Audio player pointing at an .mp3 with the same base name as the text.
    audio = """
  <audio controls="controls"> 
      <source src="./%s.mp3" type="audio/mp3" />
  </audio>
""" % (fn)

    f2 = fn + '.htm'
    # NOTE(review): the page declares charset=UTF-8, so the input is assumed
    # to be UTF-8. Under Python 3, open() in text mode uses the locale's
    # default encoding -- confirm that matches on the target machine.
    # `with` guarantees both files are closed even if a write fails.
    with open(f1, 'r') as fp1, open(f2, 'w') as fp2:
        fp2.write(headline)
        ln = 0  # count of non-blank lines seen so far
        for line in fp1:
            if not line.strip():
                continue
            ln += 1
            if ln == 1:
                # The first non-blank line is the heading: drop its first
                # space-separated token and keep the remainder. When the
                # line has no space, parts[-1] is the whole line, which
                # avoids the IndexError that indexing [1] would raise.
                parts = line.strip().split(' ', 1)
                fp2.write('<h3>' + parts[-1] + '</h3>')
            elif line.startswith("对应音频"):
                # The marker line itself is replaced by the audio player.
                fp2.write(audio)
            elif line.startswith(section_prefixes):
                fp2.write('<hr>\n<br>' + line)
            elif line.startswith(stop_prefixes):
                break
            else:
                fp2.write('<br>' + line)
        # No <p> is ever opened above, so only <body> and <html> are closed.
        fp2.write("</body>\n</html>\n")
#

猜你喜欢

转载自belldeep.iteye.com/blog/2380010
今日推荐