Python将当前目录下所有txt文件另存为UTF-8格式

今天遇到需要转换txt文件保存格式的问题。如果文件很少,可以使用“记事本”的“另存为”功能保存,也可以使用Notepad++的“编码”菜单中的“转为xx编码”功能;但是文件很多时,这种逐个手动转换的方法就不可取了。

之前查了一下bat脚本,对bat脚本不熟,而且语法很难懂,所以就选择python脚本实现了。

下面的脚本自动获取文件编码格式,然后转化成UTF-8格式保存。脚本依赖第三方库chardet(可通过 pip install chardet 安装),会递归处理当前目录及其所有子目录下的txt文件,并且直接覆盖原文件,运行前建议先备份。

import os
#files = os.listdir(".")#获取当前目录下的文件
from chardet.universaldetector import UniversalDetector


def get_filelist(path):
    """Collect the paths of every ``.txt`` file under *path*, recursively.

    Args:
        path: Directory that is walked with ``os.walk``.

    Returns:
        A list of file paths, each formed by joining the directory reported
        by ``os.walk`` with the file name. The list is empty when no file
        name ends in ``.txt``.
    """
    Filelist = []
    for home, _dirs, files in os.walk(path):
        for filename in files:
            # Match on the extension only: a substring test would also pick
            # up names such as "notes.txt.bak", which are not text files and
            # must not be rewritten by the caller.
            if filename.endswith(".txt"):
                Filelist.append(os.path.join(home, filename))
    return Filelist


def get_encode_info(file):
    """Detect the character encoding of *file* with chardet.

    Args:
        file: Path of the file to inspect; it is opened in binary mode.

    Returns:
        The ``'encoding'`` entry of the detector's result.
        NOTE(review): chardet documents this entry as ``None`` when it
        cannot make a guess (for example for an empty file) -- callers
        should be prepared for that.
    """
    detector = UniversalDetector()
    with open(file, 'rb') as f:
        # Iterate the file lazily: the detector can declare itself done
        # early, so there is no reason to load every line into memory first.
        for line in f:
            detector.feed(line)
            if detector.done:
                break
    # close() finalises the detection so that ``result`` is populated.
    detector.close()
    return detector.result['encoding']
 
def read_file(file):
    """Return the complete content of *file* as a ``bytes`` object.

    The file is opened in binary mode, so the bytes are returned exactly as
    stored, without any decoding.
    """
    with open(file, mode='rb') as stream:
        payload = stream.read()
    return payload
 
def write_file(content, file):
    """Write the bytes *content* to *file*.

    The file is opened in binary write mode, so any existing content is
    replaced and the bytes are stored unchanged.
    """
    with open(file, mode='wb') as stream:
        stream.write(content)
 
def convert_encode2utf8(file, original_encode, des_encode):
    """Re-encode *file* in place from *original_encode* to *des_encode*.

    Args:
        file: Path of the file to convert; it is overwritten with the result.
        original_encode: Name of the encoding the file is currently stored in.
        des_encode: Name of the encoding to write the file in.
    """
    raw_bytes = read_file(file)
    # 'ignore' silently drops any bytes that cannot be decoded with
    # original_encode instead of raising.
    text = raw_bytes.decode(original_encode, errors='ignore')
    write_file(text.encode(des_encode), file)
 
if __name__ == "__main__":
    # Convert every .txt file below the current directory to UTF-8 in place
    # and print the encoding each file ends up with.
    filePath = './'
    Filelist = get_filelist(filePath)
    for filename in Filelist:
        encode_info = get_encode_info(filename)
        # Skip files whose encoding could not be detected (None): passing
        # None on to bytes.decode() would raise TypeError and abort the
        # whole batch. The comparison is case-insensitive so an upper-case
        # "UTF-8" label is not converted needlessly.
        if encode_info is not None and encode_info.lower() != 'utf-8':
            convert_encode2utf8(filename, encode_info, 'utf-8')
            # Detect again only after a rewrite, to report the new encoding.
            encode_info = get_encode_info(filename)
        print(encode_info)

猜你喜欢

转载自blog.csdn.net/juluwangriyue/article/details/109053440
今日推荐