Using Python to convert all txt files to UTF-8 encoding

Today I ran into the need to convert the encoding that a batch of txt files is saved in. When there are only a few files, you can use Notepad's "Save As" function, or Notepad++'s "Encoding -> Convert to xx" menu, but that approach is impractical when there are many files.

I had previously looked into doing this with a bat script, but I am not familiar with batch scripting and find its syntax hard to understand, so I chose to implement it as a Python script instead.

The following script automatically detects each file's encoding, then converts the file to UTF-8 and saves it in place.

import os
#files = os.listdir(".")  # get the files in the current directory
from chardet.universaldetector import UniversalDetector


def get_filelist(path):
    """Recursively collect the paths of all ``.txt`` files under *path*.

    Args:
        path: Directory to walk (sub-directories are included).

    Returns:
        A list of file paths, each built by joining the directory that
        ``os.walk`` reports with the file name, in walk order.
    """
    # Match on the suffix rather than a substring: a substring test would
    # also pick up names such as "notes.txt.bak" or "a.txt.zip", which are
    # not text files and must not be rewritten by the converter.
    return [
        os.path.join(home, filename)
        for home, _dirs, files in os.walk(path)
        for filename in files
        if filename.endswith(".txt")
    ]


def get_encode_info(file):
    """Detect the character encoding of *file* using chardet.

    The file is opened in binary mode and fed to a ``UniversalDetector``
    line by line until the detector reports it is done or the file ends.

    Args:
        file: Path of the file to inspect.

    Returns:
        The ``'encoding'`` entry of the detector's result.
        NOTE(review): chardet presumably reports ``None`` here when it
        cannot determine an encoding (e.g. an empty file) - confirm.
    """
    detector = UniversalDetector()
    with open(file, 'rb') as f:
        # Iterate the file lazily instead of calling readlines(): the loop
        # stops as soon as the detector is done, so the rest of a large file
        # never has to be read into memory.
        for line in f:
            detector.feed(line)
            if detector.done:
                break
    detector.close()
    return detector.result['encoding']
 
def read_file(file):
    """Return the complete contents of *file* as raw bytes.

    Args:
        file: Path of the file to read.

    Returns:
        The file's bytes, read in binary mode (no decoding is applied).
    """
    with open(file, mode='rb') as source:
        payload = source.read()
    return payload
 
def write_file(content, file):
    """Write *content* to *file* in binary mode.

    Opening with ``'wb'`` truncates an existing file or creates a new one.

    Args:
        content: Bytes to write.
        file: Destination path.
    """
    with open(file, mode='wb') as sink:
        sink.write(content)
 
def convert_encode2utf8(file, original_encode, des_encode):
    """Re-encode *file* in place from *original_encode* to *des_encode*.

    Args:
        file: Path of the file to convert; it is overwritten with the result.
        original_encode: Name of the encoding the file is currently in.
        des_encode: Name of the encoding to write the file in.
    """
    raw_bytes = read_file(file)
    # 'ignore' silently drops any bytes that are not valid in original_encode,
    # so the conversion never fails on stray bytes but may lose them.
    text = raw_bytes.decode(original_encode, 'ignore')
    write_file(text.encode(des_encode), file)
 
if __name__ == "__main__":
    # Convert every .txt file under the current directory to UTF-8 in place
    # and print the encoding each file ends up with.
    filePath = './'
    Filelist = get_filelist(filePath)
    for filename in Filelist:
        encode_info = get_encode_info(filename)
        if encode_info is None:
            # No encoding could be detected; passing None to bytes.decode()
            # would raise TypeError, so leave the file untouched.
            print(encode_info)
            continue
        # Compare case-insensitively so a differently-cased detector result
        # (e.g. 'UTF-8') does not trigger a pointless rewrite.
        if encode_info.lower() != 'utf-8':
            convert_encode2utf8(filename, encode_info, 'utf-8')
            # Re-detect after the rewrite so the printed value reflects the
            # file as it is now on disk.
            encode_info = get_encode_info(filename)
        print(encode_info)

 

Guess you like

Origin blog.csdn.net/juluwangriyue/article/details/109053440