Today I ran into the problem of converting the encoding of txt files. If only a few files need converting, you can use Notepad's "Save As" dialog, or Notepad++'s Encoding menu, to convert them to the desired encoding — but that approach is impractical when there are many files.
I first looked into batch (.bat) scripts, but I am not familiar with them and find the syntax hard to read, so I chose to implement this with a Python script instead.
The following script automatically detects each file's encoding, then converts the file to UTF-8 and saves it back in place.
import os
#files = os.listdir(".")#获取当前目录下的文件
from chardet.universaldetector import UniversalDetector
def get_filelist(path, suffix=".txt"):
    """Recursively collect files under *path* whose name ends with *suffix*.

    Args:
        path: Root directory to walk.
        suffix: Filename suffix to match (default ".txt").

    Returns:
        List of full paths (directory joined with filename) for every match.
    """
    filelist = []
    for home, _dirs, files in os.walk(path):
        for filename in files:
            # endswith() avoids false positives such as "notes.txt.bak",
            # which the previous substring test (".txt" in filename) matched.
            if filename.endswith(suffix):
                filelist.append(os.path.join(home, filename))
    return filelist
def get_encode_info(file):
    """Detect the text encoding of *file* with chardet's UniversalDetector.

    Feeds the file to the detector line by line and stops as soon as the
    detector is confident.

    Args:
        file: Path of the file to examine.

    Returns:
        The detected encoding name (e.g. "utf-8", "GB2312"), or None when
        chardet cannot make a guess (e.g. for an empty file).
    """
    detector = UniversalDetector()
    with open(file, 'rb') as f:
        # Iterate the file object directly: readlines() loaded the entire
        # file into memory up front, defeating the early "done" break below.
        for line in f:
            detector.feed(line)
            if detector.done:
                break
    detector.close()
    return detector.result['encoding']
def read_file(file):
    """Read *file* in binary mode and return its raw bytes."""
    with open(file, 'rb') as source:
        data = source.read()
    return data
def write_file(content, file):
    """Overwrite *file* with *content* (bytes), creating it if needed."""
    with open(file, 'wb') as sink:
        sink.write(content)
def convert_encode2utf8(file, original_encode, des_encode):
    """Re-encode *file* in place from *original_encode* to *des_encode*.

    Bytes that cannot be decoded under *original_encode* are silently
    dropped (errors='ignore'); the re-encoded text then replaces the
    file's previous contents.
    """
    raw_bytes = read_file(file)
    decoded_text = raw_bytes.decode(original_encode, 'ignore')
    write_file(decoded_text.encode(des_encode), file)
if __name__ == "__main__":
    # Walk the current directory and normalize every .txt file to UTF-8.
    file_path = './'
    for filename in get_filelist(file_path):
        encode_info = get_encode_info(filename)
        if encode_info is None:
            # chardet could not guess (e.g. empty file) -- calling
            # bytes.decode(None) would raise TypeError, so skip instead.
            print(filename, ': encoding not detected, skipped')
            continue
        # Compare case-insensitively: chardet versions differ in the
        # casing of the names they report (e.g. 'UTF-8' vs 'utf-8').
        if encode_info.lower() != 'utf-8':
            convert_encode2utf8(filename, encode_info, 'utf-8')
            # Re-detect so the printed value reflects the converted file.
            encode_info = get_encode_info(filename)
        print(encode_info)