[Python] Batch-converting file encodings

A one-off task: I needed to convert the encoding of two years' worth of CSV files, because Excel showed garbled characters when opening the UTF-8 files. So I came up with the following batch-processing approach:

# -*- encoding: utf-8 -*-
"""
fc_test.py
Created on 2020/3/9 5:06 PM
@author: LHX
"""
import os
import sys
import codecs
import chardet

# Re-encode one file from its source encoding into the target encoding (UTF-8 by default).
def file_convert(path1,file_name,file, in_code="GBK", out_code="UTF-8"):
    """
    Convert a single file from ``in_code`` to ``out_code``.

    The converted copy is written to the directory ``path1 + '_convert'``
    under the name ``file``. Note that the suffix is appended to the raw
    string, so a ``path1`` ending in a path separator yields a ``_convert``
    directory *inside* ``path1``.

    Args:
        path1: Input directory the file was found in.
        file_name: Full path of the file to read.
        file: Bare file name used for the converted copy.
        in_code: Encoding used to decode ``file_name`` (default GBK).
        out_code: Encoding used to write the copy (default UTF-8).

    The output directory is created when it does not exist yet. I/O errors
    are reported on stdout instead of being raised; decoding/encoding
    errors are not caught here and propagate to the caller.
    """
    out_path = path1+'_convert'
    print("==111",out_path)
    try:
        # Previously the directory had to be created by hand; do it on demand.
        os.makedirs(out_path, exist_ok=True)
        with codecs.open(file_name, 'r', in_code) as f_in:
            new_content = f_in.read()
        # Context manager guarantees the output is flushed and closed
        # (the original referenced ``f_out.close`` without calling it).
        with codecs.open(os.path.join(out_path,file), 'w', out_code) as f_out:
            f_out.write(new_content)
    except IOError as err:
        print("I/O error: {0}".format(err))
import os

# Starting from the input directory, visit every sub-directory and file.
def list_folders_files(path1):
    """
    Recursively convert every file found under ``path1``.

    Sub-directories are processed first (depth-first); afterwards each
    regular file directly inside ``path1`` is read as bytes, its encoding
    is guessed with ``chardet.detect``, and the file is handed to
    ``file_convert`` with UTF-8 as the target encoding.
    """
    entries = os.listdir(path1)

    # Classify the sub-directories up front, then descend into each of them.
    subdirs = list(filter(lambda name: os.path.isdir(os.path.join(path1, name)), entries))
    for subdir in subdirs:
        list_folders_files(os.path.join(path1, subdir))

    # Now handle the regular files that live directly in this directory.
    plain_files = list(filter(lambda name: os.path.isfile(os.path.join(path1, name)), entries))
    for name in plain_files:
        full_path = os.path.join(path1, name)
        with open(full_path, "rb") as raw:
            detected = chardet.detect(raw.read())
            file_convert(path1, full_path, name, detected['encoding'], 'UTF-8')

# Set the input directory and run the conversion over everything below it.
# NOTE: because this path ends with a separator, file_convert's
# ``path1 + '_convert'`` resolves to a ``_convert`` folder inside this directory
# for the top-level files.
path = 'E:\\tmp\\2018_1\\'
list_folders_files(path)
Published 118 original articles · won praise 25 · Views 150,000 +

Guess you like

Origin blog.csdn.net/lhxsir/article/details/104758732