How to use Python for batch file organization

1. Preparation

For experimentation, we first use the following code to generate 200 empty .txt files.

import os

# open() does not create missing directories, so make sure the target
# folder exists before generating the files.
os.makedirs('./file', exist_ok=True)
for i in range(200):
    file_name = f'file_{i}.txt'
    # mode='w' creates an empty file (or truncates an existing one); the
    # context manager closes the handle even if something goes wrong.
    with open(f'./file/{file_name}', mode='w'):
        pass

operation result:

2. Create a file list in Excel

1. Idea

Collect the file names, then write a serial number and the file name for each file into Excel.

2. openpyxl installation

This article uses the openpyxl library for excel operations and pip for installation.

pip install -i https://pypi.tuna.tsinghua.edu.cn/simple openpyxl

3. Code

from openpyxl import load_workbook
import os

def find_txt(path):
    """Return the names of all ``.txt`` files directly inside *path*.

    Only the file-name suffix is checked, so names that merely contain the
    letters "txt" somewhere (for example ``txt_notes.xlsx``) are excluded.

    Args:
        path: Directory to scan; sub-directories are not descended into.

    Returns:
        A list of bare file names, in the order ``os.listdir`` reports them.
    """
    return [name for name in os.listdir(path) if name.endswith('.txt')]

def add_data(excel_path, filenames):
    """Write a numbered file list into ``Sheet1`` of an existing workbook.

    Row 1 receives the two column headers. Every following row gets a
    1-based serial number in column 1 and the file name in column 2. The
    workbook is then saved back to *excel_path*.

    Args:
        excel_path: Path of an existing workbook containing a sheet named
            ``Sheet1``.
        filenames: File names to list, in the order they should appear.

    Raises:
        SystemExit: If *excel_path* does not exist (a message is printed
            first).
    """
    if not os.path.exists(excel_path):
        # Message reads "<path> file does not exist, please retry".
        print(excel_path + ' 文件不存在，请重试')
        # Raise SystemExit directly: the exit() helper is injected by the
        # site module and is not guaranteed to exist in every interpreter.
        # A non-zero status reports the failure to the calling shell.
        raise SystemExit(1)
    excel_file = load_workbook(excel_path)
    excel_sheet = excel_file['Sheet1']
    # Header row: "serial number" and "file name".
    excel_sheet.cell(row=1, column=1, value='序号')
    excel_sheet.cell(row=1, column=2, value='文件名')
    # Data starts on row 2, directly below the header.
    for serial, filename in enumerate(filenames, start=1):
        row = serial + 1
        excel_sheet.cell(row=row, column=1, value=serial)
        excel_sheet.cell(row=row, column=2, value=filename)
    excel_file.save(excel_path)

# Folder that holds the files to be listed
file_path = './file'
# Absolute path of the workbook 文件清单.xlsx ("file list"); os.path.join
# picks the correct separator for the current platform.
excel_path = os.path.join(os.getcwd(), 'file', '文件清单.xlsx')
filenames = find_txt(file_path)
print(filenames)
add_data(excel_path, filenames)
print('Success！')

operation result:

3. Batch renaming of files

1. Add serial numbers to file names in batches

When sorting and tallying files, it is often necessary to prefix each file name with a serial number. In the file list saved above, we can see that the script's default order compares names character by character, so file_10 comes before file_2. Padding the serial number with leading zeros makes the alphabetical order match the numeric order.

1) The rename function:

os.rename(name, new_name)

2) Code

import os

# Build paths with os.path.join so the script is not tied to the Windows
# "\" separator.
path = os.path.join(os.getcwd(), 'file')
# os.listdir gives no ordering guarantee; sort so that serial numbers are
# assigned in a reproducible order.
filenames = sorted(os.listdir(path))

a = 1
for filename in filenames:
    if filename.endswith('.txt'):
        old_dir = os.path.join(path, filename)
        # :03d zero-pads to three digits (001, 010, 100) so alphabetical
        # order matches numeric order; wider numbers are left unpadded.
        new_dir = os.path.join(path, f'{a:03d}-{filename}')
        os.rename(old_dir, new_dir)
        a = a + 1

3) Running results

2. Batch rename the file name to the specified file name

After creating the file list, we can use Excel to adjust the file names in bulk (for example, removing spaces or adding prefixes and suffixes). We prepare the modified file names in Excel and then rename the files in batches.

1) Experimental objectives

As shown in the picture, we change each file name to a new file name (in Excel, "file" is replaced with "filename").

2) Code

from openpyxl import load_workbook
import os

def find_txt(path):
    """Return the names of all ``.txt`` files directly inside *path*.

    Only the file-name suffix is checked, so names that merely contain the
    letters "txt" somewhere (for example ``txt_notes.xlsx``) are excluded.

    Args:
        path: Directory to scan; sub-directories are not descended into.

    Returns:
        A list of bare file names, in the order ``os.listdir`` reports them.
    """
    return [name for name in os.listdir(path) if name.endswith('.txt')]

def change_file_name(file_path, excel_path, filenames):
    """Rename files in *file_path* as directed by ``Sheet1`` of a workbook.

    The sheet is read from row 2 downward (row 1 is skipped; presumably it
    is the header row). Column 2 holds the current file name and column 3
    the new one. A row is applied only when its current name appears in
    *filenames*.

    Args:
        file_path: Directory containing the files to rename.
        excel_path: Path of the workbook to read.
        filenames: Names of the files that may be renamed.
    """
    sheet = load_workbook(excel_path)['Sheet1']
    # A set gives constant-time membership tests instead of rescanning the
    # whole file list for every row.
    candidates = set(filenames)
    for row in range(2, sheet.max_row + 1):
        old_name = sheet.cell(row, 2).value
        if old_name not in candidates:
            continue
        new_name = sheet.cell(row, 3).value
        # An empty cell reads back as None; skip it rather than letting
        # os.path.join fail on a non-string value.
        if new_name is None:
            continue
        old_dir = os.path.join(file_path, old_name)
        # str() covers cells that hold a number rather than text.
        new_dir = os.path.join(file_path, str(new_name))
        os.rename(old_dir, new_dir)
# Folder that holds the files to be renamed
file_path = './file'
# Absolute path of the workbook 文件清单.xlsx ("file list"); os.path.join
# picks the correct separator for the current platform.
excel_path = os.path.join(os.getcwd(), 'file', '文件清单.xlsx')
filenames = find_txt(file_path)
print(filenames)
change_file_name(file_path, excel_path, filenames)
print('Success！')

running result:

4. Bulk deletion of files

1. Experimental objectives

After reviewing the file names in Excel, delete every file whose "delete" column is 1 and keep every file whose value is 0 (as shown in the figure).

2. Code

from openpyxl import load_workbook
import os

def find_txt(path):
    """Return the names of all ``.txt`` files directly inside *path*.

    Only the file-name suffix is checked, so names that merely contain the
    letters "txt" somewhere (for example ``txt_notes.xlsx``) are excluded.

    Args:
        path: Directory to scan; sub-directories are not descended into.

    Returns:
        A list of bare file names, in the order ``os.listdir`` reports them.
    """
    return [name for name in os.listdir(path) if name.endswith('.txt')]

def change_file_name(file_path, excel_path, filenames):
    """Delete the files that ``Sheet1`` of a workbook marks for deletion.

    Despite its name, this function removes files; the name is kept so
    existing callers continue to work. The sheet is read from row 2
    downward (row 1 is skipped; presumably it is the header row). Column 2
    holds the file name and column 3 the delete flag. A file is removed
    only when its flag is 1 and its name appears in *filenames*.

    Args:
        file_path: Directory containing the files.
        excel_path: Path of the workbook to read.
        filenames: Names of the files that may be deleted.
    """
    sheet = load_workbook(excel_path)['Sheet1']
    # A set gives constant-time membership tests instead of rescanning the
    # whole file list for every row.
    candidates = set(filenames)
    for row in range(2, sheet.max_row + 1):
        filename = sheet.cell(row, 2).value
        if filename not in candidates:
            continue
        delete_flag = sheet.cell(row, 3).value
        if isinstance(delete_flag, str):
            delete_flag = delete_flag.strip()
        # Compare against 1 explicitly: a plain truthiness test would also
        # delete files whose flag was stored as the text "0".
        if delete_flag in (1, '1'):
            os.remove(os.path.join(file_path, filename))
# Folder that holds the files to be checked for deletion
file_path = './file'
# Absolute path of the workbook 文件清单.xlsx ("file list"); os.path.join
# picks the correct separator for the current platform.
excel_path = os.path.join(os.getcwd(), 'file', '文件清单.xlsx')
filenames = find_txt(file_path)
print(filenames)
change_file_name(file_path, excel_path, filenames)
print('Success！')

Running result: the files marked for deletion have been deleted.

How to use Python for batch file organization - Zhihu "Introduction" Batch file organization has always been a headache in daily work. Using Python for large-scale file organization can greatly improve work efficiency. Here are a few tips for organizing batch files. Difficulty: ⭐⭐ 1. Preparation In order to be used for experiments, we use code generation... https://zhuanlan.zhihu.com/p/441915312

1. Conversion between different character encodings

On Chinese-language Windows systems the default encoding is GBK. If you send GBK-encoded text from a Windows system
to a Mac, whose default encoding is UTF-8, the text will be displayed as garbled characters. How can this GBK text be displayed correctly on the Mac?
Encoding and decoding

1. The process of converting any encoding into unicode is called decoding

>>> s = "卿云"
>>> s = "卿云"  # unicode format
>>> s.encode("utf-8")  # Encode it into utf-8
b'\xe5\x8d\xbf\xe4\xba\x91'
2. The process of converting unicode into any other encoding is called encoding

>>> s
'卿云'
>>> s.encode("utf-8").decode("utf-8")  # Convert the utf-8 encoded bytes back into unicode
'卿云'
3. Encoding a str produces bytes

>>> s
'卿云'
>>> s.encode("utf-8")  # Encode it into utf-8
b'\xe5\x8d\xbf\xe4\xba\x91'
# The result is in bytes format. The bytes type is displayed in hexadecimal: two hexadecimal digits such as \xe5 represent one byte (because one hexadecimal digit occupies 4 bits).
What exactly is the bytes type?

The bytes type is really just binary data; for readability it is usually displayed in hexadecimal.

# coding: utf-8
import os
from typing import Iterator, Optional

import chardet


def find_all_file(path: str) -> Iterator[str]:
  """Yield the path of every ``.java`` file under *path*, recursively.

  Args:
    path: Root directory to walk.

  Yields:
    Each matching file's path, built by joining the directory reported by
    ``os.walk`` with the file name.
  """
  for root, _dirs, files in os.walk(path):
    for f in files:
      if f.endswith('.java'):
        yield os.path.join(root, f)


def judge_coding(path: str) -> Optional[dict]:
  """Report the detected encoding of *path* when it needs conversion.

  Args:
    path: File whose raw bytes are passed to ``chardet.detect``.

  Returns:
    The ``chardet.detect`` result when the detected encoding is something
    other than UTF-8 or ASCII; ``None`` when the file is already
    UTF-8-compatible or no encoding could be detected.
  """
  with open(path, 'rb') as f:
    c = chardet.detect(f.read())

  encoding = c['encoding']
  # Nothing detected: there is no source encoding to convert from.
  if encoding is None:
    return None
  # Compare only the encoding label. Comparing whole result dicts also
  # compares the confidence score, which would flag UTF-8 files as well.
  # ASCII is a subset of UTF-8, so such files need no rewrite either.
  if encoding.lower() in ('utf-8', 'ascii'):
    return None
  return c


def change_to_utf_file(path: str):
  """Re-encode the files under *path* that judge_coding flags.

  For every path yielded by ``find_all_file(path)`` with a truthy
  ``judge_coding`` result, the file is passed to ``change`` together with
  the reported encoding, and one line is printed.
  """
  for source_file in find_all_file(path):
    detected = judge_coding(source_file)
    if not detected:
      continue
    change(source_file, detected['encoding'])
    # Message reads "<file> encoding has been changed from <old> to utf-8".
    print("{} 编码方式已从{}改为 utf-8".format(source_file, detected['encoding']))


def change(path: str, coding: str):
  """Rewrite the text file at *path* in UTF-8, in place.

  Args:
    path: File to convert.
    coding: Name of the encoding the file is currently stored in.

  Raises:
    UnicodeDecodeError: If the content cannot be decoded with *coding*.
  """
  # newline='' turns off newline translation on both sides, so only the
  # character encoding changes and the original line endings are kept.
  with open(path, 'r', encoding=coding, newline='') as f:
    text = f.read()

  with open(path, 'w', encoding='utf-8', newline='') as f:
    f.write(text)


def check(path: str):
  """Print the chardet-detected encoding of each file under *path*.

  One line is printed for every path yielded by ``find_all_file(path)``:
  the detected encoding, a separator, and the file path.
  """
  for source_file in find_all_file(path):
    with open(source_file, 'rb') as handle:
      raw = handle.read()
    detected = chardet.detect(raw)
    print(detected['encoding'], ': ', source_file)


def main(my_path: str = 'C:\\WorkSpace'):
  """Convert the files under *my_path* to UTF-8.

  Args:
    my_path: Root directory to process. Defaults to the workspace path
      this script was originally hard-coded to use.
  """
  change_to_utf_file(my_path)
  # To only list the detected encodings, call check(my_path) instead.


if __name__ == '__main__':
  main()

import os
from chardet.universaldetector import UniversalDetector

def get_filelist(path):
    """Return the paths of all ``.csv`` files under *path*, recursively.

    Only the file-name suffix is checked, so names such as ``data.csv.bak``
    are excluded.

    Args:
        path: Root directory to walk.

    Returns:
        A list of file paths, each built by joining the directory reported
        by ``os.walk`` with the file name.
    """
    return [
        os.path.join(home, filename)
        for home, _dirs, files in os.walk(path)
        for filename in files
        if filename.endswith('.csv')
    ]

def read_file(file):
    """Return the entire content of *file* as raw bytes."""
    with open(file, mode='rb') as handle:
        content = handle.read()
    return content

def get_encode_info(file):
    """Detect the encoding of *file* with chardet's ``UniversalDetector``.

    Args:
        file: Path of the file to inspect.

    Returns:
        The ``'encoding'`` entry of the detector's result.
    """
    detector = UniversalDetector()
    with open(file, 'rb') as f:
        # Iterate the file lazily. readlines() would load every line into
        # memory first, so stopping early would save nothing.
        for line in f:
            detector.feed(line)
            if detector.done:
                break
    detector.close()
    return detector.result['encoding']

def convert_encode2utf8(file, original_encode, des_encode):
    """Re-encode *file* in place from *original_encode* to *des_encode*.

    Bytes that cannot be decoded with *original_encode* are dropped, because
    decoding uses the ``'ignore'`` error handler.
    """
    decoded_text = read_file(file).decode(original_encode, 'ignore')
    # Encode before reopening the file, so a failure here leaves the
    # original content on disk untouched.
    payload = decoded_text.encode(des_encode)
    with open(file, 'wb') as out:
        out.write(payload)

def read_and_convert(path):
    """Convert every non-UTF-8 CSV file found under *path* to UTF-8.

    Each file returned by ``get_filelist`` has its encoding detected. Files
    not reported as UTF-8 are rewritten through ``convert_encode2utf8``.
    A failure on one file is reported and the remaining files are still
    processed.

    Args:
        path: Root directory to process.
    """
    converted = 0
    for filename in get_filelist(path=path):
        try:
            encode_info = get_encode_info(filename)
            # Compare case-insensitively in case the detector reports the
            # label with different capitalisation.
            if encode_info and encode_info.lower() == 'utf-8':
                continue
            convert_encode2utf8(filename, encode_info, 'utf-8')
            # Count only after the conversion succeeded, so the running
            # total matches the "successfully converted" message.
            converted += 1
            print('成功转换 %s 个文件 %s '%(converted,filename))
        except Exception:
            # Exception rather than BaseException, so Ctrl-C and SystemExit
            # still stop the run. Message reads "<file> has a problem,
            # please check!".
            print(filename,'存在问题，请检查！')

def recheck_again(path):
    """Print each CSV file under *path* not detected as ``'utf-8'``.

    The listing is wrapped in a header line ("the following files still
    have problems") and a footer line ("check finished").
    """
    print('---------------------以下文件仍存在问题---------------------')
    for csv_file in get_filelist(path):
        detected = get_encode_info(csv_file)
        if detected == 'utf-8':
            continue
        # Message reads "<file> encoding is: <encoding>".
        print(csv_file,'的编码方式是：',detected)

    print('--------------------------检查结束--------------------------')
if __name__ == "__main__":
    # Root directory to process
    path = './'
    read_and_convert(path)
    recheck_again(path)
    # Message reads "conversion finished!".
    print('转换结束！')