# python进阶宝典7-文件处理

import os
os.path.join('usr','bin','spam')
# Joins the components with the path separator of the running platform, so the
# result differs per OS: 'usr\\bin\\spam' on Windows, 'usr/bin/spam' on Linux.
# (As a bare expression the value is only displayed in an interactive session.)

## Directory and file paths
myfiles = ['acc.txt','bbb.csv','cedg.docx']
for filename in myfiles:
    # Prefix every file name with the same base directory and show the result.
    print(os.path.join('C:\\users\\aswttt',filename))

os.chdir('C:\\Windows\\System32') # change the current working directory
os.getcwd() # return the current working directory
os.makedirs('d:\\test\\l1\\l2') # create the directory, including any missing intermediate directories
os.path.abspath('.') # convert a relative path to an absolute path
os.path.isabs('.') # test whether the path is absolute
os.path.relpath('c:\\windows','c:\\test\\l1') # relative path leading from argument 2 to argument 1: '..\\..\\windows'

lpath = 'c:\\windows\\system32\\calc.exe'
os.path.basename(lpath) # file name (last path component)
os.path.dirname(lpath) # directory name (everything before the last component)
os.path.split(lpath) # tuple of two strings: (directory name, file name)
lpath.split(os.path.sep) # list of strings obtained by splitting the path on the OS separator
os.path.exists(lpath) # whether the file or directory exists
os.path.isfile(lpath) # whether the path is an existing file
os.path.isdir(os.path.dirname(lpath)) # whether the parent path is an existing directory

os.path.getsize(lpath) # size of the file in bytes
os.listdir('c:\\temp') # names of the sub-directories and files inside the directory

# Add up the size reported for every entry directly inside c:\temp.
totalSize = 0
for filename in os.listdir('c:\\temp'):
    # listdir() returns bare names, so join them back onto the directory.
    totalSize += os.path.getsize(os.path.join('c:\\temp',filename))
print(totalSize) # printed once, after the loop has finished

## Reading and writing files
# 1) Call open(), which returns a file object.
# 2) Call the file object's read() or write() method.
# 3) Close the file; the with-statement below does this automatically,
#    even when reading raises an exception.

fpath = 'c:\\temp\\1.txt'
with open(fpath) as rfile: # open the file; the default mode is read-only ('r')
    rtxt = rfile.read() # read the whole content into one string
    print(rtxt)
    # read() leaves the cursor at end-of-file, so rewind first; otherwise
    # readlines() would always return an empty list.
    rfile.seek(0)
    rlines = rfile.readlines() # read the content as a list of strings, one per line
    print(rlines)

# Write, append to, then read back the same file; each with-block closes the
# handle before the next one reopens the file.
out_path = 'c:\\temp\\2.txt'

with open(out_path, 'w') as wfile: # write mode: truncates the file, then overwrites
    wfile.write('Hello world!\n')

with open(out_path, 'a') as wfile: # append mode: keeps the existing content
    wfile.write('Append mode open')

with open(out_path, 'r') as wfile:
    content = wfile.read()

print(content)

## 用shelve模块保存变量，生成到二进制的shelf文件
import shelve
# Store two lists in a shelf named 'mydata'. Which files appear on disk
# (e.g. mydata.dat / mydata.dir / mydata.bak) depends on the dbm backend
# that shelve selects on the running platform.
cats = ['zophis','pooka','simon']
bats = ['test','AA']
with shelve.open('mydata') as shelfFile:
    shelfFile['cats'] = cats
    shelfFile['bats'] = bats
# Leaving the with-block closes the shelf.

# Reopen the shelf and use the saved variables.
with shelve.open('mydata') as shelfFile:
    type(shelfFile) # the shelf's class, <class 'shelve.DbfilenameShelf'>
    print(shelfFile['cats']) # value stored under one key
    print(dict(shelfFile)) # whole shelf content converted to a dict
    list(shelfFile.keys()) # keys of all variables saved in the shelf
    list(shelfFile.values()) # values of all variables saved in the shelf

## 使用 pprint.pformat()保存变量到py文件并载入使用
import pprint
# Save a variable as importable Python source: pformat() renders the list as
# a literal, which is written behind a 'cats = ' assignment into myppdata.py.
cats = [
    {'name':'Sosdf','desc':'chubby'},
    {'name':'Poka','desc':'fluffy'},
]
formatted = pprint.pformat(cats)
with open('myppdata.py','w') as fileobj:
    fileobj.write('cats = ' + formatted + '\n')

import myppdata # load the .py file that the variable was saved into earlier
myppdata.cats # the saved list
myppdata.cats[0] # first element of the saved list
myppdata.cats[0]['name'] # value under the 'name' key of the first element

## shutil与文件复制删除
import shutil,os
shutil.copy('c:\\temp\\1.txt','c:\\temp\\test') # copy the file to the destination c:\temp\test
shutil.copy('c:\\temp\\1.txt','c:\\temp\\test\\n1.txt') # copy the file under the new name n1.txt
shutil.copytree('c:\\temp\\test\\intest','c:\\temp\\test\\intest2') # copy a whole folder tree to intest2
shutil.move('c:\\temp\\1.txt','c:\\temp\\test\\intest2') # move the file into the destination folder (if the destination does not exist, the file is moved to become a new file with that name)
## The move above and the three calls below are dangerous (destructive) commands; take extra care
os.unlink('d:\\t_path\\1.txt') # permanently delete a file
os.rmdir('d:\\t_path') # permanently delete an empty directory
#shutil.rmtree('d:\\t_path') # permanently delete a directory and all of its content

# Dry run of a bulk delete: only print the '.rxt' files found in the current
# working directory. The unlink call stays commented out until the printed
# list has been checked.
for filename in os.listdir():
    if filename.endswith('.rxt'):
        #os.unlink(filename)
        print(filename)

## 使用 send2trash 模块进行安全删除 - 放到回收站
import send2trash
send2trash.send2trash('c:\\temp\\test\\n1.txt') # send the file n1.txt to the recycle bin instead of deleting it permanently
send2trash.send2trash('c:\\temp\\test\\intest2') # send the path intest2 to the recycle bin as well

## 遍历目录树 os.walk()
# os.walk()在循环的每次迭代中，返回3个值 - 当前文件夹名称串、当前文件夹中子文件夹的字符串列表、当前文件夹中文件的字符串列表
# 这里的当前文件夹是指walk()循环当前迭代的文件夹，程序本身的工作目录不会因walk而改变
# 可以使用自定义的代码，来替代以下代码中的print函数调用
import os
# Walk the tree below c:\temp\test; every iteration yields the folder being
# visited, the names of its sub-folders and the names of its files.
for folderName,subfolders,filenames in os.walk('c:\\temp\\test'):
    print('The current folder is ' + folderName)
    for subfolder in subfolders:
        print('SUBFOLDER OF ' + folderName + ': ' + subfolder )
    for filename in filenames:
        print('FILE INSIDE ' + folderName + '： ' + filename )
    print('') # blank line separating the report of one folder from the next
# NOTE(review): the original nesting was lost; this final blank line is
# assumed to belong after the whole walk - confirm.
print('')

## zipfile模块处理压缩文件
import zipfile,os
currentpwd = os.getcwd()
os.chdir('C:\\temp\\test')
try:
    # Append mode: opens the archive for adding members. The handle is closed
    # before the file is reopened, so two handles never point at one archive.
    with zipfile.ZipFile('example.zip','a') as exampleZip:
        pass

    # Write mode: creates the archive, overwriting any existing one.
    with zipfile.ZipFile('example.zip','w') as exampleZip:
        exampleZip.write('1.txt',compress_type=zipfile.ZIP_DEFLATED) # add a file to the archive

    # Default mode is read-only; raises an error if the archive does not exist.
    with zipfile.ZipFile('example.zip') as exampleZip:
        spamInfo = exampleZip.getinfo('1.txt') # metadata of one member
        spamInfo.file_size # uncompressed size
        spamInfo.compress_size # compressed size
        'Compressed range is %s x !' % round(spamInfo.file_size / spamInfo.compress_size ,2) # compression ratio
        exampleZip.extract('1.txt') # extract one member into the current directory
        exampleZip.extract('1.txt','ext_dir') # extract into the given folder, created if missing
        exampleZip.extractall() # extract every member
        exampleZip.extractall('c:\\temp\\test\\2') # extract every member into the given folder, created if missing
finally:
    # Restore the original working directory even if a step above failed.
    os.chdir(currentpwd)

############################################################################
########## 例子1：生成随机的测试试卷文件
###
#1）创建35份不同的测试试卷
#2）为每份试卷创建50个单选题，次序随机 random.shuffle()
#3）为每个问题提供一个正确答案和3个随机的错误答案，次序随机 random.sample()
#4）将测试试卷及答案分别写到35个文本文件中

#step 1:将测试数据保存在一个字典中
import random
# The quiz data. Keys are states and values are their capitals.
# The quiz data. Keys are states and values are their capitals.
# (Spelling of 'Arizona' and 'Connecticut' corrected; the keys are printed
# verbatim in the generated questions.)
capitals = {'Alabama':'Montgomery','Alaska':'Juneau','Arizona':'Phoenix','Arkansas':'Little Rock','Wyoming':'Cheyenne',
    'California':'Sacramento','Colorado':'Denver','Connecticut':'Hartford','Delaware':'Dover','Florida':'Tallahassee',
    'Georgia':'Atlanta','Hawaii':'Honolulu','Idaho':'Boise','Illinois':'Springfield','Indiana':'Indianapolis',
    'Iowa':'Des Moines','Kansas':'Topeka','Kentucky':'Frankfort','Louisiana':'Baton Rouge','Maine':'Augusta',
    'Maryland':'Annapolis','Massachusetts':'Boston','Michigan':'Lansing','Minnesota':'Saint Paul','Mississippi':'Jackson',
    'Missouri':'Jefferson City','Montana':'Helena','Nebraska':'Lincoln','Nevada':'Carson City','New Hampshire':'Concord',
    'New Jersey':'Trenton','New Mexico':'Santa Fe','New York':'Albany','North Carolina':'Raleigh','North Dakota':'Bismarck',
    'Ohio':'Columbus','Oklahoma':'Oklahoma City','Oregon':'Salem','Pennsylvania':'Harrisburg','Rhode Island':'Providence',
    'South Carolina':'Columbia','South Dakota':'Pierre','Tennessee':'Nashville','Texas':'Austin','Utah':'Salt Lake City',
    'Vermont':'Montpelier','Virginia':'Richmond','Washington':'Olympia','West Virginia':'Charleston','Wisconsin':'Madison'
}
# Step 2: create the quiz files and shuffle the order of the questions.
# Generate 35 quiz files
testNums = 35
for quizNum in range(testNums):
    # Create the quiz and answer key files; the with-statement closes both
    # even if writing fails part-way through.
    quizPath = 'c:\\temp\\test\\capitalsquiz%s.txt' % (quizNum + 1)
    answerKeyPath = 'c:\\temp\\test\\capitalsquiz_answers%s.txt' % (quizNum + 1)
    with open(quizPath,'w') as quizFile, open(answerKeyPath,'w') as answerKeyFile:
        # Write out the head for the quiz
        quizFile.write('Name:\n\nDate:\n\nPeriod:\n\n')
        quizFile.write((' ' * 20) + 'State capitals quizNum (Form %s)' % (quizNum + 1))
        quizFile.write('\n\n')
        # Shuffle the order of the states
        states = list(capitals.keys())
        random.shuffle(states)
        # Step 3: build the answer options.
        # Loop through all states (50 with the data above), making a question for each
        qNums = len(states)
        for questionNum in range(qNums):
            # Get right and wrong answers.
            correctAnswer = capitals[states[questionNum]] # correct capital for this question's state
            wrongAnswers = list(capitals.values()) # start from every capital
            del wrongAnswers[wrongAnswers.index(correctAnswer)] # drop the correct one
            wrongAnswers = random.sample(wrongAnswers,3) # pick 3 random wrong answers
            answerOptions = wrongAnswers + [correctAnswer] # wrap the correct answer in a list to concatenate
            random.shuffle(answerOptions) # randomise the order of the options
            # Step 4: write the content to the quiz and answer key files.
            # Write the question and the answer options to the quiz file.
            quizFile.write('%s. What is the capital of %s?\n' % (questionNum + 1,states[questionNum]))
            for i in range(4):
                quizFile.write(' %s. %s\n' % ('ABCD'[i],answerOptions[i]))
            quizFile.write('\n')
            # Write the answer key to a file.
            answerKeyFile.write('%s. %s\n' % (questionNum + 1,'ABCD'[answerOptions.index(correctAnswer)]))

############################################################################
########## 例子2：批量文件改名
### 将当前工作目录中的所有文件名中的MM-DD-YYYY 改为 DD-MM-YYYY
#
# Renames filenames with American date format to European
import shutil,os,re
# Create a regex that matches files with the American date format.
# Matches file names that contain an American-style MM-DD-YYYY date.
# Groups used below: 1 = text before, 2 = month, 4 = day, 6 = year, 8 = text after.
datePattern = re.compile(r'''^(.*?) # all text before the date (1)
((0|1)?\d)- # one or two digits for the month (2(3))-
((0|1|2|3)?\d)- # one or two digits for the day (4(5))-
((19|20)\d\d) # four digits for the year (6(7))
(.*?)$ # all text after the date (8)
''',re.VERBOSE)

# The working directory does not change inside the loop, so resolve it once.
absWorkingDir = os.path.abspath('.')

# Loop over the files in the working directory.
for amerFilename in os.listdir('.'):
    mo = datePattern.search(amerFilename)
    # Skip files without a date.
    if mo is None:
        continue
    # Get the different parts of the filename.
    beforePart = mo.group(1)
    monthPart = mo.group(2)
    dayPart = mo.group(4)
    yearPart = mo.group(6)
    afterPart = mo.group(8)
    # Form the European-style filename (day and month swapped).
    euroFilename = beforePart + dayPart + '-' + monthPart + '-' + yearPart + afterPart
    # Get the full, absolute file paths.
    amerFilename = os.path.join(absWorkingDir,amerFilename)
    euroFilename = os.path.join(absWorkingDir,euroFilename)
    # Rename the files (dry run: only the intended rename is printed).
    print('Renaming %s to %s ...' % (amerFilename,euroFilename))
    #shutil.move(amerFilename,euroFilename) # uncomment after testing

############################################################################
########## 例子3：将一个文件夹备份到一个zip文件, 带版本
###
# Copies an entire folder and its contents into a zip file whose filename increments
#
import zipfile,os
def backupToZip(folder):
    """Back up the entire contents of *folder* into a new, numbered ZIP file.

    The archive is created in the current working directory and is named
    ``<folder basename>_<N>.zip``, where N is the first number, counting
    from 1, for which no such file exists yet. Earlier backup archives found
    inside the folder tree (same name prefix, ``.zip`` suffix) are skipped.

    Args:
        folder: Path of the folder to back up; it is made absolute first.
    """
    folder = os.path.abspath(folder) # make sure folder is absolute
    # Prefix shared by every backup archive of this folder.
    newBase = os.path.basename(folder) + '_'
    # Figure out the filename this code should use based on what files already exist.
    number = 1
    while True:
        zipFilename = newBase + str(number) + '.zip'
        if not os.path.exists(zipFilename):
            break
        number = number + 1
    # Create the ZIP file; the with-statement closes it even if adding fails.
    print('Creating %s...' % (zipFilename))
    with zipfile.ZipFile(zipFilename,'w') as backupZip:
        # Walk the entire folder tree and compress the files in each folder
        for folderName,subfolders,filenames in os.walk(folder):
            print('Adding files in %s...' % (folderName))
            # Add the current folder to the ZIP file.
            backupZip.write(folderName)
            for filename in filenames:
                if filename.startswith(newBase) and filename.endswith('.zip'):
                    continue # don't backup the backup ZIP files
                backupZip.write(os.path.join(folderName,filename))
    print('Done.')

# Example run: back up the folder C:\temp\test.
backupToZip('C:\\temp\\test')

# python进阶宝典7-文件处理

# 猜你喜欢