python操作csv文件基础

本篇文章简单介绍一下python对csv文件的常见操作,由于代码注释写的很清楚,所以文字解释相对较少。

第一种:利用python基本模块操作

'''
利用python普通模块是实现读写csv文件
步骤1:取出A文件的第一行,去除空格,换行符等符号
步骤2:将A文件的第一行保存到一个列表中,然后写入到B文件中
步骤3:依次循环A文件后面的各行,然后写入到B文件中
'''
import sys
input_file='E:\\studytest\\checking.csv' 
output_file='E:\\studytest\\checking111.csv'

with open(input_file,'r',newline='') as  filereader:
    with open(output_file,'w',newline='') as filewrite:
        header=filereader.readline()
        header=header.strip()
        header_list=header.split(',')
        print(header_list)
        filewrite.write(','.join(map(str,header_list))+'\n')
        for row in filereader:
            row=row.strip()
            row_list=row.split(',')
            print(row_list)
            filewrite.write(','.join(map(str,row_list))+'\n')

第二种:利用python中csv模块操作:

import sys
import csv
input_file='E:\\studytest\\checking.csv'
output_file='E:\\studytest\\checking11122.csv'
# input_file=sys.argv[1]
# output_file=sys.argv[2]

with open(input_file,'r',newline='') as csv_in_file:
    with open(output_file,'w',newline='') as csv_out_file:
        filereader=csv.reader(csv_in_file,delimiter=',')#将csv文件的每行以列表的形式返回
        filewriter=csv.writer(csv_out_file,delimiter=',')#创建一个写入对象,delimiter是默认分隔符
        for row_list in filereader:
            print(row_list)
            filewriter.writerow(row_list)

第三种:利用python中csv模块筛选特定的行

'''
利用python中csv模块读写文件(筛选特定的行)
    固定格式:
        for row in filereader:
            ***if value in row meets some business tule or set of rules:***
                do something
            else:
                do something else
'''
import sys
import csv
input_file='E:\\studytest\\data.csv'
output_file='E:\\studytest\\data111.csv'
# input_file=sys.argv[1]
# output_file=sys.argv[2]

with open(input_file,'r',newline='') as csv_in_file:
    with open(output_file,'w',newline='') as csv_out_file:
        filereader=csv.reader(csv_in_file)#将csv文件的每行以列表的形式返回
        filewriter=csv.writer(csv_out_file)#创建一个写入对象,delimiter是默认分隔符
        #先处理第一行
        header=next(filereader)
        filewriter.writerow(header)
        #再处理剩余所有行
        for row_list in filereader:
            supplier=str(row_list[0]).strip()
            cost=str(row_list[3]).strip("$").replace(',','')
            if supplier == 'Supplier Z' or float(cost)>600.00:
                filewriter.writerow(row_list)

第四种:利用python中csv模块筛选特定的列:

'''
利用python中csv模块读写文件(筛选特定的列)
'''
import sys
import csv
input_file='E:\\studytest\\data.csv'
output_file='E:\\studytest\\data_column.csv'
# input_file=sys.argv[1]
# output_file=sys.argv[2]
'''第一种:根据列索引值选出特定的列'''
my_columns = [0,3]#限定我们需要的特定列
with open(input_file,'r',newline='') as csv_in_file:
    with open(output_file,'w',newline='') as csv_out_file:
        filereader=csv.reader(csv_in_file)#将csv文件的每行以列表的形式返回
        filewriter=csv.writer(csv_out_file)#创建一个写入对象,delimiter是默认分隔符
        #处理所有行
        for row_list in filereader:#循环每一行
            row_list_output = []
            for index_value in my_columns:
                row_list_output.append(row_list[index_value])#取出特定列的数据,并封装成列表
            filewriter.writerow(row_list_output)#写入文件
'''
利用python中csv模块读写文件(筛选特定的列)
'''
import sys
import csv
input_file='E:\\studytest\\data.csv'
output_file='E:\\studytest\\data_column.csv'
# input_file=sys.argv[1]
# output_file=sys.argv[2]
'''第二种:根据列标题选出特定的列
        注:方法和第一种的区别在在于,先根据列标题找出列索引,然后在运用第一种方法读写文件,好处是列标题是固定的
'''
my_columns = ['Invoice Number','Purchase Date']
my_columns_index = []
with open(input_file,'r',newline='') as csv_in_file:
    with open(output_file,'w',newline='') as csv_out_file:
        filereader=csv.reader(csv_in_file)#将csv文件的每行以列表的形式返回
        filewriter=csv.writer(csv_out_file)#创建一个写入对象,delimiter是默认分隔符
        header=next(filereader,None)
        for index_value in range(len(header)):
            if header[index_value] in my_columns:
                my_columns_index.append(index_value)
        filewriter.writerow(my_columns)
        for row_list in filereader:
            row_list_output = []
            for index_value in my_columns_index:
                row_list_output.append(row_list[index_value])
        filewriter.writerow(row_list_output)

第五种:选取连续的行

import sys
import csv
'''
选取连续的行
'''
input_file='E:\\studytest\\data.csv'
output_file='E:\\studytest\\data_other1.csv'
row_counter = 0
with open(input_file,'r',newline='') as csv_in_file:
    with open(output_file,'w',newline='') as csv_out_file:
        filereader=csv.reader(csv_in_file)
        filewriter=csv.writer(csv_out_file)
        for row in filereader:
            #选取3-15行
            if row_counter >= 3 and row_counter <= 15:
                filewriter.writerow([value.strip() for value in row])
            row_counter += 1

第六种:添加标题列:

import sys
import csv
'''
添加标题行
'''
input_file='E:\\studytest\\data_notitle.csv'
output_file='E:\\studytest\\data_other2.csv'
with open(input_file,'r',newline='') as csv_in_file:
    with open(output_file,'w',newline='') as csv_out_file:
        filereader=csv.reader(csv_in_file)
        filewriter=csv.writer(csv_out_file)
        header_list = ['Title1','Title2','Title3','Title4','Title5']
        filewriter.writerow(header_list)
        for row in filereader:
            filewriter.writerow(row)

第七种:对多个csv文件操作:

import sys
import csv
'''
读取多个CSV文件
1、文件记数和行列记数
'''
import glob
import os
#存放多个文件的文件夹目录,文件命令格式相似
# input_path='E:\\studytest\\multile'
# file_counter = 0
# #循环文件夹中所有文件
# for input_file in  glob.glob(os.path.join(input_path,'data*')):
#     row_counter = 1
#     with open(input_file,'r',newline='') as csv_in_file:
#         filereader = csv.reader(csv_in_file)
#         header=next(filereader,None)
#         for row in filereader:
#             row_counter += 1
#     print('{0!s}:\t{1:d} rows \t{2:d} columns'.format(os.path.basename(input_file),row_counter,len(header)))
#     file_counter += 1;
# print('Number of files:{0:d}'.format(file_counter))
'''
2、将多个文件的数据放到一个文件中
'''
# input_path='E:\\studytest\\multile'
# output_file='E:\\studytest\\multile\\count_data.csv'
# first_file = True
# for input_file in glob.glob(os.path.join(input_path,'data*')):
#     print(os.path.basename(input_file))
#     with open(input_file,'r',newline='') as csv_in_file:
#         with open(output_file,'a',newline='') as  csv_out_file:
#             filereader = csv.reader(csv_in_file)
#             filewriter = csv.writer(csv_out_file)
#             #如果是第一次,就全部添加
#             if first_file:
#                 for row in filereader:
#                     filewriter.writerow(row)
#                 first_file = False
#             #如果不是第一次,只添加除了标题外的其他行
#             else:
#                 header = next(filereader,None)
#                 for row in filereader:
#                     filewriter.writerow(row)

'''
3、计算每个文件中值的总和和平均数
'''
input_path='E:\\studytest\\multile'
output_file='E:\\studytest\\multile\\count_average_data.csv'
#构造结果文件的标题列
output_header_list=['file_name','total_sales','average_sales']
csv_out_file=open(output_file,'a',newline='')
filewriter=csv.writer(csv_out_file)
filewriter.writerow(output_header_list)
#循环文件夹中每个文件
for input_file in glob.glob(os.path.join(input_path,'data*')):
    with open(input_file,'r',newline='') as csv_in_file:
        #读取文件
        filereader = csv.reader(csv_in_file)
        #构造插入数据格式['file_name','total_sales','average_sales']
        output_list=[]
        #插入文件名
        output_list.append(os.path.basename(input_file))
        header = next(filereader)
        total_sales = 0.0
        number_of_sales = 0.0
        #循环除标题行的每一行
        for row in filereader:
            #获得总销售额
            sale_count = row[3]
            total_sales += float(str(sale_count).strip('$').replace(',',''))
            #获取销售条数
            number_of_sales += 1
        #获取平均销售额
        average_sales = '{0:.2f}'.format(total_sales / number_of_sales)
        #插入总销售额
        output_list.append(total_sales)
        #插入平均销售额
        output_list.append(average_sales)
        #插入结果问价
        filewriter.writerow(output_list)
csv_out_file.close()
由于是基础,对比图片就不贴出来了,大家使用代码的时候,只需要将涉及到文件名和路径的参数改一下就行了。

猜你喜欢

转载自blog.csdn.net/zly412934578/article/details/80844674