# -*- coding: utf-8 -*- import pandas as pd from pandas import Series,DataFrame import numpy as np ''' 读写文本格式的数据 read_cvs read_table ''' data=DataFrame(np.arange(16).reshape(4,4),index=list('abcd'),columns=['hhb','zjx','hcy','zjy']) print(data) data.to_csv('data/ex1.cvs') rdata=pd.read_csv('data/ex1.cvs') print(rdata) # Unnamed: 0 hhb zjx hcy zjy # 0 a 0 1 2 3 # 1 b 4 5 6 7 # 2 c 8 9 10 11 # 3 d 12 13 14 15 rdata=pd.read_table('data/ex1.cvs',sep=',') print(rdata) # Unnamed: 0 hhb zjx hcy zjy # 0 a 0 1 2 3 # 1 b 4 5 6 7 # 2 c 8 9 10 11 # 3 d 12 13 14 15 ''' 配置默认名字 ''' # data=DataFrame(np.random.randn(3,4)) # print(data) # # 0 1 2 3 # # 0 1.095597 0.454671 0.503149 -0.337012 # # 1 -0.688659 -1.455076 0.826556 0.823949 # # 2 1.122201 0.303618 -0.399119 0.979075 # # data.to_csv('data/ex2.cvs') rdata=pd.read_csv('data/ex2.cvs',header=None) print(rdata) # 0 1 2 3 # 0 -0.310421 -0.323209 0.996199 0.927549 # 1 -0.076534 -0.160730 -1.780651 -1.069414 # 2 -0.703372 -1.265776 -0.117108 -0.164619 #配置名字 rdata=pd.read_csv('data/ex2.cvs',names=['a','b','c','d']) print(rdata) # a b c d # 0 -0.310421 -0.323209 0.996199 aaa # 1 -0.076534 -0.160730 -1.780651 bbb # 2 -0.703372 -1.265776 -0.117108 ccc #让d列成为行索引 names=['a','b','c','行索引'] rdata=pd.read_csv('data/ex2.cvs',names=names,index_col='行索引') print(rdata) # a b c # 行索引 # aaa -0.310421 -0.323209 0.996199 # bbb -0.076534 -0.160730 -1.780651 # ccc -0.703372 -1.265776 -0.117108 ''' 解读程序化 ''' # data=DataFrame(np.arange(16).reshape(8,2),index=[['one','one','one','one','two','two','two','one'],['a','b','c','d','a','b','c','d']],columns=['key1','key2']) # # data.to_csv('data/ex3.cvs') # value1,value2,key1,key2 # one,a,0,1 # one,b,2,3 # one,c,4,5 # one,d,6,7 # two,a,8,9 # two,b,10,11 # two,c,12,13 # one,d,14,15 rdata=pd.read_csv('data/ex3.cvs',index_col=['value1','value2']) print(rdata) # key1 key2 # value1 value2 # one a 0 1 # b 2 3 # c 4 5 # d 6 7 # two a 8 9 # b 10 11 # c 12 13 # one d 14 15 #筛选阅读 rdata=pd.read_csv('data/ex4.cvs',skiprows=[0,2]) print(rdata) # value1 value2 key1 key2 # 0 one a 0 1 # 1 one b 2 3 # 2 one c 4 5 # 3 one d 6 7 # 4 two a 8 9 # 5 two b 10 11 # 6 two c 12 13 # 7 one d 14 15 data=pd.read_csv('data/ex5.cvs') print(data) # value1 value2 key1 key2 # 0 one NaN 0 1 # 1 one b 2 3 # 2 one c 4 Na #指定NaN值 sentinels={'key2':[3,'Na'],'value2':'b'} data=pd.read_csv('data/ex5.cvs',na_values=sentinels) print(data) # value1 value2 key1 key2 # 0 one NaN 0 1.0 # 1 one NaN 2 NaN # 2 one c 4 NaN ''' 逐块读取文件 ''' data=pd.read_csv('data/ex4.cvs',nrows=2,skiprows=[0,2]) print(data) # value1 value2 key1 key2 # 0 one a 0 1 # 1 one b 2 3 #将数据分成块 chunker=pd.read_csv('data/ex3.cvs',chunksize=2) print(chunker)#<pandas.io.parsers.TextFileReader object at 0x000000000B1B9F60> tot=Series([]) for piece in chunker: tot=tot.add(piece['value1'].value_counts(),fill_value=0) print(tot) # one 5.0 # two 3.0 # dtype: float64 print(tot[0]) ''' 输出文本格式,自定义分隔符 ''' data=pd.read_csv('data/ex3.cvs') print(data) import sys data.to_csv('data/ex6.cvs',sep='|') # |value1|value2|key1|key2 # 0|one|a|0|1 # 1|one|b|2|3 # 2|one|c|4|5 # 3|one|d|6|7 # 4|two|a|8|9 # 5|two|b|10|11 # 6|two|c|12|13 # 7|one|d|14|15 data=pd.read_csv('data/ex5.cvs') data.to_csv('data/ex7.cvs',na_rep='null')#将空字符串定义为null # ,value1,value2,key1,key2 # 0,one,null,0,1 # 1,one,b,2,3 # 2,one,c,4,Na #禁用索引 data.to_csv('data/ex8.cvs',index=False,header=False) # one,,0,1 # one,b,2,3 # one,c,4,Na #指定排列顺序 data.to_csv('data/ex9.cvs',columns=['key1','key2','value1','value2']) # ,key1,key2,value1,value2 # 0,0,1,one, # 1,2,3,one,b # 2,4,Na,one,c ''' json ''' import json obj='{"name":"hb","data":[1,2,3,4,5],"dict":[{"name":"aa"},{"name":"bb"}],"Na":1}' # result=json.dumps(obj) # print(result) data=json.loads(obj) print(data) dict=DataFrame(data['dict'],columns=['name']) print(dict) # name # 0 aa # 1 bb
python数据分析:pandas的读取和写入文件
猜你喜欢
转载自blog.csdn.net/qq_38788128/article/details/80702721
今日推荐
周排行