# -*- coding: utf-8 -*-
"""Walk-through of basic pandas DataFrame usage.

A DataFrame is a tabular data structure holding an ordered collection of
columns, where each column may have a different value type.

The script builds a few small frames and prints them to demonstrate:
column ordering, missing columns, row/column access, scalar / array /
Series assignment, column deletion, nested-dict construction, transposition,
index/column naming and the ``values`` attribute.

The "Expected output" comments are the outputs recorded by the original
author; exact formatting may vary with the installed pandas version.
"""
import pandas as pd
from pandas import Series, DataFrame
import numpy as np

# Build a DataFrame from a dict of equal-length lists.
# (Named ``population`` rather than ``dict`` to avoid shadowing the builtin.)
population = {
    'state': ['a', 'a', 'a', 'b', 'b'],
    'year': [1998, 1999, 1998, 1999, 2000],
    'pop': [1.2, 1.4, 2.3, 4.5, 5.6],
}
frame = DataFrame(population)
print(frame)
# Expected output:
#   state  year  pop
# 0     a  1998  1.2
# 1     a  1999  1.4
# 2     a  1998  2.3
# 3     b  1999  4.5
# 4     b  2000  5.6

# Specify the column order explicitly.
data = DataFrame(population, columns=['year', 'pop', 'state'])
print(data)
# Expected output:
#    year  pop state
# 0  1998  1.2     a
# 1  1999  1.4     a
# 2  1998  2.3     a
# 3  1999  4.5     b
# 4  2000  5.6     b

# Requesting a column that is not in the data ('debt') yields NaN values.
data = DataFrame(
    population,
    columns=['state', 'year', 'pop', 'debt'],
    index=['one', 'two', 'three', 'fore', 'five'],
)
print(data)
# Expected output:
#       state  year  pop debt
# one       a  1998  1.2  NaN
# two       a  1999  1.4  NaN
# three     a  1998  2.3  NaN
# fore      b  1999  4.5  NaN
# five      b  2000  5.6  NaN

# Column labels (the table header).
print(data.columns)  # Index(['state', 'year', 'pop', 'debt'], dtype='object')

# Column access by key ...
print(data['state'])
# one      a
# two      a
# three    a
# fore     b
# five     b

# ... or by attribute.
print(data.year)
# one      1998
# two      1999
# three    1998
# fore     1999
# five     2000

# Row access by label. ``.ix`` was removed from pandas, so the label-based
# ``.loc`` indexer is used instead.
print(data.loc['one'])
# state       a
# year     1998
# pop       1.2
# debt      NaN

# Assign a scalar to the whole 'debt' column.
data['debt'] = 15.6
print(data)
#       state  year  pop  debt
# one       a  1998  1.2  15.6
# two       a  1999  1.4  15.6
# three     a  1998  2.3  15.6
# fore      b  1999  4.5  15.6
# five      b  2000  5.6  15.6

# Assign an array; its length must match the number of rows.
data['debt'] = np.arange(5)
print(data)
# Expected output:
#       state  year  pop  debt
# one       a  1998  1.2     0
# two       a  1999  1.4     1
# three     a  1998  2.3     2
# fore      b  1999  4.5     3
# five      b  2000  5.6     4

# Assign a Series: values are aligned on the index labels, and rows without
# a matching label become NaN.
val = Series([-1.22, 22, 33], index=['one', 'three', 'five'])
data['debt'] = val
print(data)
# Expected output:
#       state  year  pop   debt
# one       a  1998  1.2  -1.22
# two       a  1999  1.4    NaN
# three     a  1998  2.3  22.00
# fore      b  1999  4.5    NaN
# five      b  2000  5.6  33.00

# Add a boolean column derived from a condition, then delete it again.
data['deldata'] = data.state == 'a'
print(data)
# Expected output:
#       state  year  pop   debt  deldata
# one       a  1998  1.2  -1.22     True
# two       a  1999  1.4    NaN     True
# three     a  1998  2.3  22.00     True
# fore      b  1999  4.5    NaN    False
# five      b  2000  5.6  33.00    False

del data['deldata']
print(data)
# Expected output:
#       state  year  pop   debt
# one       a  1998  1.2  -1.22
# two       a  1999  1.4    NaN
# three     a  1998  2.3  22.00
# fore      b  1999  4.5    NaN
# five      b  2000  5.6  33.00

# Another common input shape: a dict of dicts. Outer keys become columns,
# inner keys become row labels; missing cells are shown as NaN.
# NOTE(review): the row order of the resulting index may depend on the
# pandas version -- confirm against the outputs recorded below.
nested = {
    'hhb': {'2001': 100, '2002': 200},
    'zjx': {'2000': 1.2, '2001': 1.23, '2002': 1.3},
}
dict_data = DataFrame(nested)
print(dict_data)
#         hhb   zjx
# 2000    NaN  1.20
# 2001  100.0  1.23
# 2002  200.0  1.30

# Transpose.
print(dict_data.T)
#      2000    2001   2002
# hhb   NaN  100.00  200.0
# zjx   1.2    1.23    1.3

# Specifying the index.
# The original author marked the following form as incorrect:
#     dict_index = DataFrame(nested, index=[2001, 2002, 2003])
#     print(dict_index)
# NOTE(review): presumably because the inner keys are strings, so the
# integer labels do not match any row -- confirm.
# The form the author marked as correct: build the frame from a dict of
# Series, each sliced by position (``.iloc`` makes the positional intent
# explicit).
pdata = {
    'hhb': dict_data['hhb'].iloc[:-1],
    'zjx': dict_data['zjx'].iloc[:2],
}
print(DataFrame(pdata))
#         hhb   zjx
# 2000    NaN  1.20
# 2001  100.0  1.23

# Name the index and the columns axis.
dict_data.index.name = 'year'
dict_data.columns.name = 'name'
print(dict_data)
# name    hhb   zjx
# year
# 2000    NaN  1.20
# 2001  100.0  1.23
# 2002  200.0  1.30

# The underlying values as a 2-D array.
print(dict_data.values)
# [[   nan   1.2 ]
#  [100.     1.23]
#  [200.     1.3 ]]

# When columns have different dtypes, a dtype that accommodates all of them
# is chosen for the array.
print(data.values)
# [['a' 1998 1.2 -1.22]
#  ['a' 1999 1.4 nan]
#  ['a' 1998 2.3 22.0]
#  ['b' 1999 4.5 nan]
#  ['b' 2000 5.6 33.0]]
Python数据分析四:DataFrame基本结构
猜你喜欢
转载自blog.csdn.net/qq_38788128/article/details/80651621
今日推荐
周排行