数据挖掘工具pandas(三)DataFrame查看、修改数据

一,dataframe按索引查看数据

1,使用行、列索引直接查看 (注意:先列后行!)
import numpy as np
import pandas as pd

# Fix the seed so the random sample is reproducible between runs.
np.random.seed(2)
day_data = np.random.normal(loc=0, scale=1, size=(500, 507))

n_stocks, n_days = day_data.shape

# Row labels: one name per row of the array ("股票0", "股票1", ...).
stock_list = [f"股票{i}" for i in range(n_stocks)]
# Column labels: consecutive business days starting at 2018-01-01.
date = pd.date_range(start="2018-01-01", periods=n_days, freq="B")
# Attach the row and column labels to the raw array.
day_data2 = pd.DataFrame(data=day_data, index=stock_list, columns=date)

# Plain [] indexing goes column first, then row.
first_day = day_data2['2018-01-01']
print(first_day['股票0'])
2,使用loc、iloc、ix查看索引

loc(通过索引名字),iloc(通过索引下标),ix(通过索引名字+索引下标;注意:ix 自 pandas 0.20 起已弃用,并在 pandas 1.0 中被移除,新代码请改用 loc 或 iloc)

import numpy as np
import pandas as pd

# Fix the seed so the random sample is reproducible between runs.
np.random.seed(2)
day_data = np.random.normal(0, 1, (500, 507))

# Row labels: one name per row of the array ("股票0", "股票1", ...).
stock_list = ["股票" + str(i) for i in range(day_data.shape[0])]
# Column labels: one name per column of the array ("第0天", "第1天", ...).
date = ["第" + str(i) + "天" for i in range(day_data.shape[1])]
# Attach the row and column labels to the raw array.
day_data2 = pd.DataFrame(day_data, index=stock_list, columns=date)

# loc selects by label; a label slice includes BOTH endpoints.
# print(day_data2.loc["股票0":"股票2", "第0天":"第1天"])

# iloc selects by position; the stop position is excluded, as in plain Python.
# print(day_data2.iloc[0:2, 0:2])

# Mixed access: rows by position, columns by label. The old `.ix` accessor did
# this implicitly, but it was removed in pandas 1.0, so the row positions are
# translated to labels first and the selection is done with loc.
selected = day_data2.loc[day_data2.index[[1, 2]], ["第0天", "第1天"]]
print(selected)
3,不支持的操作
# Wrong: plain [] indexing selects the column first, so the row label cannot come first.
data[行索引][列索引]
# Wrong: a DataFrame does not accept NumPy-style [rows, cols] slicing in plain [];
# positional slicing of both axes goes through .iloc instead.
data[:1,:2]

二,dataframe按索引修改数据

1,使用行、列索引直接修改 (注意:先列后行!)
# encoding=utf-8
import numpy as np
import pandas as pd

# Fix the seed so the random sample is reproducible between runs.
np.random.seed(2)
day_data = np.random.normal(0, 1, (500, 507))

# Row labels: one name per row of the array ("股票0", "股票1", ...).
stock_list = ["股票" + str(i) for i in range(day_data.shape[0])]
# Column labels: one name per column of the array ("第0天", "第1天", ...).
date = ["第" + str(i) + "天" for i in range(day_data.shape[1])]
# Attach the row and column labels to the raw array.
day_data2 = pd.DataFrame(day_data, index=stock_list, columns=date)

# Plain [] indexing goes column first, then row.
# Single cell (chained assignment; prefer .loc for single cells):
# day_data2['第0天']['股票0'] = np.nan

# Overwrite one whole column with NaN. The original statement here was not
# valid Python; bracket assignment is the form that always parses and works.
day_data2["第0天"] = np.nan
print(day_data2)

# Remove whole columns. drop() returns a new frame, so the name is rebound.
day_data2 = day_data2.drop(["第0天", "第1天"], axis=1)
print(day_data2)
2,使用loc、iloc修改索引
import numpy as np
import pandas as pd

# Fix the seed so the random sample is reproducible between runs.
np.random.seed(2)
day_data = np.random.normal(0, 1, (500, 507))

# Row labels: one name per row of the array ("股票0", "股票1", ...).
stock_list = ["股票" + str(i) for i in range(day_data.shape[0])]
# Column labels: one name per column of the array ("第0天", "第1天", ...).
date = ["第" + str(i) + "天" for i in range(day_data.shape[1])]
# Attach the row and column labels to the raw array.
day_data2 = pd.DataFrame(day_data, index=stock_list, columns=date)

# loc assigns by label; a label slice includes BOTH endpoints.
# day_data2.loc["股票0":"股票2", "第0天":"第1天"] = 0

# iloc assigns by position; the stop position is excluded, as in plain Python.
# day_data2.iloc[0:2, 0:2] = 0

# Mixed access: rows by position, columns by label. The old `.ix` accessor did
# this implicitly, but it was removed in pandas 1.0, so the row positions are
# translated to labels first and the assignment is done with loc.
day_data2.loc[day_data2.index[[1, 2]], ["第0天", "第1天"]] = 0
3,pandas的字符串方法
import pandas as pd
from sqlalchemy import create_engine

# SQLAlchemy engine for the MySQL database named in the URL (PyMySQL driver).
engine = create_engine('mysql+pymysql://root:root@localhost:3306/yoyo')
sql = "select * from role_info;"
df = pd.read_sql(sql, con=engine)

# Show the dtype pandas assigned to each loaded column.
print(df.dtypes)

# The .str accessor applies a string method to every element of the column:
# here each role_pic value is split on '/' (presumably a path or URL — confirm
# against the table contents).
role_pic_parts = df['role_pic'].str.split('/')
print(role_pic_parts.tolist())
4,给int类型的列赋值nan不会报错,因为DataFrame会自动将该列由int转换为float类型。(注意:从 pandas 2.1 起,这种隐式类型提升会触发 FutureWarning,建议先用 astype(float) 显式转换后再赋值。)
import pandas as pd
import numpy as np

# 3x4 integer frame: rows a..c, columns w..z, values 0..11.
temp = pd.DataFrame(np.arange(12).reshape(3, 4), index=list('abc'), columns=list("wxyz"))

# Every column starts out with an integer dtype.
print(temp.dtypes)

# NaN is a float, so an integer column cannot hold it. Older pandas upcast the
# column to float64 silently on assignment; since pandas 2.1 that implicit
# upcast emits a FutureWarning and is scheduled to become an error. Casting the
# target column explicitly gives the same result on every version.
temp["x"] = temp["x"].astype("float64")
temp.iloc[1, 1] = np.nan

# Only column x is float64 now; the other columns keep their integer dtype.
print(temp.dtypes)

#-------------output---------------------------------------
w    int32
x    int32
y    int32
z    int32
dtype: object
w      int32
x    float64
y      int32
z      int32
dtype: object

猜你喜欢

转载自blog.csdn.net/TFATS/article/details/106288312
今日推荐