import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
# Build sample data: a 500 x 505 frame of normally distributed values
# (mean 0, standard deviation 100); rows are stocks, columns are days.
temp = pd.DataFrame(np.random.normal(0, 100, (500, 505)))
stock_list = ["股票" + str(i) for i in range(temp.shape[0])]
date = ["第" + str(i) + "天" for i in range(temp.shape[1])]
temp.index = stock_list
temp.columns = date

# Take a single column of data.
p_change = temp["第1天"]

# Group the values with cut, using self-defined bin edges.
bins = [-500, -100, -7, -5, -3, 0, 3, 5, 7, 100, 500]
a = pd.cut(p_change, bins)

# prefix is the text shown in front of each generated column name.
dummies = pd.get_dummies(a, prefix="rise")

# Combine the data with concat (axis=1 places the frames side by side).
print(pd.concat([temp, dummies], axis=1))

# Combine the data with join (aligned on the shared index).
print(temp.join(dummies))
# 二、merge 合并数据
# 1、内连接
import pandas as pd
# Left-hand table, built from a dict of columns.
left = pd.DataFrame(
    {
        "name": ["xiaoming", "xiaofang", "xiaoli", "xiaofu"],
        "sore": [120, 132, 124, 110],
        "tel": [10086, 10000, 10010, 10001],
        "agent": [1, 0, 0, 1],
    }
)

# Right-hand table, built from a list of row records.
right = pd.DataFrame(
    [
        {"name": "daming", "age": 22, "tel": 10000},
        {"name": "dafang", "age": 32, "tel": 120120},
        {"name": "xiaoli", "age": 24, "tel": 10010},
        {"name": "xiaofu", "age": 26, "tel": 10010},
    ]
)

# Inner join (the default `how`): `on` lists key columns present in both
# tables; only rows whose values match on every listed key are kept.
result = pd.merge(left, right, on=["name", "tel"])
print(result)
# 2、左连接
import pandas as pd
# Left-hand table, built from a dict of columns.
left = pd.DataFrame(
    {
        "name": ["xiaoming", "xiaofang", "xiaoli", "xiaofu"],
        "sore": [120, 132, 124, 110],
        "tel": [10086, 10000, 10010, 10001],
        "agent": [1, 0, 0, 1],
    }
)

# Right-hand table, built from a list of row records.
right = pd.DataFrame(
    [
        {"name": "daming", "age": 22, "tel": 10000},
        {"name": "dafang", "age": 32, "tel": 120120},
        {"name": "xiaoli", "age": 24, "tel": 10010},
        {"name": "xiaofu", "age": 26, "tel": 10010},
    ]
)

# Left join: `on` lists key columns present in both tables. Every row of the
# left table is kept and the right-only columns are appended; where all key
# values match, those columns carry the right table's values, otherwise NaN.
result = pd.merge(left, right, how="left", on=["name", "tel"])
print(result)
# 3、右连接
import pandas as pd
# Left-hand table, built from a dict of columns.
left = pd.DataFrame(
    {
        "name": ["xiaoming", "xiaofang", "xiaoli", "xiaofu"],
        "sore": [120, 132, 124, 110],
        "tel": [10086, 10000, 10010, 10001],
        "agent": [1, 0, 0, 1],
    }
)

# Right-hand table, built from a list of row records.
right = pd.DataFrame(
    [
        {"name": "daming", "age": 22, "tel": 10000},
        {"name": "dafang", "age": 32, "tel": 120120},
        {"name": "xiaoli", "age": 24, "tel": 10010},
        {"name": "xiaofu", "age": 26, "tel": 10010},
    ]
)

# Right join: `on` lists key columns present in both tables. Every row of the
# right table is kept and the left-only columns are added; where all key
# values match, those columns carry the left table's values, otherwise NaN.
result = pd.merge(left, right, how="right", on=["name", "tel"])
print(result)
# 4、外连接
import pandas as pd
# Left-hand table, built from a dict of columns.
left = pd.DataFrame(
    {
        "name": ["xiaoming", "xiaofang", "xiaoli", "xiaofu"],
        "sore": [120, 132, 124, 110],
        "tel": [10086, 10000, 10010, 10001],
        "agent": [1, 0, 0, 1],
    }
)

# Right-hand table, built from a list of row records. Unlike the earlier
# examples it also has an "agent" column, which is not one of the join keys.
right = pd.DataFrame(
    [
        {"name": "daming", "age": 22, "tel": 10000, "agent": 1},
        {"name": "dafang", "age": 32, "tel": 120120, "agent": 0},
        {"name": "xiaoli", "age": 24, "tel": 10010, "agent": 1},
        {"name": "xiaofu", "age": 26, "tel": 10010, "agent": 1},
    ]
)

# Outer join: rows from both tables are kept. Rows whose `on` key values match
# are combined into one row. A column that exists in both tables but is not a
# join key ("agent") is emitted twice under new names (agent_x / agent_y).
result = pd.merge(left, right, how="outer", on=["name", "tel"])
print(result)