数据挖掘工具pandas(十一)数据合并

一,concat / join合并数据

import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

# Build the demo data: normally distributed values (mean 0, std 100),
# one row per stock and one column per day.
values = np.random.normal(0, 100, (500, 505))
stock_list = [f"股票{i}" for i in range(values.shape[0])]
date = [f"第{i}天" for i in range(values.shape[1])]
temp = pd.DataFrame(values, index=stock_list, columns=date)

# Pull out a single column as a Series.
p_change = temp["第1天"]

# Bin the values with hand-picked edges instead of automatically chosen ones.
bins = [-500, -100, -7, -5, -3, 0, 3, 5, 7, 100, 500]
binned = pd.cut(p_change, bins)

# One-hot encode the bins; `prefix` is the text placed in front of each
# generated column name.
dummies = pd.get_dummies(binned, prefix="rise")

# Merge column-wise with concat (axis=1).
combined = pd.concat([temp, dummies], axis=1)
print(combined)

# Merge with join, which lines the two frames up on their index.
print(temp.join(dummies))

二,merge合并数据

1,内连接
import pandas as pd
left = {
    "name" : ["xiaoming","xiaofang","xiaoli","xiaofu"],
    "sore" : [120,132,124,110],
    "tel" : [10086,10000,10010,10001],
    "agent":[1,0,0,1]
}
right = [
    {"name" : "daming", "age": 22,"tel":10000 },
    {"name" : "dafang", "age": 32,"tel":120120 },
    {"name" : "xiaoli", "age": 24,"tel":10010 },
    {"name" : "xiaofu", "age": 26,"tel":10010 }
]
left = pd.DataFrame(left)
right = pd.DataFrame(right)

# Inner join (the default `how`): `on` lists the key columns shared by both
# frames, and only rows whose values match on ALL of those keys are kept.
# With this data only ("xiaoli", 10010) appears on both sides.
# The merge call must actually run; otherwise `result` is undefined and the
# print below raises NameError.
result = pd.merge(left,right,on=["name","tel"])
print(result)

在这里插入图片描述

2,左连接
import pandas as pd
# Left-hand table, given column by column.
left = pd.DataFrame(
    {
        "name": ["xiaoming", "xiaofang", "xiaoli", "xiaofu"],
        "sore": [120, 132, 124, 110],
        "tel": [10086, 10000, 10010, 10001],
        "agent": [1, 0, 0, 1],
    }
)

# Right-hand table, given as one record per row.
right = pd.DataFrame(
    [
        {"name": "daming", "age": 22, "tel": 10000},
        {"name": "dafang", "age": 32, "tel": 120120},
        {"name": "xiaoli", "age": 24, "tel": 10010},
        {"name": "xiaofu", "age": 26, "tel": 10010},
    ]
)

# Left join: every row of `left` is kept. Where all the `on` keys match a row
# of `right`, that row's right-only columns are filled in; otherwise those
# columns hold NaN.
result = left.merge(right, how="left", on=["name", "tel"])

print(result)

在这里插入图片描述

3,右连接
import pandas as pd
# Left-hand table, given column by column.
left = pd.DataFrame(
    {
        "name": ["xiaoming", "xiaofang", "xiaoli", "xiaofu"],
        "sore": [120, 132, 124, 110],
        "tel": [10086, 10000, 10010, 10001],
        "agent": [1, 0, 0, 1],
    }
)

# Right-hand table, given as one record per row.
right = pd.DataFrame(
    [
        {"name": "daming", "age": 22, "tel": 10000},
        {"name": "dafang", "age": 32, "tel": 120120},
        {"name": "xiaoli", "age": 24, "tel": 10010},
        {"name": "xiaofu", "age": 26, "tel": 10010},
    ]
)

# Right join: every row of `right` is kept. Where all the `on` keys match a
# row of `left`, that row's left-only columns are filled in; otherwise those
# columns hold NaN.
result = left.merge(right, how="right", on=["name", "tel"])

print(result)

在这里插入图片描述

4,外连接
import pandas as pd
# Left-hand table, given column by column.
left = pd.DataFrame(
    {
        "name": ["xiaoming", "xiaofang", "xiaoli", "xiaofu"],
        "sore": [120, 132, 124, 110],
        "tel": [10086, 10000, 10010, 10001],
        "agent": [1, 0, 0, 1],
    }
)

# Right-hand table, given as one record per row; unlike the earlier examples
# it also carries an "agent" column.
right = pd.DataFrame(
    [
        {"name": "daming", "age": 22, "tel": 10000, "agent": 1},
        {"name": "dafang", "age": 32, "tel": 120120, "agent": 0},
        {"name": "xiaoli", "age": 24, "tel": 10010, "agent": 1},
        {"name": "xiaofu", "age": 26, "tel": 10010, "agent": 1},
    ]
)

# Outer join: rows from both frames are kept. Rows whose `on` keys all match
# are combined into a single row. A column that exists on both sides but is
# not listed in `on` ("agent" here) is split into two suffixed columns.
result = left.merge(right, how="outer", on=["name", "tel"])

print(result)

在这里插入图片描述

猜你喜欢

转载自blog.csdn.net/TFATS/article/details/106354452