numpy中的concatenate()
>>> a = np.array([[1, 2], [3, 4]])
>>> b = np.array([[5, 6]])
>>> np.concatenate((a, b), axis=0)
array([[1, 2],
       [3, 4],
       [5, 6]])
>>> np.concatenate((a, b.T), axis=1)
array([[1, 2, 5],
       [3, 4, 6]])
注意:使用这个方法时,很容易因为数组的维度(dimensions)不一致而报错,这时需要先调整数组的维度,
可以使用下面的两种方法:
# Two ways to add a length-1 axis by indexing with np.newaxis.
# NOTE(review): y_resampled is not defined in this snippet; the shapes below
# assume it is a 1-D NumPy array of length n — TODO confirm.
# Trailing new axis: (n,) -> (n, 1), i.e. a single column.
y_resamplednew = y_resampled[:,np.newaxis]
print(y_resamplednew)
# Leading new axis: (n,) -> (1, n), i.e. a single row.
y_resamplednew2 = y_resampled[np.newaxis,:]
print(y_resamplednew2)
pandas中的merge,concat,join
# Walkthrough of pandas merge / row-append / join / concat on two small 4x4 frames.
# Bare expressions (e.g. `data1`) only display a value in a notebook or REPL;
# they have no effect when the file runs as a script.

# Imported here because this snippet runs before the file's later import lines.
import numpy as np
import pandas as pd

# data1: the integers 0..15 laid out row by row, columns a-d.
data1 = pd.DataFrame(
    np.arange(0, 16).reshape(4, 4),
    columns=list('abcd'),
)
data1

# data2: hand-written rows; column b shares the values 1, 5 and 9 with data1.
data2 = pd.DataFrame(
    [
        [4, 1, 5, 7],
        [6, 5, 7, 1],
        [9, 9, 123, 129],
        [16, 16, 32, 1],
    ],
    columns=['a', 'b', 'c', 'd'],
)
data2

# Inner join on column b (the default `how`); the other, overlapping columns
# receive the default '_x' / '_y' suffixes.
pd.merge(data1, data2, on=['b'])
# Left join: every row of data1 is kept, unmatched rows get NaN on the right.
pd.merge(data1, data2, on='b', how='left')

# Stack the rows of data2 under data1 with a fresh 0..n-1 index.
# DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
pd.concat([data1, data2], ignore_index=True)

# join() aligns on the index and raises on overlapping column names when no
# suffix is given, so data2's columns are renamed first.
data2.columns = list('pown')
result = data1.join(data2)
result

# Restore the shared column names for the concat examples below.
data1.columns = list('abcd')
data2.columns = list('abcd')
# data3 is another name for the same object as data2, not a copy.
data3 = data2

# Row-wise concat; `keys` adds an outer index level naming each source frame.
pd.concat([data1, data2, data3], keys=['data1', 'data2', 'data3'])
# Column-wise concat; `keys` becomes the outer level of the column index.
pd.concat([data1, data2, data3], axis=1, keys=['data1', 'data2', 'data3'])

# data4 lacks column d: the default outer join fills d with NaN for its rows,
# while join='inner' keeps only the columns both frames share.
data4 = data3[['a', 'b', 'c']]
pd.concat([data1, data4])
pd.concat([data1, data4], join='inner')
import pandas as pd
# Column-based merges: implicit key detection, explicit keys, differently
# named keys, join types, multi-column keys and custom suffixes.
df1 = pd.DataFrame({'key': list('bbacaab'), 'data1': range(7)})
df2 = pd.DataFrame({'key': list('abd'), 'data2': range(3)})
# Without `on`, the column name common to both frames ('key') is the join key,
# so the next two calls print the same inner join.
print(df1.merge(df2))
print(df1.merge(df2, on='key'))

# Key columns with different names on each side.
df3 = pd.DataFrame({'lkey': list('bbacaab'), 'data1': range(7)})
df4 = pd.DataFrame({'rkey': list('abd'), 'data2': range(3)})
print(df3.merge(df4, left_on='lkey', right_on='rkey'))

# Left join keeps every row of df1, matched or not.
print(df1.merge(df2, how='left'))

# Two-column keys; both frames also carry an 'lval' column, which is
# disambiguated with suffixes in the merged output.
left = pd.DataFrame({
    'key1': ['foo', 'foo', 'bar'],
    'key2': ['one', 'two', 'one'],
    'lval': [1, 2, 3],
})
right = pd.DataFrame({
    'key1': ['foo', 'foo', 'bar', 'bar'],
    'key2': ['one', 'one', 'one', 'two'],
    'lval': [4, 4, 6, 7],
})
print(left, '\n', right)
print(left.merge(right, on=['key1', 'key2'], how='outer'))
print(left.merge(right, on=['key1', 'key2'], how='inner'))
print(left.merge(right, on=['key1', 'key2'], how='inner', suffixes=('_left', '_right')))
'''
merge 函数的参数:
on:用于连接的列名
left_on,right_on:左侧(右侧)DataFrame 中用作连接键的列
left_index(right_index):将左(右)侧的行索引作为连接键
sort:是否按连接键对合并后的数据排序;当前版本的 pandas 默认为 False(早期版本默认为 True),数据量比较大时保持 False 性能更好
suffixes:重名列的后缀,默认为('_x','_y')
copy:默认为True;数据量比较大时可以改为False
'''
# Joining against an index instead of a column.
left1 = pd.DataFrame({'key': list('abaabc'), 'value': range(6)})
right1 = pd.DataFrame({'group_val': [3.5, 7]}, index=list('ab'))
# Match left1's 'key' column against right1's row labels (inner join).
print(left1.merge(right1, left_on='key', right_index=True))
# Index-on-index outer join: left1 has the default integer index and right1 is
# labelled 'a'/'b', so the output is the union of both sets of rows.
print(left1.join(right1, how='outer'))
# Concatenating Series: stacking, column-wise alignment, and hierarchical keys.
s1 = pd.Series([0, 1], index=['a', 'b'])
s2 = pd.Series([2, 3, 4], index=list('cde'))
s3 = pd.Series([5, 6], index=list('fg'))
# Default axis=0: the three Series are stacked end to end.
print(pd.concat([s1, s2, s3]))

# s4 shares labels 'a' and 'b' with s1 and adds 'f' and 'g'.
s4 = pd.concat([s1 * 5, s3])
# Align s1 and s4 side by side, then select the row labels to show.
# The `join_axes` argument was removed in pandas 1.0; reindexing the
# concatenated result is the documented replacement. Labels missing from
# both inputs ('c', 'e') come out as all-NaN rows.
print(pd.concat([s1, s4], axis=1).reindex(['a', 'c', 'b', 'e']))

# `keys` builds a two-level index: source name on the outside, original label inside.
result = pd.concat([s1, s2, s3], keys=['s1', 's2', 's3'])
print(result)
# With ignore_index=True the result is relabelled 0..n-1, so `keys` leaves no
# trace in the output index.
result = pd.concat([s1, s2, s3], keys=['s1', 's2', 's3'], ignore_index=True)
print(result)
import numpy as np
# Patch the missing values of one Series with values from another Series
# that carries the same index.
a = pd.Series([np.nan, 2.5, np.nan, 3.5, 4.5, 8], index=list('fedcba'))
b = pd.Series(np.arange(len(a), dtype=np.float64), index=list('fedcba'))
# Blank out the last element by position. The index holds string labels, so a
# plain b[-1] depends on the deprecated integer-as-position fallback; .iloc
# states the positional intent explicitly.
b.iloc[-1] = np.nan
print(a, '\n', b)
# Element-wise choice: b where a is NaN, otherwise a (yields a plain ndarray).
print(np.where(pd.isnull(a), b, a))
# NOTE(review): '/nn' is printed literally; possibly a typo for '\n' — left unchanged.
print('/nn')
# Label-aligned variant: keep b's values and fill its NaNs from a.
print(b.combine_first(a))