import numpy as np
import pandas as pd
from pandas import Series, DataFrame
連結 (concatenate / concat)
配列（行列）の連結: np.concatenate ／ Series と DataFrame の連結: pd.concat
# 3x3 integer array holding the values 0..8 in row-major order.
arr1 = np.arange(9).reshape((3, 3))
arr1
array([[0, 1, 2],
[3, 4, 5],
[6, 7, 8]])
# Second 3x3 array with the same contents (0..8), used as a concat partner.
arr2 = np.arange(9).reshape((3, 3))
arr2
array([[0, 1, 2],
[3, 4, 5],
[6, 7, 8]])
# Stack the two arrays along axis 0 (rows): a (3, 3) and a (3, 3) give (6, 3).
# arr2 was created for this purpose but the call previously passed arr1 twice.
np.concatenate([arr1, arr2])
array([[0, 1, 2],
[3, 4, 5],
[6, 7, 8],
[0, 1, 2],
[3, 4, 5],
[6, 7, 8]])
# Join the two arrays side by side along axis 1 (columns): result is (3, 6).
# arr2 was created for this purpose but the call previously passed arr1 twice.
np.concatenate([arr1, arr2], axis=1)
array([[0, 1, 2, 0, 1, 2],
[3, 4, 5, 3, 4, 5],
[6, 7, 8, 6, 7, 8]])
# Integer Series labelled 'x', 'y', 'z'.
s1 = Series(data=[1, 2, 3], index=['x', 'y', 'z'])
s1
x 1
y 2
z 3
dtype: int64
# Integer Series labelled 'a', 'b' -- shares no labels with s1.
s2 = Series(data=[4, 5], index=['a', 'b'])
s2
a 4
b 5
dtype: int64
# Append s2 after s1 along the index (axis 0 is the default direction).
pd.concat([s1, s2], axis=0)
x 1
y 2
z 3
a 4
b 5
dtype: int64
# Place s1 and s2 side by side as columns 0 and 1. Their indexes do not
# overlap, so the result is aligned on the union of labels and padded with NaN.
# sort=True states the label ordering explicitly: it keeps the sorted index
# order on every pandas version and avoids the FutureWarning that older
# releases emit when the sort behaviour is left implicit.
pd.concat([s1, s2], axis=1, sort=True)
     0    1
a  NaN  4.0
b  NaN  5.0
x  1.0  NaN
y  2.0  NaN
z  3.0  NaN
# 4x3 frame of uniform random samples from [0, 1), columns 'x', 'y', 'z'.
df1 = DataFrame(data=np.random.rand(4, 3), columns=['x', 'y', 'z'])
df1
          x         y         z
0  0.118006  0.976428  0.286200
1  0.554356  0.739202  0.441234
2  0.987343  0.032884  0.963760
3  0.730118  0.617397  0.943546
# 3x3 frame of standard-normal random samples, columns 'x', 'y', 'a'
# (shares 'x' and 'y' with df1, but has 'a' where df1 has 'z').
df2 = DataFrame(data=np.random.randn(3, 3), columns=['x', 'y', 'a'])
df2
          x         y         a
0  0.792735  0.927720  1.960326
1 -1.015684  0.524749  1.002970
2 -0.676568  0.378511  0.103341
# Stack df2 below df1. The column sets differ ('z' vs 'a'), so the result uses
# the union of columns and fills the missing cells with NaN.
# sort=True states the column ordering explicitly: it keeps the sorted column
# order on every pandas version and avoids the FutureWarning that older
# releases emit when the sort behaviour is left implicit.
pd.concat([df1, df2], sort=True)
          a         x         y         z
0       NaN  0.118006  0.976428  0.286200
1       NaN  0.554356  0.739202  0.441234
2       NaN  0.987343  0.032884  0.963760
3       NaN  0.730118  0.617397  0.943546
0  1.960326  0.792735  0.927720       NaN
1  1.002970 -1.015684  0.524749       NaN
2  0.103341 -0.676568  0.378511       NaN
結合 (combine_first)
combine_first は 2 つのデータセットを結合する機能です。前者（呼び出し元）の値が NaN の箇所は、後者（引数）のデータセットの値で埋められます。結合する側（後者）のデータセットは、前者よりデータが少なくても構いません。
# Float Series with gaps: labels 'B' and 'D' are NaN.
s1 = Series(data=[2, np.nan, 4, np.nan], index=['A', 'B', 'C', 'D'])
s1
A 2.0
B NaN
C 4.0
D NaN
dtype: float64
# Fully populated integer Series on the same labels as s1.
s2 = Series(data=[1, 2, 3, 4], index=['A', 'B', 'C', 'D'])
s2
A 1
B 2
C 3
D 4
dtype: int64
# Keep s1's values where present; take s2's value wherever s1 is NaN.
s1.combine_first(other=s2)
A 2.0
B 2.0
C 4.0
D 4.0
dtype: float64
# Frame with columns 'x', 'y', 'z' in which rows 1 and 3 are entirely NaN.
df1 = DataFrame(
    data={
        'x': [1, np.nan, 3, np.nan],
        'y': [5, np.nan, 7, np.nan],
        'z': [9, np.nan, 11, np.nan],
    }
)
df1
     x    y     z
0  1.0  5.0   9.0
1  NaN  NaN   NaN
2  3.0  7.0  11.0
3  NaN  NaN   NaN
# Frame with a 'z' column that has values only in rows 1 and 3, plus a fully
# populated 'a' column.
df2 = DataFrame(
    data={
        'z': [np.nan, 10, np.nan, 12],
        'a': [1, 2, 3, 4],
    }
)
df2
      z  a
0   NaN  1
1  10.0  2
2   NaN  3
3  12.0  4
# Keep df1's values where present; fill its NaN cells from df2. The result
# carries the union of both frames' columns.
df1.combine_first(other=df2)
     a    x    y     z
0  1.0  1.0  5.0   9.0
1  2.0  NaN  NaN  10.0
2  3.0  3.0  7.0  11.0
3  4.0  NaN  NaN  12.0