import numpy as np
import pandas as pd
# Build a 10-row demo frame: an integer range, standard-normal floats,
# random category labels, and a random 0/1 flag.
df = pd.DataFrame(
    data={
        'col_a': np.arange(10),
        'col_b': np.random.randn(10),
        'col_c': np.random.choice(['A', 'B', 'C'], size=10),
        'col_d': np.random.choice([0, 1], size=10),
    }
)
# Preview the first rows (head() defaults to five).
df.head()
|
col_a |
col_b |
col_c |
col_d |
0 |
0 |
-1.030159 |
B |
0 |
1 |
1 |
1.447636 |
B |
1 |
2 |
2 |
1.572227 |
C |
0 |
3 |
3 |
0.031504 |
A |
1 |
4 |
4 |
0.071705 |
C |
0 |
# Print the shape tuple followed by its two components
# (row count, then column count).
print(df.shape, *df.shape)
(10, 4) 10 4
# Show the column labels of df as a pandas Index.
df.columns
Index(['col_a', 'col_b', 'col_c', 'col_d'], dtype='object')
# Positional slice: the first five rows, all columns.
df.iloc[:5, :]
|
col_a |
col_b |
col_c |
col_d |
0 |
0 |
-1.030159 |
B |
0 |
1 |
1 |
1.447636 |
B |
1 |
2 |
2 |
1.572227 |
C |
0 |
3 |
3 |
0.031504 |
A |
1 |
4 |
4 |
0.071705 |
C |
0 |
# Select a subset of columns by label, keeping every row.
df.loc[:, ['col_a', 'col_b']]
|
col_a |
col_b |
0 |
0 |
-1.030159 |
1 |
1 |
1.447636 |
2 |
2 |
1.572227 |
3 |
3 |
0.031504 |
4 |
4 |
0.071705 |
5 |
5 |
-0.284231 |
6 |
6 |
0.403412 |
7 |
7 |
1.271582 |
8 |
8 |
0.693771 |
9 |
9 |
1.510458 |
# Positional slice on both axes: first five rows, first two columns.
df.iloc[:5, :2]
|
col_a |
col_b |
0 |
0 |
-1.030159 |
1 |
1 |
1.447636 |
2 |
2 |
1.572227 |
3 |
3 |
0.031504 |
4 |
4 |
0.071705 |
# Scalar access by integer position: row 0, column 1 (col_b).
df.iat[0, 1]
-1.0301593908948492
# Keep rows where col_a is greater than 3 AND col_b is negative.
df.loc[df['col_a'].gt(3) & df['col_b'].lt(0)]
|
col_a |
col_b |
col_c |
col_d |
5 |
5 |
-0.284231 |
A |
0 |
# Keep rows whose col_c value is one of the listed categories.
df.loc[df['col_c'].isin(['A', 'B'])]
|
col_a |
col_b |
col_c |
col_d |
0 |
0 |
-1.030159 |
B |
0 |
1 |
1 |
1.447636 |
B |
1 |
3 |
3 |
0.031504 |
A |
1 |
5 |
5 |
-0.284231 |
A |
0 |
7 |
7 |
1.271582 |
A |
0 |
8 |
8 |
0.693771 |
B |
1 |
9 |
9 |
1.510458 |
A |
1 |
# Add col_e as the element-wise sum of col_a and col_b (modifies df in place).
df['col_e'] = df['col_a'].add(df['col_b'])
# Display the frame with the new column.
df
|
col_a |
col_b |
col_c |
col_d |
col_e |
0 |
0 |
-1.030159 |
B |
0 |
-1.030159 |
1 |
1 |
1.447636 |
B |
1 |
2.447636 |
2 |
2 |
1.572227 |
C |
0 |
3.572227 |
3 |
3 |
0.031504 |
A |
1 |
3.031504 |
4 |
4 |
0.071705 |
C |
0 |
4.071705 |
5 |
5 |
-0.284231 |
A |
0 |
4.715769 |
6 |
6 |
0.403412 |
C |
1 |
6.403412 |
7 |
7 |
1.271582 |
A |
0 |
8.271582 |
8 |
8 |
0.693771 |
B |
1 |
8.693771 |
9 |
9 |
1.510458 |
A |
1 |
10.510458 |
# Remove the derived column again; drop() returns a new frame, so rebind df.
df = df.drop('col_e', axis='columns')
# Display the frame without col_e.
df
|
col_a |
col_b |
col_c |
col_d |
0 |
0 |
-1.030159 |
B |
0 |
1 |
1 |
1.447636 |
B |
1 |
2 |
2 |
1.572227 |
C |
0 |
3 |
3 |
0.031504 |
A |
1 |
4 |
4 |
0.071705 |
C |
0 |
5 |
5 |
-0.284231 |
A |
0 |
6 |
6 |
0.403412 |
C |
1 |
7 |
7 |
1.271582 |
A |
0 |
8 |
8 |
0.693771 |
B |
1 |
9 |
9 |
1.510458 |
A |
1 |
# Drop the first column by looking up its label; the result is not assigned,
# so df itself is left unchanged.
df.drop(df.columns[0], axis='columns')
|
col_b |
col_c |
col_d |
0 |
-1.030159 |
B |
0 |
1 |
1.447636 |
B |
1 |
2 |
1.572227 |
C |
0 |
3 |
0.031504 |
A |
1 |
4 |
0.071705 |
C |
0 |
5 |
-0.284231 |
A |
0 |
6 |
0.403412 |
C |
1 |
7 |
1.271582 |
A |
0 |
8 |
0.693771 |
B |
1 |
9 |
1.510458 |
A |
1 |
# Transpose: rows become columns and columns become rows.
df.transpose()
|
0 |
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
col_a |
0 |
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
col_b |
-1.03016 |
1.44764 |
1.57223 |
0.0315043 |
0.0717051 |
-0.284231 |
0.403412 |
1.27158 |
0.693771 |
1.51046 |
col_c |
B |
B |
C |
A |
C |
A |
C |
A |
B |
A |
col_d |
0 |
1 |
0 |
1 |
0 |
0 |
1 |
0 |
1 |
1 |
# Convert col_a to strings; returns a new Series and leaves df unchanged.
df['col_a'].astype(str)
0 0
1 1
2 2
3 3
4 4
5 5
6 6
7 7
8 8
9 9
Name: col_a, dtype: object
# Build a Categorical from col_c's values; the result is not stored back on df.
pd.Categorical(df['col_c'])
[B, B, C, A, C, A, C, A, B, A]
Categories (3, object): [A, B, C]
# Row-wise sum across the two numeric columns (one value per row).
df[['col_a', 'col_b']].sum(axis='columns')
0 -1.030159
1 2.447636
2 3.572227
3 3.031504
4 4.071705
5 4.715769
6 6.403412
7 8.271582
8 8.693771
9 10.510458
dtype: float64
# Column-wise mean: reduce down the index, giving one value per column.
df[['col_a', 'col_b']].mean(axis='index')
col_a 4.50000
col_b 0.56879
dtype: float64
# Apply a function to each column: take the column mean, then shift it by 10.
df[['col_a', 'col_b']].apply(lambda column: column.mean()).add(10)
col_a 14.50000
col_b 10.56879
dtype: float64
# Second demo frame: col_x counts up 0..9 while col_y counts down 9..0.
df2 = pd.DataFrame(
    data={
        'col_x': np.arange(10),
        'col_y': np.arange(10)[::-1],
    }
)
# Display the whole frame.
df2
|
col_x |
col_y |
0 |
0 |
9 |
1 |
1 |
8 |
2 |
2 |
7 |
3 |
3 |
6 |
4 |
4 |
5 |
5 |
5 |
4 |
6 |
6 |
3 |
7 |
7 |
2 |
8 |
8 |
1 |
9 |
9 |
0 |
# Place df and df2 side by side, aligning rows on their index.
pd.concat([df, df2], axis='columns')
|
col_a |
col_b |
col_c |
col_d |
col_x |
col_y |
0 |
0 |
-1.030159 |
B |
0 |
0 |
9 |
1 |
1 |
1.447636 |
B |
1 |
1 |
8 |
2 |
2 |
1.572227 |
C |
0 |
2 |
7 |
3 |
3 |
0.031504 |
A |
1 |
3 |
6 |
4 |
4 |
0.071705 |
C |
0 |
4 |
5 |
5 |
5 |
-0.284231 |
A |
0 |
5 |
4 |
6 |
6 |
0.403412 |
C |
1 |
6 |
3 |
7 |
7 |
1.271582 |
A |
0 |
7 |
2 |
8 |
8 |
0.693771 |
B |
1 |
8 |
1 |
9 |
9 |
1.510458 |
A |
1 |
9 |
0 |
# Two extra rows using the same column names as df, to be appended below it.
df3 = pd.DataFrame(
    data=dict(
        col_a=[-1, -2],
        col_b=[0, 1],
        col_c=['B', 'C'],
        col_d=[1, 0],
    )
)
# Display the new rows.
df3
|
col_a |
col_b |
col_c |
col_d |
0 |
-1 |
0 |
B |
1 |
1 |
-2 |
1 |
C |
0 |
# Stack df3's rows beneath df (concat's default axis) and renumber the
# index 0..n-1 instead of keeping each frame's original labels.
pd.concat([df, df3], ignore_index=True)
|
col_a |
col_b |
col_c |
col_d |
0 |
0 |
-1.030159 |
B |
0 |
1 |
1 |
1.447636 |
B |
1 |
2 |
2 |
1.572227 |
C |
0 |
3 |
3 |
0.031504 |
A |
1 |
4 |
4 |
0.071705 |
C |
0 |
5 |
5 |
-0.284231 |
A |
0 |
6 |
6 |
0.403412 |
C |
1 |
7 |
7 |
1.271582 |
A |
0 |
8 |
8 |
0.693771 |
B |
1 |
9 |
9 |
1.510458 |
A |
1 |
10 |
-1 |
0.000000 |
B |
1 |
11 |
-2 |
1.000000 |
C |
0 |