Code:
import numpy as np
import pandas as pd

# A Series is a one-dimensional labelled array. No index is passed here, so
# pandas assigns the default integer labels 0..5. The np.nan entry marks a
# missing value and causes the data to be stored as float64.
print("Generate sequence")
s = pd.Series([1, 3, 6, np.nan, 44, 1])
print(s)

# Six consecutive timestamps starting at 2016-01-01; with no `freq` argument
# date_range steps one calendar day at a time.
dates = pd.date_range('20160101', periods=6)
print(dates)
Output:
Generate sequence
0     1.0
1     3.0
2     6.0
3     NaN
4    44.0
5     1.0
dtype: float64
DatetimeIndex(['2016-01-01', '2016-01-02', '2016-01-03', '2016-01-04',
               '2016-01-05', '2016-01-06'],
              dtype='datetime64[ns]', freq='D')
Code:
# Method 1: build a DataFrame from a 2-D NumPy array, passing explicit row
# labels (the `dates` DatetimeIndex defined in the first snippet) and explicit
# column labels. The values are random, so they differ on every run.
# NOTE: "法一" is the original Chinese heading and means "Method 1".
print("法一")
df = pd.DataFrame(
    np.random.randn(6, 4),
    index=dates,
    columns=['a', 'b', 'c', 'd'],
)
print(df)

# Method 2: build a DataFrame from a 2-D array only. With no index/columns
# given, pandas labels both rows and columns with integers starting at 0.
print("Method 2")
df1 = pd.DataFrame(np.arange(12).reshape((3, 4)))
print(df1)

# Method 3: build a DataFrame from a dict mapping column name -> column data.
# Each value may be a different kind of object; scalars ('B' and 'F') are
# broadcast to the length of the other columns.
print("Method three")
df2 = pd.DataFrame({
    'A': pd.Series([1, 2, 3, 4]),
    'B': pd.Timestamp('20130102'),
    'C': pd.Series(1, index=list(range(4)), dtype='float32'),
    'D': np.array([3] * 4, dtype='int32'),
    'E': pd.Categorical(['test', 'train', 'test', 'train']),
    'F': 'foo',
})
print(df2)
Output:
法一
                   a         b         c         d
2016-01-01  0.754096  1.030924  1.122601 -0.945364
2016-01-02  0.678263  1.597405 -0.570131  1.778779
2016-01-03 -2.048156 -0.824976  1.497493  0.865702
2016-01-04  0.095282  0.790544 -0.623735 -0.560488
2016-01-05 -0.369227 -0.463738 -1.496183 -0.421661
2016-01-06  0.271562 -0.709889  0.012280  1.094913
Method 2
   0  1   2   3
0  0  1   2   3
1  4  5   6   7
2  8  9  10  11
Method three
   A          B    C  D      E    F
0  1 2013-01-02  1.0  3   test  foo
1  2 2013-01-02  1.0  3  train  foo
2  3 2013-01-02  1.0  3   test  foo
3  4 2013-01-02  1.0  3  train  foo
Code:
# Attributes of the DataFrame `df2` built in the previous snippet.

# Data type of each column.
print(df2.dtypes)

# Row labels (the index).
print(df2.index)

# Column labels.
print(df2.columns)

# The underlying data as a NumPy array. The columns of df2 have different
# dtypes, so the rows are printed as generic Python objects.
print("output values")
print(df2.values)
Output:
A             int64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object
RangeIndex(start=0, stop=4, step=1)
Index(['A', 'B', 'C', 'D', 'E', 'F'], dtype='object')
output values
[[1 Timestamp('2013-01-02 00:00:00') 1.0 3 'test' 'foo']
 [2 Timestamp('2013-01-02 00:00:00') 1.0 3 'train' 'foo']
 [3 Timestamp('2013-01-02 00:00:00') 1.0 3 'test' 'foo']
 [4 Timestamp('2013-01-02 00:00:00') 1.0 3 'train' 'foo']]
Code:
# Summary statistics (count, mean, std, min, quartiles, max) per column.
# Non-numeric columns such as 'E' (category) and 'F' (object) are skipped by
# default.
print(df2.describe())

# Transpose: rows become columns and columns become rows.
print(df2.T)

# Sort by labels. axis=1 orders the columns by name, axis=0 orders the rows
# by index; ascending=False gives descending order in both cases.
print(df2.sort_index(axis=1, ascending=False))  # columns: F, E, D, C, B, A
print(df2.sort_index(axis=0, ascending=False))  # rows: 3, 2, 1, 0

# Sort the rows by the values in column 'E' instead of by label.
print(df2.sort_values(by='E'))
Output:
              A    C    D
count  4.000000  4.0  4.0
mean   2.500000  1.0  3.0
std    1.290994  0.0  0.0
min    1.000000  1.0  3.0
25%    1.750000  1.0  3.0
50%    2.500000  1.0  3.0
75%    3.250000  1.0  3.0
max    4.000000  1.0  3.0
                     0                    1                    2  \
A                    1                    2                    3
B  2013-01-02 00:00:00  2013-01-02 00:00:00  2013-01-02 00:00:00
C                    1                    1                    1
D                    3                    3                    3
E                 test                train                 test
F                  foo                  foo                  foo

                     3
A                    4
B  2013-01-02 00:00:00
C                    1
D                    3
E                train
F                  foo
     F      E  D    C          B  A
0  foo   test  3  1.0 2013-01-02  1
1  foo  train  3  1.0 2013-01-02  2
2  foo   test  3  1.0 2013-01-02  3
3  foo  train  3  1.0 2013-01-02  4
   A          B    C  D      E    F
3  4 2013-01-02  1.0  3  train  foo
2  3 2013-01-02  1.0  3   test  foo
1  2 2013-01-02  1.0  3  train  foo
0  1 2013-01-02  1.0  3   test  foo
   A          B    C  D      E    F
0  1 2013-01-02  1.0  3   test  foo
2  3 2013-01-02  1.0  3   test  foo
1  2 2013-01-02  1.0  3  train  foo
3  4 2013-01-02  1.0  3  train  foo