# Quantitative programming with pandas and data analysis

# -*- coding: utf-8 -*-
# @date: 2017-08-26 
# @Original: 

import numpy as np
import pandas as pd

# Notebook-style walkthrough: bare expressions such as ``df.head(2)`` only
# display their value when run in a notebook or REPL.

# Simulated data: one row per stock (200), one column per day (504).
stock_cnt = 200
view_days = 504
# Generate the sequence from the standard normal distribution.
stock_day_change = np.random.standard_normal((stock_cnt, view_days))
stock_day_change.shape


# List the first five rows of data.
pd.DataFrame(stock_day_change).head(5)


# 4.1.2 Row and column index sequences
# Dates counting upward from 2017-1-1; freq="1D" means a step of one day.
days = pd.date_range("2017-1-1", periods=stock_day_change.shape[1], freq="1D")
# Row labels "stock 0" .. "stock <stock_day_change.shape[0] - 1>".
stock_symbols = ["stock " + str(x) for x in range(stock_day_change.shape[0])]
# Provide both index and columns.
df = pd.DataFrame(stock_day_change, index=stock_symbols, columns=days)
# Shown in table 4-3.
df.head(2)


# The stock names are the row index here, so the row has to be selected by
# label with .loc; df["stock 0"] would look for a column and raise KeyError.
df_stock0 = df.loc["stock 0"]
print(type(df_stock0))
# <class 'pandas.core.series.Series'>
# head() on a Series shows its first five entries, consistent with DataFrame.
df_stock0.head()


# 4.2.1 Analysing the data as a whole
# info() shows whether data is missing and the data type of each column.
tsla_df.info()
# describe() shows summary statistics for each column.
tsla_df.describe()

# 4.2.2 Index selection and slice selection
# Slice rows by label with loc and pick the column by name.
# NOTE(review): column labels are written in lowercase to match the
# tsla_df.close / tsla_df.volume attribute access used elsewhere in this
# file -- confirm against the actual columns of tsla_df.
# Opening-price series from 2014-07-23 to 2014-07-31.
tsla_df.loc["2014-07-23":"2014-07-31", "open"]

# Slice with iloc using integer positions for both rows and columns.
# [1:5] selects positions (1, 2, 3, 4); [2:6] selects (2, 3, 4, 5).
tsla_df.iloc[1:5, 2:6]

# Mixed selection: choose columns by name, then slice rows by position.
tsla_df[["close", "high", "low"]][0:3]


# 4.2.3 Filtering data with logical conditions
# abs takes the absolute value.
# Rows where the price change exceeds 8% and the trading volume is more than 2.5 times the average volume over the period.
# Build each condition as a named boolean mask, then keep the rows where
# both hold: |netChangeRatio| above 8 and volume above 2.5x the mean volume.
is_big_move = np.abs(tsla_df.netChangeRatio) > 8
is_heavy_volume = tsla_df.volume > 2.5 * tsla_df.volume.mean()
tsla_df[is_big_move & is_heavy_volume]

# 4.2.4 Data conversion and normalisation
# Sort rows by netChangeRatio (ascending) and show the first five.
# sort_values is used because DataFrame.sort_index no longer accepts by=.
tsla_df.sort_values(by="netChangeRatio")[:5]

# Drop a row if any value in it is NaN.
tsla_df.dropna()
# With how="all", drop a row only when every value in it is NaN.
tsla_df.dropna(how="all")

# Fill NaN with the column means. inplace=True modifies tsla_df itself
# instead of returning a new object (the call returns None), so head() must
# be called on tsla_df afterwards rather than chained onto fillna().
tsla_df.fillna(tsla_df.mean(), inplace=True)
tsla_df.head()

# pct_change() starts from the second element: it subtracts the previous
# element and divides by that previous element, i.e. the relative change.
change_ratio = tsla_df.close.pct_change()
change_ratio[:3]

# Express change_ratio in the same form as tsla_df.p_change: multiplied by
# one hundred and rounded to two decimals.
np.round(change_ratio[-5:] * 100, 2)


# 4.2.5 Serialising data to local files

# Write tsla_df to CSV with all of its columns and the index included.
tsla_df.to_csv("../gen/tsla_df.csv", columns=tsla_df.columns, index=True)
# Read it back using the first column as the index; parse_dates=True asks
# pandas to try parsing that index as dates.
tsla_df_load = pd.read_csv("../gen/tsla_df.csv", parse_dates=True, index_col=0)
tsla_df_load.head()

 

# Origin: www.cnblogs.com/fangbei/p/11521559.html