# -*- coding: utf-8 -*-
# @date: 2017-08-26
# @Original:
"""Notebook-style walkthrough of basic pandas usage.

Part one builds a DataFrame of random "daily change" values for a set of
synthetic stocks and labels its rows and columns.  Part two explores an
existing quote DataFrame named ``tsla_df``: inspection, label/position
slicing, boolean filtering, sorting, NaN handling, percentage change and a
CSV round trip.

Bare expressions such as ``df.head(2)`` are kept as they appear in the
original cells; they only display a value in an interactive session.
"""
import numpy as np
import pandas as pd

stock_cnt = 200
view_days = 504
# Generate a (stock_cnt, view_days) array of standard-normal samples.
stock_day_change = np.random.standard_normal((stock_cnt, view_days))
stock_day_change.shape

# List the first five rows of the data.
pd.DataFrame(stock_day_change).head(5)

# 4.1.2 Row and column index sequences

# One timestamp per column, counting upward from 2017-1-1 in steps of one day.
days = pd.date_range('2017-1-1', periods=stock_day_change.shape[1], freq='1D')
# Row labels: "stock 0" -> "stock <stock_day_change.shape[0] - 1>".
stock_symbols = ['stock ' + str(x) for x in range(stock_day_change.shape[0])]
# Provide both the index and the columns.
df = pd.DataFrame(stock_day_change, index=stock_symbols, columns=days)
# Shown in table 4-3.
df.head(2)

# NOTE(review): this selects the *column* labelled 'stock 0', but 'stock N'
# labels were assigned to the row index above, so this lookup is expected to
# raise KeyError unless the frame is transposed first -- confirm the intent.
df_stock0 = df['stock 0']
# Prints <class 'pandas.core.series.Series'>
print(type(df_stock0))
# Show the first five entries of the Series; head() works as on a DataFrame.
df_stock0.head()

# 4.2.1 Inspecting the whole data set

# NOTE(review): tsla_df is never created in this file; it must be a quote
# DataFrame loaded before this point (the loading step is not shown here).

# info() reports missing data, dtypes and the size of each sub-column.
tsla_df.info()
# describe() shows summary statistics for each column.
tsla_df.describe()

# 4.2.2 Index selection and slice selection

# NOTE(review): column names are written lowercase below to agree with the
# attribute accesses used elsewhere in this file (tsla_df.close,
# tsla_df.volume); the capitalised spellings in the original text look like
# a translation artefact -- confirm against the real column names.

# loc selects by row label and column name:
# the opening-price series from 2014-07-23 to 2014-07-31.
tsla_df.loc['2014-07-23':'2014-07-31', 'open']
# iloc selects by integer row and column position:
# [1:5] -> (1, 2, 3, 4), [2:6] -> (2, 3, 4, 5).
tsla_df.iloc[1:5, 2:6]
# Mixed use: pick columns by name, then slice rows by position.
tsla_df[['close', 'high', 'low']][0:3]

# 4.2.3 Filtering data with logical conditions

# abs is the absolute value: rows whose price change exceeds 8% and whose
# volume is more than 2.5 times the mean volume over the period.
tsla_df[(np.abs(tsla_df.netChangeRatio) > 8)
        & (tsla_df.volume > 2.5 * tsla_df.volume.mean())]

# 4.2.4 Data conversion and normalisation

# Sort by column value; sort_values replaces the deprecated
# sort_index(by=...) spelling.
tsla_df.sort_values(by='netChangeRatio')[:5]

# Drop every row that contains at least one NaN.
tsla_df.dropna()
# With how='all', drop only the rows in which every value is NaN.
tsla_df.dropna(how='all')
# Fill NaN with the column means.  inplace=True modifies tsla_df itself and
# returns None, so head() has to be called on tsla_df in a separate step.
tsla_df.fillna(tsla_df.mean(), inplace=True)
tsla_df.head()

# pct_change() starts at the second element, subtracts the previous element
# and divides by it, i.e. the change ratio from one quote to the next.
change_ratio = tsla_df.close.pct_change()
change_ratio[:3]
# Express change_ratio as a percentage (x100) rounded to two decimals, the
# same representation the source text attributes to tsla_df.p_change.
np.round(change_ratio[-5:] * 100, 2)

# 4.2.5 Local serialisation of the data

tsla_df.to_csv('../gen/tsla_df.csv', columns=tsla_df.columns, index=True)
tsla_df_load = pd.read_csv('../gen/tsla_df.csv', parse_dates=True, index_col=0)
tsla_df_load.head()