# Pandas library 06: DataFrame data structure - summary and statistics

# This lecture covers the summary and statistics functions of pandas.
import numpy as np
import pandas as pd


# Sample records, one list per column; every list must have the same length
# (six entries) so the dict can be turned into a DataFrame.
# NOTE(review): the original remark here appears to say that the author
# prefixes names from their own private library with `t_` -- confirm.
T_DATA = {
    # A comma was missing between the last two names, which silently merged
    # them into one string and left this column one entry short.
    "name": ["Don Ho", "Wang", "Pharaoh", "Zhao Three", "John Doe", "King sister"],
    "Sex": ["male", "female", "male", "female", "male", "female"],
    "year": [37, 22, 15, 18, 33, 25],
    "City": ["Chengdu", "Beijing", "Shanghai", "Chengdu", "Shenzhen", "Beijing"],
}

# Demo frame: the integers 0..19 laid out as 5 rows (index labels 1-5)
# by 4 columns (A-D).
df1 = pd.DataFrame(
    np.arange(0, 20).reshape(5, 4),
    index=[1, 2, 3, 4, 5],
    columns=["A", "B", "C", "D"],
)
print(df1)

# Summation
# df2 = df1.sum(axis=1)   # axis=1: sum across each row
# print(df2)
# df22 = df1.sum(axis=0)  # axis=0 (the default): sum down each column
# print(df22)

# describe(): useful summary statistics for all numeric columns.
# It works column by column only; there is no axis option for rows.
df222 = df1.describe()
print(df222)

# Render every statistic as a string with two decimal places.
# The original used DataFrame.applymap, which newer pandas releases deprecate
# in favour of DataFrame.map; mapping each column with Series.map gives the
# same element-wise result without depending on either name.
df2222 = df222.apply(lambda column: column.map(lambda value: "%.2f" % value))
print(df2222)

# The row labels of the describe() result name each statistic:
# Index(['count', 'mean', 'std', 'min', '25%', '50%', '75%', 'max'])
print(df2222.index)

# So any single statistic (count, mean, max, min, std, ...) can be looked up
# by its row label and column name.
# print(df222.ix["min", "D"])  # the author notes .ix raises an error here
# .loc[row_label, column_name] is used instead; the chained form
# df222["D"]["max"] returns the same value.
print(df222.loc["max", "D"])
print(df2222.loc["mean", "A"])

# Expected output of print(df2222):
#
#            A      B      C      D
# count   5.00   5.00   5.00   5.00
# mean    8.00   9.00  10.00  11.00
# std     6.32   6.32   6.32   6.32
# min     0.00   1.00   2.00   3.00
# 25%     4.00   5.00   6.00   7.00
# 50%     8.00   9.00  10.00  11.00
# 75%    12.00  13.00  14.00  15.00
# max    16.00  17.00  18.00  19.00

 


# Number of elements in each column, i.e. the number of rows
# df3 = len(df1.index)
# print(df3)
# print(df1.index)  # get the list of index labels

# Number of columns, obtained like the number of keys in a dictionary
# df4 = len(df1.keys())
# print(df4)
# print(df1.keys())  # get the list of column names
# print(df1.values)  # NumPy ndarray holding all element values as a matrix

# Total number of elements = number of rows (index labels) x number of columns
# print(len(df1.keys()) * len(df1.index))

# Guess you like
#
# Origin: www.cnblogs.com/yiyea/p/11441803.html