pandas数据分析模块(一)

版权声明: https://blog.csdn.net/Geroge_lmx/article/details/84336746

一.准备知识

my_dataframe = pandas.read_csv('csv文件')   ---   DataFrame对象
my_dataframe.head(n)   ---   显示前n行
my_dataframe.columns   ---   获取列名组成的Index对象
my_dataframe.shape   ---   返回DataFrame的形状,即元组(rows, columns)

my_dataframe.loc[n]   ---   索引标签为n的记录以Series形式返回(默认整数索引下即第n+1行)
my_dataframe.loc[n:m]   ---   支持切片取值(按索引标签),包含m
my_dataframe[n:m]   ---   支持切片取值(按位置),不包含m
my_dataframe.loc[[1,4,7]] 支持列表取值

my_dataframe['列名']   ---   Series对象
my_dataframe['列名'].name
my_dataframe['列名'].dtype
my_dataframe[['列名1','列名2',...]]   ---   DataFrame对象  

my_dataframe.columns.tolist()   ---   列名以列表形式返回
my_dataframe['新列名'] = Series对象  ---  为DataFrame添加新列
my_dataframe['列名'].max()  --- 求某一列的最大值
my_dataframe.sort_values('列名', inplace=True, ascending=True) #就地升序排列
my_dataframe.sort_values('列名', inplace=True, ascending=False) #就地降序排列

二.代码示例(运行环境,python2.7)

import pandas

# Load food_info.csv into a DataFrame; the disabled examples below operate on it.
food_info = pandas.read_csv("food_info.csv")

# =============================================================================
# Example 1 (disabled): keep only the columns whose name ends with "(g)"
# and print the first three rows of that sub-DataFrame.
#
# col_names = food_info.columns.tolist()
# 
# gram_columns = []
# 
# for c in col_names:
#     if c.endswith("(g)"):
#         gram_columns.append(c)
#    
# gram_df = food_info[gram_columns]
# print(gram_df.head(3))
# =============================================================================


# =============================================================================
# Example 2 (disabled): combine two columns arithmetically (protein weighted
# by 2, fat weighted by -0.75), store the sum as a new column, and print the
# rows labelled 0 through 2 (.loc slicing includes the end label).
#
# weighted_protein = food_info["Protein_(g)"] * 2
# weighted_fat = -0.75 * food_info["Lipid_Tot_(g)"]
# initial_rating = weighted_protein + weighted_fat
# food_info['initial_rating'] = initial_rating
# print(food_info.loc[0:2])
# =============================================================================


# =============================================================================
# Example 3 (disabled): normalize columns by their maximum value.
#
# max_calories = food_info["Energ_Kcal"].max()
# 
# # Divide each column by its own maximum
# normalized_protein = food_info["Protein_(g)"] / food_info["Protein_(g)"].max()
# normalized_fat = food_info["Lipid_Tot_(g)"] / food_info["Lipid_Tot_(g)"].max()
# 
# # Attach the results as new columns
# food_info["Normalized_Protein"] = normalized_protein
# food_info["Normalized_Fat"] = normalized_fat
# 
# print(food_info.head(3))
# =============================================================================


# =============================================================================
# Example 4 (disabled): sort the DataFrame by one column.
#
# # inplace=True modifies the DataFrame itself instead of returning a new one
# # NaN (missing) values are placed last
# food_info.sort_values("Sodium_(mg)", inplace=True)
# print(food_info["Sodium_(mg)"])
# 
# # Descending order
# food_info.sort_values("Sodium_(mg)", inplace=True, ascending=False)
# print(food_info["Sodium_(mg)"])
# =============================================================================

csv文件网盘下载链接:https://pan.baidu.com/s/1jAOOXCobDSeBZc3h3qGybQ

猜你喜欢

转载自blog.csdn.net/Geroge_lmx/article/details/84336746