一.准备知识
my_dataframe = pandas.read_csv('csv文件') --- DataFrame对象
my_dataframe.head(n) --- 显示前n行
my_dataframe.columns --- 获取列名组成的Index对象
my_dataframe.shape --- 返回DataFrame的形状,元组形式(rows, columns)
my_dataframe.loc[n] --- 索引标签为n的行记录以Series形式返回(默认整数索引下即第n+1行)
my_dataframe.loc[n:m] --- 支持切片取值(按标签),包含m
my_dataframe[n:m] --- 支持切片取值(按位置),不包含m
my_dataframe.loc[[1,4,7]] 支持列表取值
my_dataframe['列名'] --- Series对象
my_dataframe['列名'].name
my_dataframe['列名'].dtype
my_dataframe[['列名1','列名2',...]] --- DataFrame对象
my_dataframe.columns.tolist() --- 列名以列表形式返回
my_dataframe['新列名'] = Series对象 --- 为DataFrame添加新列
my_dataframe['列名'].max() --- 求某一列的最大值
my_dataframe.sort_values('列名', inplace=True, ascending=True) #就地升序排列
my_dataframe.sort_values('列名', inplace=True, ascending=False) #就地降序排列
二.代码示例(运行环境,python2.7)
import pandas
# Load the food nutrition table; every example below operates on this DataFrame.
# The examples are kept commented out; uncomment one section at a time to run it.
food_info = pandas.read_csv('food_info.csv')
# =============================================================================
# Example 1: select every column whose name ends with "(g)" and show the
# first 3 rows of the resulting DataFrame.
#
# col_names = food_info.columns.tolist()
#
# gram_columns = []
#
# for c in col_names:
#     if c.endswith("(g)"):
#         gram_columns.append(c)
#
# gram_df = food_info[gram_columns]
# print(gram_df.head(3))
# =============================================================================
# =============================================================================
# Example 2: element-wise arithmetic on columns. Builds a rating of
# 2 * protein - 0.75 * fat, stores it as a new column, and prints the rows
# labelled 0 through 2 (.loc slicing includes the end label).
#
# weighted_protein = food_info["Protein_(g)"] * 2
# weighted_fat = -0.75 * food_info["Lipid_Tot_(g)"]
# initial_rating = weighted_protein + weighted_fat
# food_info['initial_rating'] = initial_rating
# print(food_info.loc[0:2])
# =============================================================================
# =============================================================================
# Example 3: normalize columns by their maximum and add the results as new
# columns. NOTE: max_calories is computed but not used afterwards.
#
# max_calories = food_info["Energ_Kcal"].max()
#
# # Divide each column by its maximum value
# normalized_protein = food_info["Protein_(g)"] / food_info["Protein_(g)"].max()
# normalized_fat = food_info["Lipid_Tot_(g)"] / food_info["Lipid_Tot_(g)"].max()
#
# # Add the new columns
# food_info["Normalized_Protein"] = normalized_protein
# food_info["Normalized_Fat"] = normalized_fat
#
# print(food_info.head(3))
# =============================================================================
# =============================================================================
# Example 4: sort the DataFrame by the sodium column, ascending then descending.
#
# # inplace=True modifies the DataFrame in place instead of returning a new one
# # NaN: missing values are placed last
# food_info.sort_values("Sodium_(mg)", inplace=True)
# print(food_info["Sodium_(mg)"])
#
# # Sort in descending order
# food_info.sort_values("Sodium_(mg)", inplace=True, ascending=False)
# print(food_info["Sodium_(mg)"])
# =============================================================================
csv文件网盘下载链接:https://pan.baidu.com/s/1jAOOXCobDSeBZc3h3qGybQ