1.去除重复项drop_duplicates
# Remove duplicate rows with drop_duplicates.
import pandas as pd

df = pd.DataFrame({"col1": [1, 1, 2, 2], "col2": ["a", "a", "b", "b"]})
print(df)

# Deduplicate on a single column, keeping the first occurrence of each value.
# `keep` is passed by keyword: pandas 2.0+ only accepts `subset` positionally,
# so the positional form `drop_duplicates("col1", "first", ...)` raises
# TypeError there.
df.drop_duplicates(subset="col1", keep="first", inplace=True)
print(df)

df = pd.DataFrame({"col1": [1, 1, 2, 2], "col2": ["a", "a", "b", "b"]})
print(df)

# Deduplicate the values of one column only. Selecting the column first
# yields a Series, so `df` is rebound to a Series (not a DataFrame) here.
df = df["col1"].drop_duplicates()
print(df)
col1 col2 0 1 a 1 1 a 2 2 b 3 2 b
col1 col2 0 1 a 2 2 b
col1 col2 0 1 a 1 1 a 2 2 b 3 2 b
0 1 2 2 Name: col1, dtype: int64
2.设置索引
# Set the index of a DataFrame.
import pandas as pd

# Variant 1: promote an existing column to the index in place.
df = pd.DataFrame({"col1": [1, 1, 2, 2], "col2": ["a", "a", "b", "b"]})
df.set_index("col1", inplace=True)
print(df)

# Rename the index label (it is "col1" after set_index).
df.index.name = "索引"
print(df)

# Variant 2: build the same result by hand.
df = pd.DataFrame({"col1": [1, 1, 2, 2], "col2": ["a", "a", "b", "b"]})
# pop() removes the column from the frame and returns it.
index = df.pop("col1")
df.index = index
df.index.name = "索引"
print(df)
col2 col1 1 a 1 a 2 b 2 b
col2 索引 1 a 1 a 2 b 2 b
col2 索引 1 a 1 a 2 b 2 b