dataframe操作

1.去除重复项drop_duplicates

# Remove duplicate rows with DataFrame.drop_duplicates

import pandas as pd

df = pd.DataFrame({"col1": [1, 1, 2, 2], "col2": ["a", "a", "b", "b"]})
print(df)
# Deduplicate on a single column, keeping the first occurrence of each value.
# `keep` is passed by name: recent pandas releases accept it (and `inplace`)
# as keyword-only arguments, so the positional form raises TypeError there.
df.drop_duplicates(subset="col1", keep="first", inplace=True)
print(df)

# Start again from the original four-row frame.
df = pd.DataFrame(
    {
        "col1": [1, 1, 2, 2],
        "col2": ["a", "a", "b", "b"],
    }
)
print(df)
# Deduplicate just one column: select it as a Series first, then drop the
# repeated values. Note that `df` is rebound to that Series afterwards.
df = df.loc[:, "col1"].drop_duplicates()
print(df)
   col1 col2
0     1    a
1     1    a
2     2    b
3     2    b
   col1 col2
0     1    a
2     2    b
   col1 col2
0     1    a
1     1    a
2     2    b
3     2    b
0    1
2    2
Name: col1, dtype: int64

2.设置索引
# Set the index
import pandas as pd

df = pd.DataFrame(
    {
        "col1": [1, 1, 2, 2],
        "col2": ["a", "a", "b", "b"],
    }
)
# Promote "col1" from an ordinary column to the row index.
df = df.set_index("col1")
print(df)
# Give the index a new display name.
df.index = df.index.rename("索引")
print(df)

# Alternative approach: remove the column from the frame and install its
# values as the index by hand.
df = pd.DataFrame(
    {
        "col1": [1, 1, 2, 2],
        "col2": ["a", "a", "b", "b"],
    }
)
index = df.pop("col1")  # pop() removes "col1" from df and returns it as a Series
# Build the index from the popped values and name it in a single step.
df.index = pd.Index(index, name="索引")
print(df)
     col2
col1     
1       a
1       a
2       b
2       b
   col2
索引
1     a
1     a
2     b
2     b
   col2
索引
1     a
1     a
2     b
2     b

猜你喜欢

转载自www.cnblogs.com/Stephen-Qin/p/10230450.html