python pandas模块的基本使用

import pandas as pd

# Build a small demo table: four people with age, province and country.
df = pd.DataFrame(
    data={
        "name": ["Tom", "Jerry", "Tony", "June"],
        "age": [16, 42, 38, 22],
        "province": ["A", "B", "C", "D"],
        "country": ["Amerian", "Japan", "Ruisa", "Roman"],
    }
)
print(df)

# Column labels.
print(df.columns)
# Row labels (the index).
print(df.index)
# All cell values as a 2-D array.
print(df.values)
# First n rows.
print(df.head(n=2))
# Last n rows.
print(df.tail(n=2))

# Summary statistics; for this frame only the numeric "age" column is
# summarised, and the printed result is:
#              age
# count   4.000000
# mean   29.500000
# std    12.476645
# min    16.000000
# 25%    20.500000
# 50%    30.000000
# 75%    39.000000
# max    42.000000
print(df.describe())

# Transpose: rows become columns and vice versa.
print(df.transpose())

# Sort by index, highest label first.
df1 = df.sort_index(ascending=False)
print(df1)

# Sort by the values of a column: age descending, then province ascending
# (ascending is the default order).
df2 = df.sort_values("age", ascending=False)
print(df2)
df3 = df.sort_values("province")
print(df3)

# Select a single column (returns a Series).
print(df["country"])

# Select several rows by position: the first three.
print(df.iloc[:3])

# Select several columns by label, in the requested order.
wanted_cols = ["country", "name"]
df4 = df.loc[:, wanted_cols]
print(df4)

# Select a single row by its index label (returns a Series).
df5 = df.loc[2]
print(df5)

# Select several rows and several columns at once.
print(df.loc[[0, 2], wanted_cols])

# A second frame with the same columns, used to demonstrate merging.
df6 = pd.DataFrame(
    data={
        "name": ["PF", "QC"],
        "age": [16, 22],
        "province": ["A", "B"],
        "country": ["Ruisa", "Roman"],
    }
)
print(df6)

# Stack both frames vertically; ignore_index renumbers the rows 0..n-1.
df7 = pd.concat(objs=[df, df6], axis=0, ignore_index=True)
print(df7)

# De-duplication on "country": flag rows whose country already appeared
# in an earlier row ...
res = df7.duplicated(subset="country")
print(res)

# ... and drop those rows, keeping the first occurrence of each country.
res1 = df7.drop_duplicates(subset="country")
print(res1)
print("--------------------")
# Save the merged frame to an Excel workbook, on a sheet named "Sname".
# CSV alternative: df7.to_csv("lianxi.csv")
df7.to_excel(excel_writer="lianxi.xlsx", sheet_name="Sname")

"C:\Program Files\Python37\python.exe" C:/Users/Administrator/Desktop/note/exer/lianxi.py
    name  age province  country
0    Tom   16        A  Amerian
1  Jerry   42        B    Japan
2   Tony   38        C    Ruisa
3   June   22        D    Roman
Index(['name', 'age', 'province', 'country'], dtype='object')
RangeIndex(start=0, stop=4, step=1)
[['Tom' 16 'A' 'Amerian']
 ['Jerry' 42 'B' 'Japan']
 ['Tony' 38 'C' 'Ruisa']
 ['June' 22 'D' 'Roman']]
    name  age province  country
0    Tom   16        A  Amerian
1  Jerry   42        B    Japan
   name  age province country
2  Tony   38        C   Ruisa
3  June   22        D   Roman
             age
count   4.000000
mean   29.500000
std    12.476645
min    16.000000
25%    20.500000
50%    30.000000
75%    39.000000
max    42.000000
                0      1      2      3
name          Tom  Jerry   Tony   June
age            16     42     38     22
province        A      B      C      D
country   Amerian  Japan  Ruisa  Roman
    name  age province  country
3   June   22        D    Roman
2   Tony   38        C    Ruisa
1  Jerry   42        B    Japan
0    Tom   16        A  Amerian
    name  age province  country
1  Jerry   42        B    Japan
2   Tony   38        C    Ruisa
3   June   22        D    Roman
0    Tom   16        A  Amerian
    name  age province  country
0    Tom   16        A  Amerian
1  Jerry   42        B    Japan
2   Tony   38        C    Ruisa
3   June   22        D    Roman
0    Amerian
1      Japan
2      Ruisa
3      Roman
Name: country, dtype: object
    name  age province  country
0    Tom   16        A  Amerian
1  Jerry   42        B    Japan
2   Tony   38        C    Ruisa
   country   name
0  Amerian    Tom
1    Japan  Jerry
2    Ruisa   Tony
3    Roman   June
name         Tony
age            38
province        C
country     Ruisa
Name: 2, dtype: object
   country  name
0  Amerian   Tom
2    Ruisa  Tony
  name  age province country
0   PF   16        A   Ruisa
1   QC   22        B   Roman
    name  age province  country
0    Tom   16        A  Amerian
1  Jerry   42        B    Japan
2   Tony   38        C    Ruisa
3   June   22        D    Roman
4     PF   16        A    Ruisa
5     QC   22        B    Roman
0    False
1    False
2    False
3    False
4     True
5     True
dtype: bool
    name  age province  country
0    Tom   16        A  Amerian
1  Jerry   42        B    Japan
2   Tony   38        C    Ruisa
3   June   22        D    Roman
--------------------

Process finished with exit code 0

python pandas模块的基本使用

猜你喜欢