# 一、复习：布尔序列在数据清洗中的应用（P150）
import numpy as np
import pandas as pd
# Build a boolean mask flagging values of the "counts" column that lie more
# than three standard deviations away from the column mean.
detail = pd.read_csv(
    r"G:\大数据实验数据库\3.大数据实验数据\detail.csv",
    encoding="GB18030",
)
ser1 = detail["counts"]

# Compute the mean and standard deviation once; both bounds derive from them.
center = ser1.mean()
spread = ser1.std()
lower_bound = center - 3 * spread
upper_bound = center + 3 * spread

# x1 marks values below the lower bound, x2 values above the upper bound.
x1 = ser1 < lower_bound
x2 = ser1 > upper_bound

# A value is flagged when it falls outside either bound.
boolind = x1 | x2
boolind
# 二、P188 任务6.3：SVM 模型的使用
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
# Train an SVM classifier on the abalone data and print a classification
# report for the held-out test split.
ablone = pd.read_csv(
    r"G:\大数据实验数据库\4.Python数据分析与应用\第6章\任务程序\data\abalone.data"
)

# The first eight columns are the features; the column at position 8 is the
# label (presumably the ring count -- confirm against the data file).
data = ablone.iloc[:, :8]
target = ablone.iloc[:, 8]

# Replace the categorical "sex" column with its one-hot encoded columns.
sex = pd.get_dummies(ablone["sex"])
data = pd.concat([data, sex], axis=1).drop("sex", axis=1)

# 80% of the rows go to training; the seed keeps the split reproducible.
x1, x2, y1, y2 = train_test_split(
    data,
    target,
    train_size=0.8,
    random_state=42,
)

# Fit the scaler on the training split only, then apply it to both splits.
ssd = StandardScaler()
ssd.fit(x1)
x1_s = ssd.transform(x1)
x2_s = ssd.transform(x2)

# Fit an SVC with default hyper-parameters and evaluate on the test split.
mysvm = SVC()
mysvm.fit(x1_s, y1)
pred = mysvm.predict(x2_s)
report = classification_report(y2, pred)
print(report)
# 三、处理航空数据
# Clean the airline data in two steps:
#   1. drop rows where either SUM_YR_1 or SUM_YR_2 is missing;
#   2. keep rows where at least one of SUM_YR_1 / SUM_YR_2 is non-zero AND
#      SEG_KM_SUM is positive AND avg_discount is non-zero.
airline_data = pd.read_csv(
    r"G:\大数据实验数据库\4.Python数据分析与应用\第7章\任务程序\data\air_data.csv",
    encoding="GB18030",
)

# Step 1: both yearly columns must be present. Each column needs its own
# null check; testing SUM_YR_1 twice would let missing SUM_YR_2 rows through.
exp1 = airline_data["SUM_YR_1"].notnull()
exp2 = airline_data["SUM_YR_2"].notnull()
exp = exp1 & exp2
airnotnull = airline_data.loc[exp, :]
airnotnull.shape

# Step 2: build the row filters on the null-free frame.
index1 = airnotnull["SUM_YR_1"] != 0
index2 = airnotnull["SUM_YR_2"] != 0
# Each comparison must be parenthesised: `&` binds tighter than `>` and `!=`,
# so `a > 0 & (b != 0)` parses as `a > (0 & (b != 0))` and the second
# condition would be silently ignored.
index3 = (airnotnull["SEG_KM_SUM"] > 0) & (airnotnull["avg_discount"] != 0)
airline = airnotnull[(index1 | index2) & index3]
airline.shape