# Load a CSV file and split it into feature / label numpy arrays.
import numpy as np
import pandas as pd

# Template placeholder — replace with the real path to your data file.
# (Plain literal: the original f'__path__' had no placeholders, so the
# f-prefix did nothing; ruff F541.)
data_file_path = '__path__'
# If the data is whitespace-separated instead, change `sep` accordingly.
df = pd.read_csv(data_file_path, sep=',')
## Add extra preprocessing here if your data needs it.
## Convention: the LAST column is the label.
np_data = df.iloc[:, :-1].to_numpy()
np_label = df.iloc[:, -1].to_numpy()
# Train / test split.
from sklearn.model_selection import train_test_split

# BUG FIX: the original called `tts(...)`, a name that is never defined
# (the import above binds `train_test_split`), so this line raised
# NameError. Call the imported function directly.
x_train, x_test, y_train, y_test = train_test_split(np_data, np_label)
## Linear regression model
from sklearn.linear_model import LinearRegression

# fit() returns the estimator itself, so construction and training chain.
lr_model = LinearRegression().fit(x_train, y_train)
## Learned weights and bias:
## lr_model.coef_, lr_model.intercept_
## KNN regression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import MinMaxScaler

# KNN is distance-based, so features are rescaled to [0, 1] first.
knn_scaler = MinMaxScaler()
knn_x_train = knn_scaler.fit_transform(x_train)  # fit statistics on train only
knn_x_test = knn_scaler.transform(x_test)        # reuse the train statistics
KNR_model = KNeighborsRegressor().fit(knn_x_train, y_train)
## When calling score(), pass the SCALED knn_x_* arrays, e.g.:
## KNR_model.score(knn_x_test, y_test)
## Polynomial regression
from sklearn.preprocessing import PolynomialFeatures

# degree is tunable; large values overfit easily.
poly = PolynomialFeatures(degree=2)
# Both splits must go through the SAME fitted transformer.
x_poly_train = poly.fit_transform(x_train)
x_poly_test = poly.transform(x_test)
# Reuses LinearRegression imported in the linear-regression section above.
poly_model = LinearRegression().fit(x_poly_train, y_train)
## Decision-tree regression
from sklearn.tree import DecisionTreeRegressor

dt = DecisionTreeRegressor().fit(x_train, y_train)
## GBDT regression (gradient-boosted decision trees)
from sklearn.ensemble import GradientBoostingRegressor as GBDT

gb = GBDT().fit(x_train, y_train)
## XGBoost regression. If the import fails, install the package first:
# !pip install xgboost -i https://pypi.tuna.tsinghua.edu.cn/simple
import xgboost as xgb

xgb_model = xgb.XGBRegressor().fit(x_train, y_train)
### Classification models
## Naive Bayes (Bernoulli / binary features)
from sklearn.naive_bayes import BernoulliNB

nb_model = BernoulliNB().fit(x_train, y_train)
## Support vector machine
from sklearn.svm import SVC

clf = SVC().fit(x_train, y_train)
## KNN classification
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import MinMaxScaler

# Rescale features to [0, 1]; NOTE(review): this rebinds the knn_scaler /
# knn_x_* names used by the KNN-regression section above (harmless here,
# since the scaler is refit on the same x_train).
knn_scaler = MinMaxScaler()
knn_x_train = knn_scaler.fit_transform(x_train)
knn_x_test = knn_scaler.transform(x_test)
knn_c = KNeighborsClassifier().fit(knn_x_train, y_train)
## Decision-tree classification
from sklearn.tree import DecisionTreeClassifier

dtc = DecisionTreeClassifier().fit(x_train, y_train)
## XGBoost classification
from xgboost import XGBClassifier

xgbc = XGBClassifier().fit(x_train, y_train)
# 很多时候找代码,只是想找一下模型调用的代码就够用了;
# 原理大多学过,但具体在哪个包里又得翻来翻去,很麻烦。
# 去查资料往往先讲一大堆原理(实际上我们需要的不是原理,即使在搜索框里搜 "GBDT+代码",
# 也要翻一大堆内容才能找到代码),因此做个简单整理,应付作业够用了。