Supervised learning experiments

Linear Regression
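
The fit function in the code below computes the closed-form ordinary least-squares estimates for simple linear regression; for reference, the formulas it implements are

w = \frac{\sum_{i=1}^{n}(x_i - \bar{x})(y_i - \bar{y})}{\sum_{i=1}^{n}(x_i - \bar{x})^2}, \qquad b = \bar{y} - w\,\bar{x}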

# Implementation of simple (one-variable) linear regression
import matplotlib.pyplot as plt  # matplotlib is used for visualization
import numpy as np

# Training data set
x_train = [4,8,5,10,12]
y_train = [30,50,35,70,60]

# Draw a scatter plot of the training data
def draw(x_train,y_train):
    plt.scatter(x_train, y_train)

def fit(x_train,y_train):  # compute the slope w and intercept b
    size = len(x_train)
    fenzi = 0   # initialize the numerator
    fenmu = 0   # initialize the denominator
    for i in range(size):
        fenzi += (x_train[i]-np.mean(x_train))*(y_train[i]-np.mean(y_train))
        fenmu += (x_train[i]-np.mean(x_train))**2
    w = fenzi/fenmu
    b = np.mean(y_train)-w*np.mean(x_train)
    return w,b

def predict(x,w,b):
    # prediction model
    y = w*x+b
    return y

def fit_line(w,b):
    # evaluate the model on a range of inputs and plot the fitted line
    # np.linspace(start, stop, num) creates an evenly spaced sequence
    x = np.linspace(4,15,9)
    y = w*x+b
    plt.plot(x,y)
    plt.show()

if __name__ == "__main__":
    draw(x_train,y_train)
    w,b = fit(x_train,y_train)
    print(w,b)               # print the slope and intercept
    fit_line(w,b)            # plot the fitted regression line
    print(predict(150,w,b))  # print the prediction for x = 150


(Figure: scatter plot of the training data with the fitted regression line)

Calling linear regression from sklearn

# Import the LinearRegression class from sklearn
from sklearn.linear_model import LinearRegression
import numpy as np

model = LinearRegression()
x_train = np.array([[2,4],[5,8],[5,9],[7,10],[9,12]])
y_train = np.array([20,50,30,70,60])

# Train the model
model.fit(x_train,y_train)
# fit(X, y, sample_weight=None)  X: training data, y: target values,
# sample_weight: optional per-sample weights
# coef_ holds the coefficients w, intercept_ holds the intercept b
print(model.coef_)       # print the coefficients w
print(model.intercept_)  # print the intercept b
print(model.score(x_train,y_train))  # print the R^2 score on the training data

(Figure: console output showing the fitted coefficients, intercept, and R^2 score)
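
The fitted model can also be used to predict new samples; a minimal sketch (the two-feature sample [6, 9] below is made up for illustration):

x_new = np.array([[6, 9]])   # hypothetical new sample with two features
print(model.predict(x_new))  # predicted target value for the new sample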

Logistic regression

  • Read the data set
  • Split the data into training and test sets
  • Call the logistic regression method from sklearn
import numpy as np
# Import the LogisticRegression class from sklearn
from sklearn.linear_model import LogisticRegression
# Import the helper that splits data into training and test sets
from sklearn.model_selection import train_test_split

# Read the data
dataSet=[[111,22245,45],[222,444,677],
          [33,4,44],[333,55,789],
          [334,557,889],[346,679,2435]]
data = np.array(dataSet)

# Split into training and test sets: 30% of the data for testing, 70% for training
train_x,test_x,train_y,test_y = train_test_split(data[:,0:2],data[:,2],test_size=0.3)
# train_test_split(X, y, test_size=0.3)  X: features, y: target values,
# test_size: fraction of the data used for testing

model = LogisticRegression()
# Fit the model on the training set
model.fit(train_x,train_y)
# Evaluate the model on the test set
pred_y = model.predict(test_x)
# Check whether the predictions match the true values
print(pred_y==test_y)
print(model.score(test_x,test_y))  # accuracy on the test set
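
Note that every target value in the toy data set above is unique, so each sample forms its own class and the test score is hard to interpret. Below is a minimal sketch of the same workflow on a made-up binary-labeled data set (all numbers are invented for illustration); stratify=y keeps both classes present in the training and test splits:

import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Made-up binary classification data: two features per sample, label 0 or 1
X = np.array([[1, 2], [2, 1], [3, 4], [6, 7], [7, 6], [8, 8]])
y = np.array([0, 0, 0, 1, 1, 1])
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.3,
                                          random_state=0, stratify=y)
clf = LogisticRegression()
clf.fit(X_tr, y_tr)
print(clf.predict(X_te))      # predicted labels for the test samples
print(clf.score(X_te, y_te))  # classification accuracy on the test set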

The following error may appear:

ImportError: No module named 'sklearn.model_selection'

Solution:
Run the Anaconda Prompt as administrator and update scikit-learn (the sklearn.model_selection module was added in scikit-learn 0.18), for example with conda update scikit-learn; the update may take a while.

Decision Tree

from sklearn import tree
X = [[1, 1], [2, 2]]
# Class labels
Y = [0, 1]
# Fit the model; by default the Gini impurity is used as the split criterion
clf = tree.DecisionTreeClassifier()
clf = clf.fit(X, Y)
# After fitting, the model can be used to predict the class of new samples
print(clf.predict([[3., 3.]]))

# The probability of each class can also be predicted, i.e. the fraction of
# training samples of that class in the leaf the new sample falls into
a = clf.predict_proba([[3., 3.]])  # probability of belonging to each class
print(a)

Operation result:
(console output showing the predicted class and the class probabilities)
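
To inspect the splits the tree has learned, the fitted classifier can also be printed as text rules; a minimal sketch using sklearn's export_text (available in recent scikit-learn versions; the feature names are made up for illustration):

from sklearn.tree import export_text

# Print the decision rules of the fitted tree; "x1" and "x2" are placeholder feature names
print(export_text(clf, feature_names=["x1", "x2"]))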


Origin blog.csdn.net/surijing/article/details/104955371