(一)Python环境下安装pybrain(开源神经网络工具包)
pybrain模块(URL):https://github.com/pybrain/pybrain
pybrain官方文档:http://pybrain.org/docs/
git下载pybrain:git clone https://github.com/pybrain/pybrain
Tip:1.在安装pybrain前,要确保已安装模块scipy+numpy+nose
2.使用pip install pybrain也可以下载pybrain模块,但是在import pybrain时会报错,如下:
(ModuleNotFoundError: No module named 'structure')
3.模块下载完毕后得到文件夹(pybrain),该文件夹中还有一个同名的子文件夹pybrain,需要把后者(子文件夹)复制到Python的site-packages目录下
(二)pybrain搭建BPNN(分类)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn import preprocessing
from pybrain.tools.shortcuts import buildNetwork
from pybrain.structure import *
from pybrain.datasets import ClassificationDataSet
from pybrain.supervised.trainers import BackpropTrainer
from sklearn.metrics import accuracy_score
import warnings
warnings.filterwarnings('ignore')
# Build the dataset
# Load iris, standardise the feature matrix, and copy every (features, label)
# pair into a PyBrain ClassificationDataSet with one target column.
data = datasets.load_iris()
X, y = data.data, data.target
x = preprocessing.StandardScaler().fit_transform(X)
dataset = ClassificationDataSet(x.shape[1], 1, nb_classes=3)
for features, label in zip(x, y):
    # The target is passed as a one-element list to fill the single target column.
    dataset.addSample(list(features), [label])
# Split the dataset
# Switch the targets to the one-of-many layout first (the evaluation code
# later takes argmax over axis 1 of the targets), then split with
# proportion=0.8 into a training part and a test part.
dataset._convertToOneOfMany()
dataTrain, dataTest = dataset.splitWithProportion(proportion=0.8)
x_train, x_test = dataTrain['input'], dataTest['input']
y_train, y_test = dataTrain['target'], dataTest['target']

# Report the dimensions the network will be built with and the split sizes.
print('Input dim:{} Output dim:{}'.format(dataTrain.indim, dataTrain.outdim))
print('Train: x = {} y = {}'.format(x_train.shape, y_train.shape))
print('Test: x = {} y = {}'.format(x_test.shape, y_test.shape))
# Train the network
# One hidden layer of 5 units and a softmax output layer sized from the
# training set's own input/output dimensions.
net = buildNetwork(dataTrain.indim, 5, dataTrain.outdim, outclass=SoftmaxLayer)
model = BackpropTrainer(
    net,
    dataTrain,
    learningrate=0.01,
    momentum=0.1,
    verbose=False,
)
model.trainUntilConvergence(maxEpochs=100)

# Evaluate on the training split: the predicted / actual class is the index
# of the largest entry in each output / target row.
train_scores = net.activateOnDataset(dataTrain)
predict_train = train_scores.argmax(axis=1)
actual_train = y_train.argmax(axis=1)
train_acc = accuracy_score(actual_train, predict_train)

# Same evaluation on the held-out test split.
test_scores = net.activateOnDataset(dataTest)
predict_test = test_scores.argmax(axis=1)
actual_test = y_test.argmax(axis=1)
test_acc = accuracy_score(actual_test, predict_test)

print('Train acc = ', round(train_acc, 2), ' Test acc = ', round(test_acc, 2))
(三)pybrain搭建BPNN(回归)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import datasets,preprocessing
from pybrain.structure import *
from pybrain.datasets.supervised import SupervisedDataSet
from pybrain.supervised.trainers import BackpropTrainer
from sklearn.metrics import accuracy_score
'''
from pybrain.structure import *
-设置网络各层结构(网络类型+节点数+激活函数+权值/阈值等)
from pybrain.datasets.supervised import SupervisedDataSet
-将带标签数据转为pybrain模块可用的数据结构
from pybrain.supervised.trainers import BackpropTrainer
-训练BPNN
'''
import warnings
warnings.filterwarnings('ignore')
# Define the network structure
# Layer widths. N_INPUT has to match x.shape[1] of the dataset that is built
# further down in this script and fed to this network.
N_INPUT, N_HIDDEN, N_OUTPUT = 30, 36, 1

net = FeedForwardNetwork()
inLayer = LinearLayer(N_INPUT, name='input')
hiddenLayer = SigmoidLayer(N_HIDDEN, name='hidden')
outLayer = LinearLayer(N_OUTPUT, name='output')

# Register the layers with the network in their input/hidden/output roles.
net.addInputModule(inLayer)
net.addModule(hiddenLayer)
net.addOutputModule(outLayer)

# Fully connect input -> hidden and hidden -> output, then register both links.
in_hidden = FullConnection(inLayer, hiddenLayer)
hidden_out = FullConnection(hiddenLayer, outLayer)
for connection in (in_hidden, hidden_out):
    net.addConnection(connection)

# Finalise the network once all modules and connections have been added.
net.sortModules()

# Show the assembled structure and the number of weights on each connection.
print('网络结构信息如下:\n', net)
print('input-hidden weights:', len(in_hidden.params))
print('hidden-output weights:', len(hidden_out.params))
'''
net = FeedForwardNetwork():网络初始化
inLayer = LinearLayer(30,name='input'):设置输入层(节点数+层名称+激活函数)
hiddenLayer = SigmoidLayer(36,name='hidden'):设置隐含层(节点数+层名称+激活函数)
outLayer = LinearLayer(1,name='output'):设置输出层(节点数+层名称+激活函数)
(激活函数:TanhLayer+SigmoidLayer+SoftmaxLayer)
net.addInputModule(inLayer):将输入层添加到网络
net.addModule(hiddenLayer):将隐含层添加到网络
net.addOutputModule(outLayer):将输出层添加到网络
in_hidden = FullConnection(inLayer,hiddenLayer):设置权值连接(输入层-隐含层)
hidden_out = FullConnection(hiddenLayer,outLayer):设置权值连接(隐含层-输出层)
net.addConnection(in_hidden):将input-hidden权值添加到网络
net.addConnection(hidden_out):将hidden-output权值添加到网络
net.sortModules():对模块进行拓扑排序并完成内部初始化(网络使用前必须调用)
'''
# Build the dataset
# Load the breast-cancer data, standardise the feature matrix, and copy every
# (features, label) pair into a PyBrain SupervisedDataSet with one target column.
data = datasets.load_breast_cancer()
X, y = data.data, data.target
x = preprocessing.StandardScaler().fit_transform(X)
dataset = SupervisedDataSet(x.shape[1], 1)
for features, label in zip(x, y):
    # The target is passed as a one-element list to fill the single target column.
    dataset.addSample(list(features), [label])

# Show the shapes of the stored input and target arrays.
print('Input:', dataset['input'].shape)
print('Target:', dataset['target'].shape)
'''
dataset = SupervisedDataSet(输入向量维度,输出向量维度):创建pybrain环境下的数据集
dataset.addSample(list(x[i]),list([y[i]])):向数据集中添加数据(记录)
'''
# Split the dataset
# Split with proportion=0.8 into a training part and a test part, then pull
# out the raw input/target arrays of each part for later evaluation.
dataTrain, dataTest = dataset.splitWithProportion(proportion=0.8)
x_train, x_test = dataTrain['input'], dataTest['input']
y_train, y_test = dataTrain['target'], dataTest['target']

# Report the shapes of both parts.
for template, inputs, targets in (
    ('Train: x = {} y = {}', x_train, y_train),
    ('Test: x = {} y = {}', x_test, y_test),
):
    print(template.format(inputs.shape, targets.shape))
# Train the network
model = BackpropTrainer(net, dataTrain, learningrate=0.01, lrdecay=1.0, verbose=False)
model.trainUntilConvergence(maxEpochs=100)


def _predict_labels(network, samples, threshold=0.5):
    """Return a 0/1 label for every row of ``samples``.

    The network has a single linear output trained against 0/1 targets, so
    the raw output is cut at ``threshold`` (0.5, midway between the two
    target values). The previous ``int(np.ceil(output))`` mapped every
    output in (0, 1] to 1 and outputs above 1 to 2 or more, which biased
    the predictions and could yield labels outside {0, 1}.

    Args:
        network: trained network exposing ``activate(row)``.
        samples: iterable of input rows (e.g. ``x_train``).
        threshold: outputs greater than or equal to this value become 1.

    Returns:
        1-D integer numpy array of predicted labels, one per input row.
    """
    outputs = np.array([network.activate(sample)[0] for sample in samples])
    return (outputs >= threshold).astype(int)


# Targets are stored as an (n, 1) column; flatten them so that the true and
# predicted labels handed to accuracy_score are both 1-D.
predict = _predict_labels(net, x_train)
print('Train acc = ', str(round(accuracy_score(y_train.ravel(), predict) * 100, 2)) + '%')
predict = _predict_labels(net, x_test)
print('Test acc = ', str(round(accuracy_score(y_test.ravel(), predict) * 100, 2)) + '%')