引言
记录学习官网的例程中的一些重要语句,遇到的问题等,内容分散,建议顺序查看。
主要是调用Caffe的Python接口
源文件就在{caffe_root}/examples中(目录下面的中文标题也附有链接),安装sudo pip install jupyter
打开即可运行,初学者最好是放在它指定的目录,如,否则要改很多路径。
注:eaxmples是用jupyter notebook写的,部分Cell中出现了一些特殊的用法:
1. 感叹号‘!’:用于执行系统命令,如 !pwd
2. 百分号‘%’:用法太多,如 %matplotlib inline 显示绘图窗口 详见Jupyter Notebook Viewer
目录
构建逻辑回归
虽然Caffe是为深度网络而生但是也可以构建浅层模型,像用于分类的逻辑回归。
对合成数据进行逻辑回归,保存为HDF5作为Caffe的输入向量
合成一个数据集,其中含有10000个4维向量(两个信息特征,两个噪声特征)用于二分类
X, y = sklearn.datasets.make_classification( n_samples=10000, n_features=4, n_redundant=0, n_informative=2, n_clusters_per_class=2, hypercube=False, random_state=0 ) # Split into train and test X, Xt, y, yt = sklearn.model_selection.train_test_split(X, y) # Visualize sample of the data ind = np.random.permutation(X.shape[0])[:1000] df = pd.DataFrame(X[ind]) _ = pd.plotting.scatter_matrix(df, figsize=(9, 9), diagonal='kde', marker='o', s=40, alpha=.4, c=y[ind])
用通过随机梯度下降(SGD)训练学习和评估
sklearn
的逻辑回归并检查分类器的准确性。# Train and test the scikit-learn SGD logistic regression. clf = sklearn.linear_model.SGDClassifier( loss='log', n_iter=1000, penalty='l2', alpha=5e-4, class_weight='balanced') clf.fit(X, y) yt_pred = clf.predict(Xt) print('Accuracy: {:.3f}'.format(sklearn.metrics.accuracy_score(yt, yt_pred)))
保存数据集为HDF5文件, 用于Caffe输入。
# Write out the data to HDF5 files in a temp directory. # This file is assumed to be caffe_root/examples/hdf5_classification.ipynb dirname = os.path.abspath('./examples/hdf5_classification/data') if not os.path.exists(dirname): os.makedirs(dirname) train_filename = os.path.join(dirname, 'train.h5') test_filename = os.path.join(dirname, 'test.h5') # HDF5DataLayer source should be a file containing a list of HDF5 filenames. # To show this off, we'll list the same data file twice. with h5py.File(train_filename, 'w') as f: f['data'] = X f['label'] = y.astype(np.float32) with open(os.path.join(dirname, 'train.txt'), 'w') as f: f.write(train_filename + '\n') f.write(train_filename + '\n') # HDF5 is pretty efficient, but can be further compressed. comp_kwargs = {'compression': 'gzip', 'compression_opts': 1} with h5py.File(test_filename, 'w') as f: f.create_dataset('data', data=Xt, **comp_kwargs) f.create_dataset('label', data=yt.astype(np.float32), **comp_kwargs) with open(os.path.join(dirname, 'test.txt'), 'w') as f: f.write(test_filename + '\n')
通过Python接口在Caffe中定义逻辑回归模型
定义网络模型
from caffe import layers as L from caffe import params as P def logreg(hdf5, batch_size): # logistic regression: data, matrix multiplication, and 2-class softmax loss n = caffe.NetSpec() n.data, n.label = L.HDF5Data(batch_size=batch_size, source=hdf5, ntop=2) n.ip1 = L.InnerProduct(n.data, num_output=2, weight_filler=dict(type='xavier')) n.accuracy = L.Accuracy(n.ip1, n.label) n.loss = L.SoftmaxWithLoss(n.ip1, n.label) return n.to_proto() train_net_path = 'examples/hdf5_classification/logreg_auto_train.prototxt' with open(train_net_path, 'w') as f: f.write(str(logreg('examples/hdf5_classification/data/train.txt', 10))) test_net_path = 'examples/hdf5_classification/logreg_auto_test.prototxt' with open(test_net_path, 'w') as f: f.write(str(logreg('examples/hdf5_classification/data/test.txt', 10)))
定义网络的求解器
from caffe.proto import caffe_pb2 def solver(train_net_path, test_net_path): s = caffe_pb2.SolverParameter() # Specify locations of the train and test networks. s.train_net = train_net_path s.test_net.append(test_net_path) s.test_interval = 1000 # Test after every 1000 training iterations. s.test_iter.append(250) # Test 250 "batches" each time we test. s.max_iter = 10000 # # of times to update the net (training iterations) # Set the initial learning rate for stochastic gradient descent (SGD). s.base_lr = 0.01 # Set `lr_policy` to define how the learning rate changes during training. # Here, we 'step' the learning rate by multiplying it by a factor `gamma` # every `stepsize` iterations. s.lr_policy = 'step' s.gamma = 0.1 s.stepsize = 5000 # Set other optimization parameters. Setting a non-zero `momentum` takes a # weighted average of the current gradient and previous gradients to make # learning more stable. L2 weight decay regularizes learning, to help prevent # the model from overfitting. s.momentum = 0.9 s.weight_decay = 5e-4 # Display the current training loss and accuracy every 1000 iterations. s.display = 1000 # Snapshots are files used to store networks we've trained. Here, we'll # snapshot every 10K iterations -- just once at the end of training. # For larger networks that take longer to train, you may want to set # snapshot < max_iter to save the network and training state to disk during # optimization, preventing disaster in case of machine crashes, etc. s.snapshot = 10000 s.snapshot_prefix = 'examples/hdf5_classification/data/train' # We'll train on the CPU for fair benchmarking against scikit-learn. # Changing to GPU should result in much faster training! s.solver_mode = caffe_pb2.SolverParameter.CPU return s solver_path = 'examples/hdf5_classification/logreg_solver.prototxt' with open(solver_path, 'w') as f: f.write(str(solver(train_net_path, test_net_path)))
用Python接口学习和评估Caffe构建的逻辑回归模型
对上面定义的逻辑回归模型进行评估
caffe.set_mode_cpu() solver = caffe.get_solver(solver_path) solver.solve() accuracy = 0 batch_size = solver.test_nets[0].blobs['data'].num test_iters = int(len(Xt) / batch_size) for i in range(test_iters): solver.test_nets[0].forward() accuracy += solver.test_nets[0].blobs['accuracy'].data accuracy /= test_iters print("Accuracy: {:.3f}".format(accuracy))
- 也可以用命令行接口:
!./build/tools/caffe train -solver examples/hdf5_classification/logreg_solver.prototxt
改进逻辑回归模型
上面这个模型是一个简单的逻辑回归,可以通过在输入、输出两层之间添加一个非线性层改善性能。
from caffe import layers as L from caffe import params as P def nonlinear_net(hdf5, batch_size): # one small nonlinearity, one leap for model kind n = caffe.NetSpec() n.data, n.label = L.HDF5Data(batch_size=batch_size, source=hdf5, ntop=2) # define a hidden layer of dimension 40 n.ip1 = L.InnerProduct(n.data, num_output=40, weight_filler=dict(type='xavier')) # transform the output through the ReLU (rectified linear) non-linearity n.relu1 = L.ReLU(n.ip1, in_place=True) # score the (now non-linear) features n.ip2 = L.InnerProduct(n.ip1, num_output=2, weight_filler=dict(type='xavier')) # same accuracy and loss as before n.accuracy = L.Accuracy(n.ip2, n.label) n.loss = L.SoftmaxWithLoss(n.ip2, n.label) return n.to_proto() train_net_path = 'examples/hdf5_classification/nonlinear_auto_train.prototxt' with open(train_net_path, 'w') as f: f.write(str(nonlinear_net('examples/hdf5_classification/data/train.txt', 10))) test_net_path = 'examples/hdf5_classification/nonlinear_auto_test.prototxt' with open(test_net_path, 'w') as f: f.write(str(nonlinear_net('examples/hdf5_classification/data/test.txt', 10))) solver_path = 'examples/hdf5_classification/nonlinear_logreg_solver.prototxt' with open(solver_path, 'w') as f: f.write(str(solver(train_net_path, test_net_path)))
对改进的模型进行学习和评估,最终精度有提升。
caffe.set_mode_cpu() solver = caffe.get_solver(solver_path) solver.solve() accuracy = 0 batch_size = solver.test_nets[0].blobs['data'].num test_iters = int(len(Xt) / batch_size) for i in range(test_iters): solver.test_nets[0].forward() accuracy += solver.test_nets[0].blobs['accuracy'].data accuracy /= test_iters print("Accuracy: {:.3f}".format(accuracy))
- 同样也可以用命令行:
!./build/tools/caffe train -solver examples/hdf5_classification/nonlinear_logreg_solver.prototxt