使用训练好的caffe模型分类图片(python版)

英文官方文档：http://nbviewer.jupyter.org/github/BVLC/caffe/blob/master/examples/00-classification.ipynb

导入python caffe包

import numpy as np
import matplotlib.pyplot as plt
# display plots in this notebook
%matplotlib inline

# Plot defaults for this notebook: large figures, square un-interpolated
# pixels, and grayscale output (a color heatmap can be misleading).
plt.rcParams.update({
    'figure.figsize': (10, 10),
    'image.interpolation': 'nearest',
    'image.cmap': 'gray',
})


import sys
import os
# Root directory of the Caffe checkout; the paths used below are built from it.
caffe_root = './'
# Put Caffe's Python interface directory at the front of the module search
# path so that the `import caffe` that follows can find it.
sys.path.insert(0, os.path.join(caffe_root, 'python'))
import caffe

# 判断model文件是否存在
if os.path.isfile(caffe_root + 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'):
    print 'CaffeNet found.'
else:
    print 'Downloading pre-trained CaffeNet model...'

加载网络，创建输入处理

使用python caffe.io.load_image接口读取图片，返回的是取值范围为[0, 1]的np.float32数组

def load_image(filename, color=True):
    """
    Read an image file into a float32 array, normalizing its channel layout.

    Parameters
    ----------
    filename : string
        Path of the image to read.
    color : boolean
        True (default) produces an RGB result; False requests an intensity
        (grayscale) read of the file.

    Returns
    -------
    image : np.float32 array with values in [0, 1]
        Shaped (H x W x 3) for RGB or (H x W x 1) for grayscale.
    """
    pixels = skimage.io.imread(filename, as_grey=not color)
    img = skimage.img_as_float(pixels).astype(np.float32)

    if img.ndim != 2:
        # Multi-channel input: discard the alpha plane of a 4-channel image.
        if img.shape[2] == 4:
            img = img[:, :, :3]
        return img

    # Single-plane input: add the channel axis, then replicate it across
    # three channels when a color result was requested.
    img = img[:, :, np.newaxis]
    if color:
        img = np.tile(img, (1, 1, 3))
    return img

python Transformer接口会对load_image读取的图片做处理，注意raw_scale是在减去均值之前应用，而input_scale是在减去均值等其他所有处理之后应用

    def preprocess(self, in_, data):
        """
        Convert an image array into the layout and value range expected by
        the input blob `in_`.

        Steps, applied in this order (each optional one only when it has
        been configured for `in_`):
        - convert to single precision
        - resize to the input dimensions (number of channels preserved)
        - transpose dimensions to K x H x W
        - reorder channels (for instance color to BGR)
        - scale raw input (e.g. from [0, 1] to [0, 255] for ImageNet models)
        - subtract mean
        - scale feature

        Parameters
        ----------
        in_ : name of input blob to preprocess for
        data : (H' x W' x K) ndarray

        Returns
        -------
        caffe_in : (K x H x W) ndarray for input to a Net
        """
        self.__check_input(in_)
        # NOTE(review): copy=False plus the in-place arithmetic below means
        # `data` itself may be modified when no earlier step produced a
        # copy -- confirm callers do not rely on their array being untouched.
        blob = data.astype(np.float32, copy=False)

        # 1. Resize to the spatial size of the input blob.
        target_dims = self.inputs[in_][2:]
        if blob.shape[:2] != target_dims:
            blob = resize_image(blob, target_dims)

        # 2. Reorder the axes, e.g. H x W x C -> C x H x W.
        axes = self.transpose.get(in_)
        if axes is not None:
            blob = blob.transpose(axes)

        # 3. Reorder the channels (for instance RGB -> BGR).
        channel_order = self.channel_swap.get(in_)
        if channel_order is not None:
            blob = blob[channel_order, :, :]

        # 4. Raw scale: with pixel values in [0, 1], raw_scale = 255 maps
        #    them to [0, 255].
        raw_factor = self.raw_scale.get(in_)
        if raw_factor is not None:
            blob *= raw_factor

        # 5. Subtract the mean.
        mean_values = self.mean.get(in_)
        if mean_values is not None:
            blob -= mean_values

        # 6. Input scale, applied last; e.g. 0.00390625 (= 1/256) brings
        #    0-255 data back to roughly unit range.
        feature_factor = self.input_scale.get(in_)
        if feature_factor is not None:
            blob *= feature_factor
        return blob

caffe.set_mode_cpu()  # run the computation on the CPU

# Model definition file and trained parameter file of the reference CaffeNet.
model_def = caffe_root + 'models/bvlc_reference_caffenet/deploy.prototxt'
model_weights = caffe_root + 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'

# Load the network in test mode (e.g., dropout is not performed).
net = caffe.Net(model_def, model_weights, caffe.TEST)

# Load the ImageNet mean image and reduce it to one value per channel.
# Averaging over axis 1 twice removes both spatial axes; for a (3, 256, 256)
# mean image the shapes go (3, 256, 256) -> (3, 256) -> (3,).
# The three values are labelled B, G, R (not RGB) when printed below.
mu = np.load(caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy').mean(1).mean(1)
print('mean-subtracted values: %s' % (list(zip('BGR', mu)),))
# Sample output:
# mean-subtracted values: [('B', 104.0069879317889), ('G', 116.66876761696767), ('R', 122.6789143406786)]

# Create a preprocessing transformer for the network input called 'data',
# sized from the current shape of that input blob.
input_shape = net.blobs['data'].data.shape
transformer = caffe.io.Transformer({'data': input_shape})

transformer.set_transpose('data', (2, 0, 1))     # move channels to the outermost axis: (H, W, C) -> (C, H, W)
transformer.set_mean('data', mu)                 # subtract the mean value of each channel
transformer.set_raw_scale('data', 255)           # rescale from [0, 1] to [0, 255]
transformer.set_channel_swap('data', (2, 1, 0))  # swap channel order from RGB to BGR

使用CPU分类

# To demonstrate batching, resize the input blob to
# (batch size 50, 3 channels, 227 x 227 pixels).
net.blobs['data'].reshape(50, 3, 227, 227)

# caffe.io.load_image yields pixel values in the range 0-1
# (cv2.imread, by contrast, yields values in the range 0-255).
image = caffe.io.load_image(caffe_root + 'examples/images/cat.jpg')

# Preprocess the image with the transformer; this includes rescaling the
# pixel values to 0-255.
transformed_image = transformer.preprocess('data', image)

# Display the loaded image.
plt.imshow(image)



# Copy the preprocessed image into the memory the net allocated for its input.
net.blobs['data'].data[...] = transformed_image

# Forward pass: perform the classification.
output = net.forward()

# There may be several top blobs, so select 'prob' by name; the trailing 0
# picks the output for the first image in the batch.
output_prob = output['prob'][0]

# Index of the predicted class.
print('predicted class is: %s' % (output_prob.argmax(),))
# Sample output: predicted class is: 281

验证分类是否正确

# 加载imageNet的label文件
labels_file = caffe_root + 'data/ilsvrc12/synset_words.txt'
if not os.path.exists(labels_file):
    !../data/ilsvrc12/get_ilsvrc_aux.sh
    
labels = np.loadtxt(labels_file, str, delimiter='\t')

print 'output label:', labels[output_prob.argmax()]
# 输出内容   output label: n02123045 tabby, tabby cat


# argsort orders the indices by ascending probability; stepping through the
# result backwards and stopping after five entries gives the indices of the
# five largest probabilities, largest first.
top_inds = np.argsort(output_prob)[:-6:-1]

print('probabilities and labels:')
zip(output_prob[top_inds], labels[top_inds])

'''[(0.31243637, 'n02123045 tabby, tabby cat'),
 (0.2379719, 'n02123159 tiger cat'),
 (0.12387239, 'n02124075 Egyptian cat'),
 (0.10075711, 'n02119022 red fox, Vulpes vulpes'),
 (0.070957087, 'n02127052 lynx, catamount')]  
'''

使用GPU模式

# Time a forward pass in CPU mode
%timeit net.forward()
# Sample output: 1 loop, best of 3: 1.42 s per loop


# Switch to GPU mode; when several GPUs are present, set_device selects one by index
caffe.set_device(0)  # if we have multiple GPUs, pick the first one
caffe.set_mode_gpu()
net.forward()  # run once before timing to set up memory
%timeit net.forward()
# Sample output: 10 loops, best of 3: 70.2 ms per loop

使用训练好的caffe模型分类图片(python版)

猜你喜欢