Caffe (Part 2): Classification (CPU/GPU setup, feature map visualization, layer names and parameter shapes, Top-1/Top-5 output)

#set up python environment: numpy for numerical routines, matplotlib for plotting
import numpy as np
import matplotlib.pyplot as plt
#display plots in this notebook
%matplotlib inline
plt.rcParams['figure.figsize']=(10,10) # large images
plt.rcParams['image.interpolation']='nearest' # don't interpolate: show square pixels
plt.rcParams['image.cmap']='gray' # use grayscale output rather than a (potentially misleading) color heatmap
# the caffe module needs to be on the Python path:
import sys
# print(sys.path)
import caffe
import os
caffe_root = "path to caffe"  # adjust to the root of your Caffe installation
if os.path.exists(caffe_root+'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'):
    print('CaffeNet found.')
else:
    print('Downloading pre-trained caffenet model...')
    !path_to_caffe/scripts/download_model_binary.py path_to_caffe/models/bvlc_reference_caffenet/
CaffeNet found.
caffe.set_mode_cpu()
model_def = caffe_root+'models/bvlc_reference_caffenet/deploy.prototxt'
model_weights = caffe_root+'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'

# define the model structure, load the trained weights, and use test mode (e.g. no dropout)
net = caffe.Net(model_def,
                model_weights, 
                caffe.TEST)
# load the mean ImageNet image (as distributed with Caffe) for subtraction
mu = np.load(caffe_root+'python/caffe/imagenet/ilsvrc_2012_mean.npy')
mu = mu.mean(1).mean(1) #average over pixels to obtain the mean (BGR) pixel values
print("mean-subtracted values: ", list(zip('BGR',mu)))
mean-subtracted values:  [('B', 104.0069879317889), ('G', 116.66876761696767), ('R', 122.6789143406786)]
# create transformer for the input called 'data'
transformer = caffe.io.Transformer({'data':net.blobs['data'].data.shape})
transformer.set_transpose('data', (2, 0, 1)) #move image channels to outermost dimension
transformer.set_mean('data', mu) #subtract the dataset-mean value in each channel
transformer.set_raw_scale('data', 255) #rescale from [0,1] to [0,255]
transformer.set_channel_swap('data', (2, 1, 0)) # swap channels from RGB to BGR
# set the size of the input: batch size 50, 3-channel (BGR) images, 227x227 pixels
net.blobs['data'].reshape(50, 3, 227, 227)
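
For intuition, here is a rough manual equivalent of what transformer.preprocess('data', image) will do with the settings above. This is only an illustrative sketch (manual_preprocess is not part of Caffe, and the real Transformer in python/caffe/io.py also resizes the image to the input size first):

# illustrative sketch, not library code: img is an H x W x 3 RGB float image in [0, 1],
# as returned by caffe.io.load_image, assumed here to already be 227x227
def manual_preprocess(img, mu):
    out = img.transpose(2, 0, 1)                # HWC -> CHW           (set_transpose)
    out = out[[2, 1, 0], :, :]                  # RGB -> BGR           (set_channel_swap)
    out = out * 255.0                           # [0, 1] -> [0, 255]   (set_raw_scale)
    out = out - mu[:, np.newaxis, np.newaxis]   # per-channel mean     (set_mean)
    return out
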
image = caffe.io.load_image(caffe_root+"examples/images/cat.jpg")
transformed_image = transformer.preprocess('data', image)
plt.imshow(image)

[Figure: the loaded input image, examples/images/cat.jpg]

# copy the image data into the memory allocated for the net
net.blobs['data'].data[...]=transformed_image
# perform classification
output = net.forward()
output_prob = output['prob'][0] # the output probability vector for the first image in the batch
print("predicted class is: ", output_prob.argmax())
predicted class is:  281
# load ImageNet labels
labels_file = caffe_root+'data/ilsvrc12/synset_words.txt'
if not os.path.exists(labels_file):
    !path_to_caffe/data/ilsvrc12/get_ilsvrc_aux.sh
    
labels = np.loadtxt(labels_file, str, delimiter='\t')
print("output label: ", labels[output_prob.argmax()])
Downloading...
--2020-03-04 13:47:08--  http://dl.caffe.berkeleyvision.org/caffe_ilsvrc12.tar.gz
Resolving dl.caffe.berkeleyvision.org (dl.caffe.berkeleyvision.org)... 128.32.162.150
Connecting to dl.caffe.berkeleyvision.org (dl.caffe.berkeleyvision.org)|128.32.162.150|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 17858008 (17M) [application/octet-stream]
Saving to: ‘caffe_ilsvrc12.tar.gz’

caffe_ilsvrc12.tar. 100%[===================>]  17.03M   111KB/s    in 4m 15s  

2020-03-04 13:51:25 (68.3 KB/s) - ‘caffe_ilsvrc12.tar.gz’ saved [17858008/17858008]

Unzipping...
Done.
output label:  n02123045 tabby, tabby cat
# sort top five predictions from softmax output
top_inds = output_prob.argsort()[::-1][:5]

print('probabilities and labels: ')
print(list(zip(output_prob[top_inds], labels[top_inds])))
probabilities and labels: 
[(0.32925108, 'n02123045 tabby, tabby cat'), (0.23463891, 'n02123159 tiger cat'), (0.12834354, 'n02124075 Egyptian cat'), (0.10341559, 'n02119022 red fox, Vulpes vulpes'), (0.06193552, 'n02127052 lynx, catamount')]
%timeit net.forward()
768 ms ± 14.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
caffe.set_device(0) # if we have multiple GPUs, pick the first one
caffe.set_mode_gpu()
net.forward() # run once before timing to set up memory
%timeit net.forward()
42.6 ms ± 63.2 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
# for each layer, show the output shape
for layer_name, blob in net.blobs.items():
    print(layer_name+'\t'+str(blob.data.shape))
data	(50, 3, 227, 227)
conv1	(50, 96, 55, 55)
pool1	(50, 96, 27, 27)
norm1	(50, 96, 27, 27)
conv2	(50, 256, 27, 27)
pool2	(50, 256, 13, 13)
norm2	(50, 256, 13, 13)
conv3	(50, 384, 13, 13)
conv4	(50, 384, 13, 13)
conv5	(50, 256, 13, 13)
pool5	(50, 256, 6, 6)
fc6	(50, 4096)
fc7	(50, 4096)
fc8	(50, 1000)
prob	(50, 1000)
for layer_name, param in net.params.items():
    print (layer_name+'\t'+str(param[0].data.shape),str(param[1].data.shape))
conv1	(96, 3, 11, 11) (96,)
conv2	(256, 48, 5, 5) (256,)
conv3	(384, 256, 3, 3) (384,)
conv4	(384, 192, 3, 3) (384,)
conv5	(256, 192, 3, 3) (256,)
fc6	(4096, 9216) (4096,)
fc7	(4096, 4096) (4096,)
fc8	(1000, 4096) (1000,)
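
Two details are worth noting in these shapes: conv2, conv4 and conv5 have only 48, 192 and 192 input channels (half of the previous layer's 96, 384 and 384 outputs) because CaffeNet, like AlexNet, splits those layers into two groups (group: 2 in the prototxt); and fc6's 9216 inputs are simply the flattened pool5 blob. A quick sanity check of the latter, as a sketch:

# fc6 takes the flattened pool5 output as input: 256 * 6 * 6 = 9216
assert net.params['fc6'][0].data.shape[1] == np.prod(net.blobs['pool5'].data.shape[1:])
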
def vis_square(data):
    # Take an array of shape (n, height, width) or (n, height, width, 3) 
    # and visualize each (height, width) thing in a grid of size approx.
    # sqrt(n) by sqrt(n)
    print(data.shape[0], data.shape[1], data.shape[2])
    # normalize data for display
    data = (data - data.min())/(data.max()-data.min())

    # force the number of filters to be square
    n = int(np.ceil(np.sqrt(data.shape[0])))
    # add some space between filters
    # don't pad the last dimension (if there is one)
    padding = (((0, n**2 - data.shape[0]), (0, 1), (0, 1))
              +((0, 0), )*(data.ndim-3)) 
    print(padding)
    # pad with ones (white)
    data = np.pad(data, padding, mode='constant', constant_values=1) 
    
    # tile the filters into an image
    data = data.reshape((n, n)+data.shape[1:]).transpose((0,2,1,3)+tuple(range(4, data.ndim+1)))
    data = data.reshape((n*data.shape[1],n*data.shape[3])+data.shape[4:])
    plt.imshow(data)
    plt.axis('off')
# the parameters are a list of [weights, bias]
filters = net.params['conv1'][0].data
vis_square(filters.transpose(0,2,3,1))
96 11 11
((0, 4), (0, 1), (0, 1), (0, 0))

[Figure: the 96 conv1 filters (each 11×11, RGB) tiled in a grid]

feat = net.blobs['conv1'].data[0,:36]
vis_square(feat)
36 55 55
((0, 0), (0, 1), (0, 1))

[Figure: the first 36 conv1 feature maps (55×55) for the cat image]

feat = net.blobs['pool5'].data[0]
vis_square(feat)
256 6 6
((0, 0), (0, 1), (0, 1))

[Figure: all 256 pool5 feature maps (6×6 each)]

feat = net.blobs['fc6'].data[0]
plt.subplot(2, 1, 1)
plt.plot(feat.flat)
plt.subplot(2, 1, 2)
_ = plt.hist(feat.flat[feat.flat>0], bins=100)

[Figure: fc6 activations plotted per unit (top) and a histogram of the positive activations (bottom)]

Summary and key points:

How to set CPU or GPU mode:

CPU mode

caffe.set_mode_cpu()

GPU mode

caffe.set_device(0) # if we have multiple GPUs, pick the first one
caffe.set_mode_gpu()

Getting the Top-1 and Top-5 classification results

Top-1

output_prob = output['prob'][0] # the output probability vector for the first image in the batch

Top-5

# sort top five predictions from softmax output
top_inds = output_prob.argsort()[::-1][:5]
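
With a ground-truth class index available, the same sorted indices also give Top-1/Top-5 correctness. A minimal sketch, where true_label is a hypothetical ground-truth index, not something the notebook provides:

true_label = 281                                 # hypothetical ground-truth class index
top5_inds = output_prob.argsort()[::-1][:5]      # indices of the 5 largest probabilities
print('Top-1 correct:', top5_inds[0] == true_label)
print('Top-5 correct:', true_label in top5_inds)
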

Printing the shapes of every layer's output and of its parameters

Print each layer's name and the shape of its output (feature map), stored as (N, C, H, W) = (batch size, channels, height, width):

# for each layer, show the outpyt shape
for layer_name, blob in net.blobs.items():
    print(layer_name+'\t'+str(blob.data.shape))

Print each layer's parameters (weights and biases); weights are stored as (output_channels, input_channels, filter_height, filter_width), biases as a vector of length output_channels:

for layer_name, param in net.params.items():
    print (layer_name+'\t'+str(param[0].data.shape),str(param[1].data.shape))
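
Individual layers can be indexed the same way; for example, conv1's weights and biases:

w = net.params['conv1'][0].data   # weights, shape (96, 3, 11, 11)
b = net.params['conv1'][1].data   # biases, shape (96,)
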

Visualizing the convolution kernels

def vis_square(data):
    # Take an array of shape (n, height, width) or (n, height, width, 3) 
    # and visualize each (height, width) thing in a grid of size approx.
    # sqrt(n) by sqrt(n)
    print(data.shape[0], data.shape[1], data.shape[2])
    # normalize data for display
    data = (data - data.min())/(data.max()-data.min())

    # force the number of filters to be square
    n = int(np.ceil(np.sqrt(data.shape[0])))
    # add some space between filters
    # don't pad the last dimension (if there is one)
    padding = (((0, n**2 - data.shape[0]), (0, 1), (0, 1))
              +((0, 0), )*(data.ndim-3)) 
    print(padding)
    # pad with ones (white)
    data = np.pad(data, padding, mode='constant', constant_values=1) 
    
    # tile the filters into an image
    data = data.reshape((n, n)+data.shape[1:]).transpose((0,2,1,3)+tuple(range(4, data.ndim+1)))
    data = data.reshape((n*data.shape[1],n*data.shape[3])+data.shape[4:])
    plt.imshow(data)
    plt.axis('off')
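
As in the walkthrough above, it is called on the conv1 weights with the channel axis moved last, so the RGB filters render in color:

filters = net.params['conv1'][0].data        # (96, 3, 11, 11)
vis_square(filters.transpose(0, 2, 3, 1))    # -> (96, 11, 11, 3), tiled into a 10x10 grid
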

Reposted from blog.csdn.net/Felaim/article/details/104657187