A small problem encountered when converting YOLOv3 to a caffemodel

First of all, I used the yolov3_darknet2caffe.py script provided at https://blog.csdn.net/Chen_yingpeng/article/details/80692018 to convert a darknet YOLOv3 model into a Caffe model, which produced the prototxt and caffemodel files.

However, when I tried to compile the caffe-yolov3 code provided by Chen, the build failed: I have no sudo permission on the server and therefore cannot install OpenCV 3. Instead, I fell back on the Caffe build I had used earlier for MobileNet-YOLOv3 and tested with the ssd_detect.cpp / yolo_detect.cpp provided by https://github.com/eric612/MobileNet-YOLO (I had already fully compiled MobileNet-YOLO with cmake before), making some changes to the related code to suit my needs.

Note that in order to use the ssd_detect.cpp provided by MobileNet-YOLO, the yolov3.prototxt produced by Chen's conversion script needs certain adaptations, i.e. the appropriate yolo layers and a final detection_out layer must be added; for the specifics, modify it with reference to the prototxt files shipped with that framework.

Eventually the converted caffemodel could be tested normally, and the results on a demo looked fine. However, when I ran a focused test on specific targets in our own dataset, the recall on traffic signs turned out to be much worse than under darknet. After visualizing the images, I found that the caffemodel misses blue and red traffic signs; the situation improved slightly after adding more training data, but it was still serious.

For this reason, I debugged the Python conversion code that Chen provided. The part that converts the weight values is as follows (the most important part of the overall code):

def darknet2caffe(cfgfile, weightfile, protofile, caffemodel):
    """Convert a darknet cfg/weights pair into a Caffe prototxt and caffemodel.

    The prototxt is first written with ``region=False`` so that a Caffe net
    can be instantiated from it. The darknet weights are then copied block by
    block into that net's parameters, the prototxt is rewritten with
    ``region=True``, and the net is saved to ``caffemodel``.

    Args:
        cfgfile: Path to the darknet ``.cfg`` file.
        weightfile: Path to the darknet ``.weights`` file.
        protofile: Output path for the generated prototxt.
        caffemodel: Output path for the generated caffemodel.
    """
    net_info = cfg2prototxt(cfgfile)
    save_prototxt(net_info, protofile, region=False)

    net = caffe.Net(protofile, caffe.TEST)
    params = net.params

    blocks = parse_cfg(cfgfile)

    # The context manager closes the file even if reading fails.
    with open(weightfile, "rb") as fp:
        # Skip the header, which is consumed here as five int32 values
        # (major version, minor version, sub-version, images seen).
        # NOTE(review): presumably "images seen" occupies two int32 slots in
        # this weights format -- confirm against darknet's weight loader.
        np.fromfile(fp, dtype=np.int32, count=5)
        # Everything after the header is one flat float32 vector of weights.
        buf = np.fromfile(fp, dtype=np.float32)

    # Suffix of the auto-generated conv layer name for each conv block type.
    conv_suffixes = {
        'convolutional': 'conv',
        'depthwise_convolutional': 'dwconv',
    }
    # Block types that consume no weights; they only advance the layer id.
    weightless_types = ('maxpool', 'avgpool', 'region', 'route', 'shortcut',
                        'softmax', 'cost', 'upsample')

    layer_id = 1
    start = 0  # offset into buf of the next unread weight
    for block in blocks:
        # Stop once every value in the weights buffer has been consumed.
        if start >= buf.size:
            break

        block_type = block['type']
        if block_type == 'net':
            # The [net] section is not a layer and does not get a layer id.
            continue

        if block_type in conv_suffixes:
            batch_normalize = int(block['batch_normalize'])
            if 'name' in block:
                prefix = block['name']
                conv_layer_name = prefix
            else:
                prefix = 'layer%d' % layer_id
                conv_layer_name = '%s-%s' % (prefix, conv_suffixes[block_type])

            if batch_normalize:
                start = load_conv_bn2caffe(buf, start,
                                           params[conv_layer_name],
                                           params['%s-bn' % prefix],
                                           params['%s-scale' % prefix])
            else:
                start = load_conv2caffe(buf, start, params[conv_layer_name])
        elif block_type == 'connected':
            if 'name' in block:
                fc_layer_name = block['name']
            else:
                fc_layer_name = 'layer%d-fc' % layer_id
            start = load_fc2caffe(buf, start, params[fc_layer_name])
        elif block_type not in weightless_types:
            print('unknow layer type %s ' % block_type)

        layer_id += 1

    print('save prototxt to %s' % protofile)
    save_prototxt(net_info, protofile, region=True)
    print('save caffemodel to %s' % caffemodel)
    net.save(caffemodel)

......


def load_conv_bn2caffe(buf, start, conv_param, bn_param, scale_param):
    """Copy one conv + batch-norm layer from the flat buffer into Caffe blobs.

    Values are consumed from ``buf`` beginning at ``start`` in this order:
    scale bias (``scale_param[1]``), scale weight (``scale_param[0]``),
    running mean (``bn_param[0]``), running variance (``bn_param[1]``) and
    finally the convolution weights (``conv_param[0]``). Each slice is
    reshaped to the shape of the blob it is written into.

    Args:
        buf: One-dimensional array holding the weight values.
        start: Offset in ``buf`` of the first value belonging to this layer.
        conv_param: Blobs of the convolution layer; only index 0 is written.
        bn_param: Blobs of the batch-norm layer; indices 0, 1 and 2 are written.
        scale_param: Blobs of the scale layer; indices 0 and 1 are written.

    Returns:
        The offset in ``buf`` just past the last value consumed.
    """
    kernel_blob = conv_param[0]
    mean_blob, var_blob = bn_param[0], bn_param[1]
    gamma_blob, beta_blob = scale_param[0], scale_param[1]

    cursor = start
    # The buffer stores bias, scale, mean and variance before the kernel.
    for blob in (beta_blob, gamma_blob, mean_blob, var_blob):
        count = blob.data.size
        blob.data[...] = np.reshape(buf[cursor:cursor + count], blob.data.shape)
        cursor += count

    # Third batch-norm blob is fixed at 1.0.
    # NOTE(review): presumably Caffe's moving-average scale factor -- confirm.
    bn_param[2].data[...] = np.array([1.0])

    count = kernel_blob.data.size
    kernel_blob.data[...] = np.reshape(buf[cursor:cursor + count],
                                       kernel_blob.data.shape)
    cursor += count

    return cursor

Here, buf holds the weights read from yolov3.weights. Following the way darknet stores weights, buf is a one-dimensional vector of dimension 61592497 x 1, obtained by concatenating the weights of all YOLOv3 layers, and start records the starting position of each layer's weights. After conversion to a caffemodel (the code in load_conv_bn2caffe() takes the weight values from buf and writes them into the caffemodel), the dimensions of the vector change, e.g. to 64x32x3x3 (denoting a convolution layer with 64 output channels, 32 input channels and a 3x3 kernel); this step is done mainly by the reshape() function of the numpy package inside load_conv_bn2caffe(). So I wondered whether the drop in the final test results was caused by the four-dimensional reshape not matching the weight order used in darknet, producing an offset in the RGB channels.

I then examined the ssd_detect.cpp provided by MobileNet-YOLO, starting from the part where the image is fed into the network. The code is as follows:

		  // Excerpt from the body of a loop over fn (loop header not shown).
		  // Read the k-th entry of fn with OpenCV.
		  cv::Mat img = cv::imread(fn[k]);
		  if (img.empty()) continue; // only proceed if the image was read successfully
									// you probably want to do some preprocessing
		  // NOTE(review): img cannot be empty here because of the guard above,
		  // so this CHECK never fires; its message also names `file`, not fn[k].
		  CHECK(!img.empty()) << "Unable to decode image " << file;
		  // Time the Detect() call and log the elapsed milliseconds.
		  Timer batch_timer;
		  batch_timer.Start();
		  // img is handed to Detect() exactly as imread returned it; no channel
		  // reordering happens in this excerpt.
		  std::vector<vector<float> > detections = detector.Detect(img);
		  LOG(INFO) << "Computing time: " << batch_timer.MilliSeconds() << " ms.";

As the code above shows, the image is read with OpenCV's imread() function and the resulting img is passed to the Detector for detection. The Detector class is declared as follows, with these member functions:

// Holds a Net<float> (net_) and exposes a single public Detect() call that
// takes a cv::Mat. Only the declaration is visible here; notes on what each
// member does are inferred from names and signatures and should be confirmed
// against the implementation.
class Detector {
 public:
  // Constructs a detector from a model file and a weights file, plus a mean
  // file / mean value string, a confidence threshold and a normalize value.
  // NOTE(review): how mean_file and mean_value interact is not visible here.
  Detector(const string& model_file,
           const string& weights_file,
           const string& mean_file,
           const string& mean_value,
		   const float confidence_threshold,
		   const float normalize_value);

  // Runs detection on img and returns a vector of float vectors.
  // NOTE(review): presumably one inner vector per detection; the field
  // layout is not visible here -- confirm against the implementation.
  std::vector<vector<float> > Detect(const cv::Mat& img);

 private:
  // Presumably initialises mean_ from a mean file or a mean value string.
  void SetMean(const string& mean_file, const string& mean_value);

  // Receives a vector of cv::Mat to fill in; presumably one Mat per channel
  // of the network's input layer -- confirm.
  void WrapInputLayer(std::vector<cv::Mat>* input_channels);

  // Two overloads: the second additionally takes a normalize_value.
  void Preprocess(const cv::Mat& img,
                  std::vector<cv::Mat>* input_channels);
  void Preprocess(const cv::Mat& img,
	  std::vector<cv::Mat>* input_channels,double normalize_value);
 private:
  shared_ptr<Net<float> > net_;   // the wrapped network
  cv::Size input_geometry_;       // presumably the network input width/height
  int num_channels_;              // presumably the network input channel count
  cv::Mat mean_;                  // presumably the mean image used in preprocessing
  float nor_val = 1.0;            // defaults to 1.0; presumably set from normalize_value
};

None of these functions converts the channel order of the input img; only a resize is performed before the image is passed on for detection.

This gave me an idea: OpenCV's imread() reads a color image in BGR order, so could that be exactly what explains the pattern of missed blue and red traffic signs? I then swapped channel (2) and channel (0) of the img read by imread() and fed the result to the Detector. The detection results (scores and coordinates) were drawn with the rectangle() function on the original img, whose channels had not been swapped. The detections were now normal and basically the same as under darknet. This proved that the reshape in Chen's conversion code is not wrong; the problem was caused by my feeding the image read by cv::imread() directly into the detector.

Why, then, did I never have to think about this with the original darknet or with other Caffe models (such as SSD, RefineDet, etc.)? I explored further by reading the image-input part of the darknet code, in particular the load_image_color() function in src/image.c:

// Load an image file as a 3-channel image.
// Forwards filename, w and h to load_image() with the channel count fixed at 3.
image load_image_color(char *filename, int w, int h)
{
    const int color_channels = 3;
    return load_image(filename, w, h, color_channels);
}

// Load an image file with c channels and optionally resize it to w x h.
// The file is read with load_image_cv() when OPENCV is defined and with
// load_image_stb() otherwise. Resizing is skipped when w or h is 0, or when
// the loaded image already has the requested size.
image load_image(char *filename, int w, int h, int c)
{
#ifdef OPENCV
    image loaded = load_image_cv(filename, c);
#else
    image loaded = load_image_stb(filename, c);
#endif

    int resize_requested = (h != 0) && (w != 0);
    int already_sized = (loaded.h == h) && (loaded.w == w);
    if (!resize_requested || already_sized) {
        return loaded;
    }

    // Resize to the (w, h) the caller asked for and release the original.
    image resized = resize_image(loaded, w, h);
    free_image(loaded);
    return resized;
}

Here load_image() calls load_image_cv(), so darknet apparently also reads images with OpenCV. Since both of OpenCV's ways of reading an image, imread (C++) and cvLoadImage (C), return the data in BGR order, this puzzled me even more, so I looked at the load_image_cv() function, whose code is as follows:

// Load an image file through the OpenCV C API and return it as an `image`
// with its channels swapped from BGR to RGB.
//
// channels selects the flag passed to cvLoadImage: 0 -> -1, 1 -> 0
// (grayscale), 3 -> 1 (3-channel color). Any other value is reported on
// stderr and the flag stays at -1.
//
// If the file cannot be loaded, an error is printed to stderr, the filename
// is appended to "bad.list" in the current directory, and a 10x10x3 image
// from make_image() is returned instead of aborting.
image load_image_cv(char *filename, int channels)
{
    IplImage* src = 0;
    int flag = -1;
    switch (channels) {
        case 0: flag = -1; break;
        case 1: flag = 0; break;   // grayscale image
        case 3: flag = 1; break;   // 3-channel color image
        default:
            fprintf(stderr, "OpenCV can't force load with %d channels\n", channels);
            break;
    }

    // Load the image through the OpenCV API.
    if( (src = cvLoadImage(filename, flag)) == 0 )
    {
        fprintf(stderr, "Cannot load image \"%s\"\n", filename);
        // Append the name to bad.list directly rather than through a shell
        // command: sprintf into a 256-byte buffer overflowed on long paths,
        // and system() let shell metacharacters in the filename execute.
        FILE *bad_list = fopen("bad.list", "a");
        if (bad_list) {
            fprintf(bad_list, "%s\n", filename);
            fclose(bad_list);
        }
        return make_image(10,10,3);
    }

    // Copy the picture held in the IplImage into an image struct.
    image out = ipl_to_image(src);
    cvReleaseImage(&src);
    rgbgr_image(out); // convert BGR to RGB

    return out;
}

 

Guess you like

Origin blog.csdn.net/xunan003/article/details/94741304