移动端unet人像分割模型--2

前一篇blog里提到的错误果然是mxnet网络定义的问题：把pool5误敲成了pool4。修正之后，ncnn就不再crash了。不过ncnn的mxnet2ncnn这个工具应该多加一些诊断，确保转换前后的模型参数一致才对。

只是事情也没那么一帆风顺，转成ncnn后的预测结果死活不对。没办法，只能一层层去检查，写了几个简单的工具可以打印中间隐藏层的结果。

check.py

import os
os.environ["MXNET_BACKWARD_DO_MIRROR"] = "1"
os.environ["MXNET_CUDNN_AUTOTUNE_DEFAULT"] = "0"
import sys
import cv2
import mxnet as mx
from mxnet import ndarray as F
from skimage.transform import resize
from skimage.io import imsave
import numpy as np
from unetdataiter import UnetDataIter
import matplotlib.pyplot as plt
from unet import build_unet

# Print arrays in full instead of the abbreviated "..." form, so the dumped
# layer outputs can be compared value by value.
np.set_printoptions(threshold=np.inf)

def post_process_mask(label, img_cols, img_rows, n_classes, p=0.5):
    """Convert per-class scores into a single-channel mask of 0/255 values.

    ``label`` is reshaped to ``(n_classes, img_cols, img_rows)``, the class
    axis is moved last, and for every position the index of the largest score
    is taken and multiplied by 255.

    ``label`` must offer ``reshape``, ``transpose``, ``argmax`` and (after
    scaling) ``asnumpy`` -- presumably an MXNet NDArray; confirm at the call
    site. ``p`` is accepted for interface compatibility and is not used.

    Returns whatever ``asnumpy()`` yields for the scaled arg-max result.
    """
    per_class = label.reshape(n_classes, img_cols, img_rows)
    class_last = per_class.transpose([1, 2, 0])
    winning_class = class_last.argmax(axis=2)
    scaled = winning_class * 255
    return scaled.asnumpy()

def ncnn_output(label):
    """Return ``label`` with its first axis moved to the end, as a NumPy array.

    The axes are permuted from ``(0, 1, 2)`` to ``(1, 2, 0)`` and the result is
    converted with ``asnumpy()``. ``label`` therefore has to be a 3-axis object
    exposing ``transpose`` and ``asnumpy`` -- presumably one sample of an MXNet
    output in channel-first order; confirm at the call site.
    """
    axis_order = [1, 2, 0]
    first_axis_last = label.transpose(axis_order)
    return first_axis_last.asnumpy()


def load_image(img, width, height):
    """Letterbox ``img`` onto a black ``height`` x ``width`` canvas.

    The image is scaled with its aspect ratio preserved so that it fits inside
    the canvas, centred along the side that does not fill it, converted to
    float32 in the range [0, 1] and rearranged from HWC to CHW.

    Args:
        img: H x W x 3 array, e.g. the result of ``cv2.imread(path, 1)``.
        width: canvas width in pixels.
        height: canvas height in pixels.

    Returns:
        A one-element list holding the ``(3, height, width)`` float32 array.
    """
    src_h, src_w = img.shape[0], img.shape[1]
    canvas = np.zeros((height, width, 3), dtype='uint8')

    # Decide which side limits the fit by comparing aspect ratios
    # (cross-multiplied to stay in integers). Comparing src_h with src_w
    # directly would only be correct for a square canvas; for other canvases
    # the resized image could end up larger than the canvas.
    if src_h * width >= src_w * height:
        # Height fills the canvas; width is scaled and centred horizontally.
        # float() keeps this a true division on Python 2 as well.
        scale = src_h / float(height)
        # Truncation can yield 0 for extremely thin inputs, which cv2.resize
        # rejects, so keep at least one pixel.
        new_w = min(width, max(1, int(src_w / scale)))
        left = (width - new_w) // 2
        canvas[:, left:left + new_w, :] = cv2.resize(img, (new_w, height))
    else:
        # Width fills the canvas; height is scaled and centred vertically.
        scale = src_w / float(width)
        new_h = min(height, max(1, int(src_h / scale)))
        top = (height - new_h) // 2
        canvas[top:top + new_h, :, :] = cv2.resize(img, (width, new_h))

    normalized = np.float32(canvas) / 255.0

    return [normalized.transpose((2, 0, 1))]

def main():
    """Dump one internal layer of the MXNet U-Net for the image in ``sys.argv[1]``.

    Loads epoch 0 of the ``unet_person_segmentation`` checkpoint, builds a
    module that ends at the ``conv11_1_output`` internal symbol, runs the
    letterboxed image through it and prints the output shape and values so
    they can be compared with the ncnn port. Exits with an error message if
    the image cannot be read.
    """
    batch_size = 16
    n_classes = 2  # only needed by the commented-out mask preview below
    img_width = 256
    img_height = 256

    ctx = [mx.gpu(0)]

    sym, arg_params, aux_params = mx.model.load_checkpoint('unet_person_segmentation', 0)
    all_layers = sym.get_internals()
    # The printed names are the valid keys for picking a layer below.
    print(all_layers.list_outputs())
    # Change the key (e.g. to 'pool5_output') to inspect a different layer.
    unet = mx.mod.Module(symbol=all_layers['conv11_1_output'], context=ctx, label_names=None)

    # The input is channel-first (load_image returns CHW), so height comes
    # before width in the bound shape.
    unet.bind(for_training=False,
              data_shapes=[('data', (batch_size, 3, img_height, img_width))],
              label_shapes=None)
    unet.set_params(arg_params, aux_params)

    image_path = sys.argv[1]
    testimg = cv2.imread(image_path, 1)
    if testimg is None:
        # cv2.imread reports a missing or undecodable file by returning None.
        sys.exit("cannot read image: %s" % image_path)

    img = load_image(testimg, img_width, img_height)
    unet.predict(mx.io.NDArrayIter(data=[img]))

    outputs = unet.get_outputs()[0]
    print(outputs[0].shape)
    output = ncnn_output(outputs[0])
    print(output)

    # Weight inspection, kept for debugging a single layer:
    #keys = unet.get_params()[0].keys() # list all weight names
    #print(keys)
    #conv_w = unet.get_params()[0]['trans_conv6_weight'] # pick the weight to inspect
    #print(conv_w.shape)
    #print(conv_w.asnumpy()) # print the actual values

    # Mask preview (only meaningful when the final layer is selected above):
    #cv2.imshow('test', testimg)
    #cv2.imshow('mask', post_process_mask(outputs[0], img_width, img_height, n_classes))
    #cv2.waitKey()

if __name__ == '__main__':
    # The script needs the path of the image to run through the network.
    if len(sys.argv) < 2:
        print("illegal parameters")
        print("usage: python check.py <image_path>")
        # Non-zero status so callers can tell the run did not happen.
        sys.exit(1)

    main()

借助这个工具逐层比对，发现第一次反卷积（mxnet网络中trans_conv6的输出）就出了问题，两边的结果完全不一致。按个人理解，反卷积算法本身出问题的可能性比较小，所以先把mxnet这一层的权重值打印了出来（即上面注释掉的代码），再在mxnet2ncnn的代码里把对应的参数打印出来对比，最后发现是num_group出了问题。简单的处理办法是把mxnet2ncnn.cpp里反卷积的num_group固定为1，问题终于解决，得到了正确的输出结果：

中间还遇到一些ncnn和mxnet之间图像格式之类的转换问题，特别是浮点数的处理，就不啰嗦了，直接上代码。

#include "net.h"
#include <opencv2/opencv.hpp>
#include <string>
#include <vector>
#include <time.h>
#include <algorithm>
#include <map>
#include <iostream>
#include <opencv2/opencv.hpp>

using namespace std;
using namespace cv;

#define INPUT_WIDTH     256
#define INPUT_HEIGHT    256

int main(int argc, char** argv) {
    if (argc < 2) {
        printf("illegal parameters!");
        exit(0);
    }

    ncnn::Net Unet;

    Unet.load_param("../models/ncnn.param");
    Unet.load_model("../models/ncnn.bin");

    cv::Scalar value = Scalar(0,0,0);
    cv::Mat src;
    cv::Mat tmp;
    src = cv::imread(argv[1]);

    if (src.size().width > src.size().height) {
        int top = (src.size().width - src.size().height) / 2;
        int bottom = (src.size().width - src.size().height) - top;
        cv::copyMakeBorder(src, tmp, top, bottom, 0, 0, BORDER_CONSTANT, value);
    } else {
        int left = (src.size().height - src.size().width) / 2;
        int right = (src.size().height - src.size().width) - left;
        cv::copyMakeBorder(src, tmp, 0, 0, left, right, BORDER_CONSTANT, value);
    }

    cv::Mat tmp1;
    cv::resize(tmp, tmp1, cv::Size(INPUT_WIDTH, INPUT_HEIGHT), CV_INTER_CUBIC);

    cv::Mat image;
    tmp1.convertTo(image, CV_32FC3, 1/255.0);

    std::cout << "image element type "<< image.type() << " " << image.cols << " " << image.rows << std::endl;

    // std::cout << src.cols << " " << src.rows << " " << image.cols << " " << image.rows << std::endl;
    //cv::imshow("test", image);
    //cv::waitKey();

    //ncnn::Mat ncnn_img = ncnn::Mat::from_pixels(image.data, ncnn::Mat::PIXEL_BGR2RGB, image.cols, image.rows);

    // cv32fc3 的布局是 hwc ncnn的Mat布局是 chw 需要调整排布
    float *srcdata = (float*)image.data;
    float *data = new float[INPUT_WIDTH*INPUT_HEIGHT*3];
    for (int i = 0; i < INPUT_HEIGHT; i++)
       for (int j = 0; j < INPUT_WIDTH; j++)
           for (int k = 0; k < 3; k++) {
              data[k*INPUT_HEIGHT*INPUT_WIDTH + i*INPUT_WIDTH + j] = srcdata[i*INPUT_WIDTH*3 + j*3 + k];
           }
    ncnn::Mat in(image.rows*image.cols*3, data);
    in = in.reshape(256, 256, 3);

    //ncnn::Mat in;

    //resize_bilinear(ncnn_img, in, INPUT_WIDTH, INPUT_HEIGHT);

    ncnn::Extractor ex = Unet.create_extractor();

    ex.set_light_mode(true);
    //sex.set_num_threads(4);

    ex.input("data", in);

    ncnn::Mat mask;
    //ex.extract("relu5_2_splitncnn_0", mask);
    //ex.extract("trans_conv6", mask);
    ex.extract("conv11_1", mask);
    //ex.extract("pool5", mask);

    std::cout << "whc " << mask.w << " " << mask.h << " " << mask.c << std::endl;
#if 1
    cv::Mat cv_img = cv::Mat::zeros(INPUT_WIDTH,INPUT_HEIGHT,CV_8UC1);
//    mask.to_pixels(cv_img.data, ncnn::Mat::PIXEL_GRAY);

    {
    float *srcdata = (float*)mask.data;
    unsigned char *data = cv_img.data;

    for (int i = 0; i < mask.h; i++)
       for (int j = 0; j < mask.w; j++) {
         float tmp = srcdata[0*mask.w*mask.h+i*mask.w+j];
         int maxk = 0;
         for (int k = 0; k < mask.c; k++) {
           if (tmp < srcdata[k*mask.w*mask.h+i*mask.w+j]) {
             tmp = srcdata[k*mask.w*mask.h+i*mask.w+j];
             maxk = k;
           }
           //std::cout << srcdata[k*mask.w*mask.h+i*mask.w+j] << std::endl;
         }
         data[i*INPUT_WIDTH + j] = maxk;
       }
    }
    
    cv_img *= 255;
    cv::imshow("test", cv_img);
    cv::waitKey();
#endif
    return 0;
}

至此，功能完成，有兴趣的请移步：https://github.com/xuduo35/unet_mxnet2ncnn

另外，调试过程发现，ncnn的中间层输出和mxnet的输出不是完全一致，可能是有一些参数或者运算细节问题，不影响最后mask结果，暂时就不管了。

移动端unet人像分割模型--2

猜你喜欢