Implementing YOLO Object Detection Image Pre- and Post-Processing in C++

【Background】

Why do we need to write the image post-processing for YOLO object detection ourselves? Because we use an in-house FPGA development board, on which a YOLO object-detection accelerator circuit has been developed and deployed. As the developer on the PS (processing system) side, I have to write the input image to the specified DDR address, enable the FPGA accelerator through its control registers, wait for the interrupt raised when the computation finishes, fetch the raw detection results, restore them through post-processing, and finally mark the results on the original image.
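The PS-side control flow just described (image into DDR, kick the accelerator, wait for completion, read back the raw result) is implemented in test_fpga() further down. As a quick preview, the register handshake alone can be sketched as follows. The register offsets (start at 0x00, busy at 0x08, status at 0x7C), the mmap base 0x0400000000 and the page size are taken from that code; the function name kick_accelerator and the exact meaning of the flags are assumptions made for this illustration only.

// Minimal sketch of the PS-side register handshake, under the register map
// assumed from the code below (an illustration, not the project's actual driver):
//   base + 0x7C : status flag (poll until it reads 1 before starting)
//   base + 0x00 : start       (write 1, then 0, to pulse a start)
//   base + 0x08 : busy flag   (poll until it reads 0)
#include <cstdint>
#include <sys/mman.h>

bool kick_accelerator(int fd_mem)
{
  void* page = mmap(nullptr, 4096, PROT_READ | PROT_WRITE, MAP_SHARED,
                    fd_mem, 0x0400000000L);
  if(page == MAP_FAILED)
    return false;

  volatile uint32_t* regs = static_cast<volatile uint32_t*>(page);

  while(regs[0x7C / 4] != 1) { }   // wait until the accelerator reports ready
  regs[0x00 / 4] = 1;              // pulse the start bit
  regs[0x00 / 4] = 0;
  while(regs[0x08 / 4] != 0) { }   // wait until the busy flag clears

  munmap(page, 4096);
  return true;
}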

【Practical Content】

Let's get straight to the code.

Through the following code, you should learn at least the following (a minimal sketch of how the pieces are wired together follows this list):

1. How to read dataset images from a folder

2. How to dequantize, decode, and apply non-maximum suppression (and other basic post-processing steps) to the FPGA results

3. How to label the recognition results and save them

4. How to control the FPGA through register operations
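All of the functions below are member functions of a PedestrianDetection class; the surrounding class and node boilerplate is omitted here. Conceptually they form a three-stage pipeline: read_dataset() pre-processes images and fills ready_buf_, test_fpga() drives the accelerator and fills result_buf_, and write_predict() decodes, draws and publishes the results. A minimal wiring sketch, assuming the class is default-constructible and that each stage runs on its own thread (the real node setup will differ):

#include <thread>

int main()
{
  PedestrianDetection node;   // assumed default-constructible for this sketch

  // Stage 1: grab/receive images, pre-process and push them into ready_buf_.
  std::thread producer(&PedestrianDetection::read_dataset, &node);
  // Stage 2: feed padded images to the FPGA and push raw results into result_buf_.
  std::thread fpga_worker(&PedestrianDetection::test_fpga, &node);
  // Stage 3: dequantize, decode, run NMS, draw boxes and publish/save the result.
  std::thread consumer(&PedestrianDetection::write_predict, &node);

  producer.join();
  fpga_worker.join();
  consumer.join();
  return 0;
}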

static char text[256];                   // scratch buffer for box labels
static const int length = 5 + 5;         // 5 box terms (x, y, w, h, conf) + 5 class scores
static const int kind_nums = 5;          // number of classes
static float anchors_0[6] = {2.53125f, 2.5625f, 4.21875f, 5.28125f, 10.75f, 9.96875f};  // anchors of the coarse output layer
static float anchors_1[6] = {1.4375f, 1.6875f, 2.3125f, 3.625f, 5.0625f, 5.125f};       // anchors of the fine output layer
static float conf_thresh_ = 0.5f;        // confidence threshold
static float nms_thresh_ = 0.4f;         // NMS IoU threshold

static std::string label_name[] = {"person", "aeroplane", "bus", "car", "truck"};

bool PedestrianDetection::get_files_list()
{
  struct dirent *ptr;
  DIR *dir;
  std::string PATH = dataset_dir_;
  dir = opendir(PATH.c_str());
  if(dir == nullptr)
  {
    printf("[PedestrianDetection] : Failed to open dataset dir %s \n", PATH.c_str());
    return false;
  }
  while((ptr=readdir(dir)) != nullptr)
  {
    if(ptr->d_name[0] == '.')   // skip "." , ".." and hidden files
      continue;
    files_list.push_back(ptr->d_name);
  }
  closedir(dir);
  for(const auto &iter : files_list)
  {
    printf("%s\n", iter.c_str());
  }
  if(files_list.size() == dataset_size)
  {
    return true;
  }
  else {
    printf("[PedestrianDetection] : Error to search dataset %zu \n", files_list.size());
    return false;
  }
}

bool PedestrianDetection::init_net_info(std::string pkg_path)
{
  yolov4tn = 2;                        // number of YOLO output layers
  outputchannel = 30;                  // 3 anchors * (5 + 5 classes); change with the class number
  yolov4tnsize = {11, 15, 22, 30};     // {grid0_h, grid0_w, grid1_h, grid1_w}
  yolov4tnsize_sum = yolov4tnsize[0] * yolov4tnsize[1] + yolov4tnsize[2] * yolov4tnsize[3];
  accData_size = yolov4tnsize_sum * outputchannel;        // valid output size after unpadding
  accRaw_size = yolov4tnsize_sum * (outputchannel + 2);   // raw size including 2 padding channels

  return true;
}

void PedestrianDetection::init_fpga()
{
  raw_img_w = 480;
  raw_img_h = 352;
  raw_img_c = 3;
  memset(&yolo_v4, 0, sizeof(struct Model_Output));
  // physical DDR addresses and lengths (in bytes) of the two YOLO output layers
  yolo_v4.addr[0] = 0x4491F800;
  yolo_v4.length[0] = 11 * 15 * (30+2);
  yolo_v4.addr[1] = 0x44921000;
  yolo_v4.length[1] = 22 * 30 * (30+2);
  if(init_model_output(&yolo_v4, fd_mem) == -1)
  {
    printf("[yolov4] failed to init model output \n");
    exit(-1);
  }
}

void PedestrianDetection::read_dataset()
{
  while(nh_private.ok())
  {
    while(ready_buf_.size() > 50)
    {
      std::this_thread::sleep_for(std::chrono::milliseconds(5));
    }

#ifdef WITH_NETCAM
    cv::Mat origin;
    struct timeval timestamp;
    if(netcam.getCameraImage(origin, timestamp, cv::Size(1920, 1080)))
    {
      std::vector<cv::Mat> pics;
      cut_img(origin, 2, 2, pics);
      for(size_t i = 0; i < pics.size(); i++)
      {
        if(i != (pics.size()-1))
        {
          std::vector<uint8_t> vec_data;
          cv::imencode(".jpg", pics[i], vec_data);
          std::string str_data;
          str_data.assign(vec_data.begin(), vec_data.end());
          zmqpp::message message;
          message << "robot" + std::to_string(i+1) +"pic" << str_data;
          socket_pubraw.send(message);
        }
        else {
          cv::Mat resize;
          cv::resize(pics[i], resize, cv::Size(raw_img_w, raw_img_h), 0, 0, cv::INTER_CUBIC);  // interpolation flag belongs in the sixth argument
          cv::cvtColor(resize, resize, cv::COLOR_BGR2RGB);

          cv::Mat padding;
          cv::copyMakeBorder(resize, padding, 1, 0, 1, 0, cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0));
          rawImgFile tmp;
          tmp.raw_img = pics[i];
          tmp.pad_img = padding;
          tmp.file_name = robot_id;
          ready_buf_mutex_.lock();
          ready_buf_.push(tmp);
          ready_buf_mutex_.unlock();
        }
      }
    }
    else {
      printf("camera no data\n");
      usleep(30*1000);
      continue;
    }
#else
    zmqpp::message message;
    socket_sub.receive(message);
    std::string topic;
    std::string data_str;
    message >> topic >> data_str;
    std::vector<uint8_t> data_vec;
    data_vec.assign(data_str.begin(), data_str.end());

    cv::Mat img_decode;
    img_decode = cv::imdecode(data_vec, cv::IMREAD_COLOR);
    cv::Mat resize;
    cv::resize(img_decode, resize, cv::Size(raw_img_w, raw_img_h), 0, 0, cv::INTER_CUBIC);  // interpolation flag belongs in the sixth argument
    cv::cvtColor(resize, resize, cv::COLOR_BGR2RGB);

    cv::Mat padding;
    cv::copyMakeBorder(resize, padding, 1, 0, 1, 0, cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0));
    rawImgFile tmp;
    tmp.raw_img = img_decode;
    tmp.pad_img = padding;
    tmp.file_name = robot_id;
    ready_buf_mutex_.lock();
    ready_buf_.push(tmp);
    ready_buf_mutex_.unlock();
#endif

  }
}

void PedestrianDetection::test_fpga()
{
  while(nh_private.ok())
  {
    while(nh_private.ok())
    {
      if(ready_buf_.empty())
      {
        std::this_thread::sleep_for(std::chrono::milliseconds(5));
      }
      else
      {
        break;
      }
    }

    rawImgFile tmp_input;
    ready_buf_mutex_.lock();
    tmp_input = ready_buf_.front();
    ready_buf_.pop();
    ready_buf_mutex_.unlock();

    tmp_input.acc.acc_out = new (std::nothrow) int8_t[accRaw_size];
    if(!tmp_input.acc.acc_out)
    {
      ROS_ERROR("Allocation of memory Failed \n");
      return;
    }

    int image_size = 481 * 353 * 3;   // padded input: (480 + 1) x (352 + 1) x 3
    image_to_mem(tmp_input.pad_img.data, 0x46000000, fd_mem, image_size);

    // start the FPGA through its memory-mapped control registers
    void* mem_ctrl = mmap(nullptr, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd_mem, 0x0400000000L);
    if(mem_ctrl == MAP_FAILED)
    {
      ROS_ERROR("mmap of FPGA control registers failed \n");
      return;
    }

    // wait until the status register at offset 124 reads 1
    int state = 0;
    while(state != 1)
    {
      memcpy(&state, (char*)mem_ctrl+124, 4);
    }

    // pulse the start register at offset 0 (write 1, then 0), reading back each time
    state = 1;
    memcpy((char*)mem_ctrl, &state, 4);
    int res = -1;
    memcpy(&res, (char*)mem_ctrl, 4);

    state = 0;
    memcpy((char*)mem_ctrl, &state, 4);
    memcpy(&res, (char*)mem_ctrl, 4);

    // poll the busy flag at offset 8 until the accelerator finishes
    int busy = 0;
    do
    {
      memcpy(&busy, (char*)mem_ctrl+8, 4);
    } while(busy != 0);
    munmap(mem_ctrl, 4096);

    get_model_output(&yolo_v4, tmp_input.acc.acc_out);

    result_buf_mutex_.lock();
    result_buf_.push(tmp_input);
    result_buf_mutex_.unlock();
  }
}


void PedestrianDetection::write_predict()
{
  while(nh_private.ok())
  {
    while(nh_private.ok())
    {
      if(result_buf_.empty())
      {
        std::this_thread::sleep_for(std::chrono::milliseconds(5));
      }
      else
      {
        break;
      }
    }

    rawImgFile tmp_output;
    result_buf_mutex_.lock();
    tmp_output = result_buf_.front();
    result_buf_.pop();
    result_buf_mutex_.unlock();

    decode(tmp_output);

    delete [] tmp_output.acc.acc_out;
    tmp_output.acc.acc_out = nullptr;
  }
}

static bool outbin_write = true;
void PedestrianDetection::decode(rawImgFile &msg)
{
  cv::Mat frame = msg.raw_img;

  if(msg.acc.acc_out != nullptr)
  {
    int8_t* data;
    data = msg.acc.acc_out;

    // dump the raw accelerator output once, for offline debugging
    if(outbin_write)
    {
      FILE* fp = fopen((pkg_path + "/unit_test/result_img/out.bin").c_str(), "wb");
      if(fp != nullptr)
      {
        fwrite(data, sizeof(int8_t), accRaw_size, fp);
        fclose(fp);
      }
      outbin_write = false;
    }

    float *out = (float*)calloc(accData_size, sizeof(float));
    unpadding(data, out, 32, 2);
    std::vector<de_result> res;
    process_decode(out, res, frame);
    // frame is a shallow copy of msg.raw_img (they share pixel data), so drawing once is enough
    draw_objects(frame, res);

#if 1

    std::vector<uint8_t> vec_data;
    cv::imencode(".jpg", msg.raw_img, vec_data);
    std::string str_data;
    str_data.assign(vec_data.begin(), vec_data.end());
    zmqpp::message message;
    message << "pedestrian_detect" << str_data;
    socket_pub.send(message);
//    cv::imwrite((pkg_path + "/unit_test/result_img/" + img_name +"jpg"), msg.raw_img);

/*
    std::ofstream outfile;
    outfile.open((pkg_path + "/unit_test/result_label/" + img_name +"txt"));
    for(size_t i = 0; i < res.size(); i++)
    {
      std::string str_conf = std::to_string(res[i].complex_prob);
      std::string conf_4(str_conf.begin(), str_conf.begin()+5);
      outfile << label_name[res[i].index] << " " << conf_4 << " " << static_cast<int>(res[i].x - res[i].w/2) << " " << static_cast<int>(res[i].y - res[i].h/2) << " " << static_cast<int>(res[i].x + res[i].w/2) << " " << static_cast<int>(res[i].y + res[i].h/2);
      if(i != (res.size()-1))
        outfile << std::endl;
    }
    outfile.close();*/
/*
    FILE* fp = fopen((pkg_path + "/unit_test/result_bin/" + img_name +"bin").c_str(), "wb");
    fwrite(data, sizeof(int8_t), 11 * 15 * 96 + 22 * 30 * 96, fp);
    fclose(fp);
    */
#endif

    free(out);
  }
  else
  {
    printf("acc is empty \n");
  }

}


void PedestrianDetection::unpadding(int8_t* input, float* output, int ic ,int padding)
{
  // The accelerator writes ic (= 32) channels per grid cell, of which the last
  // `padding` (= 2) channels are alignment padding. Strip the padding and
  // dequantize: real = (q - zero_point) * scale, with a separate zero point and
  // scale for each of the two output layers.
  int offset = yolov4tnsize[0] * yolov4tnsize[1];   // number of cells in the first layer
  for(int i = 0; i < yolov4tnsize_sum; i++)
  {
    if(i < offset)
    {
      for(int j = 0; j < ic - padding; j++)
      {
        int tmp = *(input + j);
        *(output++) = (tmp - 78) * 0.1175554f;   // layer 0: zero point 78, scale 0.1175554
      }
    }
    else
    {
      for(int j = 0; j < ic - padding; j++)
      {
        int tmp = *(input + j);
        *(output++) = (tmp - 88) * 0.1006139f;   // layer 1: zero point 88, scale 0.1006139
      }
    }

    input += ic;
  }
}

inline float PedestrianDetection::sigmoid(const float &x)
{
  return (1 / (1 + expf(-x)));
}

inline int PedestrianDetection::find_max_index(float *buf, int len)
{
  int k = 0;
  float max = *buf;
  for (int i = 1; i < len; ++i)
  {
    if (buf[i] > max)
    {
      k = i;
      max = buf[i];
    }
  }
  return k;
}

bool comp_prob(const de_result &a, const de_result &b)
{
  return a.complex_prob > b.complex_prob;
}

bool comp(const de_result &a, const de_result &b)
{
  return a.index > b.index;
}

inline float PedestrianDetection::clac_iou(const de_result &A, const de_result &B)
{
  float aminx = A.x - A.w / 2.f;
  float amaxx = A.x + A.w / 2.f;
  float aminy = A.y - A.h / 2.f;
  float amaxy = A.y + A.h / 2.f;

  float bminx = B.x - B.w / 2.f;
  float bmaxx = B.x + B.w / 2.f;
  float bminy = B.y - B.h / 2.f;
  float bmaxy = B.y + B.h / 2.f;

  float w = std::min(amaxx, bmaxx) - std::max(aminx, bminx);
  float h = std::min(amaxy, bmaxy) - std::max(aminy, bminy);

  if (w <= 0 || h <= 0)
    return 0;

  return (w * h) / ((A.w * A.h) + (B.w * B.h) - (w * h));
}

void PedestrianDetection::nms(const std::vector<de_result> &vpbox, std::vector<de_result> &voutbox, float iou_thres)
{
  // greedy class-wise NMS: boxes arrive sorted by descending score; a flag of -1
  // marks a box suppressed by a higher-scoring box of the same class
  std::vector<float> flag(vpbox.size(), -1.0);
  int n = vpbox.size();
  for (int i = 0; i < n; i++)
  {
    flag[i] = vpbox[i].index;
  }

  for (int i = 0; i < n; i++)
  {
    if (flag[i] == -1.0)
      continue;
    for (int j = i + 1; j < n; j++)
    {
      if(flag[j] == -1.0 || vpbox[i].index != vpbox[j].index)
        continue;
      float iou = clac_iou(vpbox[i], vpbox[j]);

      if (iou > iou_thres)
      {
        flag[j] = -1.0;
      }
    }
  }
  for (int i = 0; i < n; i++)
  {
    if (flag[i] != -1.0)
    {
      voutbox.push_back(vpbox[i]);
    }
  }
}

void PedestrianDetection::decode_one_with_thre(int layer, float *anchors, void *data, const int xno, const int yno, const int anchor_no, std::vector<de_result>& boxes, cv::Mat &img)
{
  const float *ori_ptr = (const float *)data;
  float decode_data[length], *data_ptr;
  data_ptr = decode_data;

  // objectness and class scores go through a sigmoid first, so that boxes below
  // the confidence threshold can be rejected before decoding the coordinates
  data_ptr[4] = sigmoid(ori_ptr[4]);
  for(int k = 5; k < length; k++)
  {
    data_ptr[k] = sigmoid(ori_ptr[k]);
  }
  int max_index = find_max_index(data_ptr+5, kind_nums);
  if(data_ptr[max_index+5] * data_ptr[4] < conf_thresh_)
    return;

  data_ptr[0] = sigmoid(ori_ptr[0]);
  data_ptr[1] = sigmoid(ori_ptr[1]);
  data_ptr[2] = expf(ori_ptr[2]);
  data_ptr[3] = expf(ori_ptr[3]);

  // standard YOLO decode: cell offset plus sigmoid(tx/ty), anchor times exp(tw/th),
  // then scale from grid coordinates to image pixels
  de_result tmp;
  tmp.x = (data_ptr[0] + xno) / yolov4tnsize[2 * layer + 1] * img.cols;
  tmp.y = (data_ptr[1] + yno) / yolov4tnsize[2 * layer] * img.rows;
  tmp.w = data_ptr[2] * anchors[2 * anchor_no] / yolov4tnsize[2 * layer + 1] * img.cols;
  tmp.h = data_ptr[3] * anchors[2 * anchor_no + 1] / yolov4tnsize[2 * layer] * img.rows;
  tmp.conf = data_ptr[4];
  tmp.index = max_index;
  tmp.prob = data_ptr[max_index+5];
  tmp.complex_prob = data_ptr[max_index+5] * data_ptr[4];
//  printf("## layer[%d] prob : %f , x : %f , y : %f , w : %f , h : %f \n", layer, tmp.prob, tmp.x, tmp.y, tmp.w, tmp.h);
  boxes.push_back(tmp);
}

void PedestrianDetection::process_decode(float* input, std::vector<de_result> &res, cv::Mat &img)
{
  std::vector<de_result> boxes;
  float* data_ptr = input;

  for(int i = 0; i < yolov4tnsize[0]; i++){
    for(int j = 0; j < yolov4tnsize[1]; j++){
      for(int k = 0; k < 3; k++)
      {
        decode_one_with_thre(0, anchors_0, data_ptr, j, i, k, boxes, img);
        data_ptr += length;
      }
    }
  }

  for(int i = 0; i < yolov4tnsize[2]; i++){
    for(int j = 0; j < yolov4tnsize[3]; j++){
      for(int k = 0; k < 3; k++)
      {
        decode_one_with_thre(1, anchors_1, data_ptr, j, i, k, boxes, img);
        data_ptr += length;
      }
    }
  }
  std::sort(boxes.begin(), boxes.end(), comp_prob);
  nms(boxes, res, nms_thresh_);
  boxes.clear();
}


void PedestrianDetection::draw_objects(cv::Mat &image, const std::vector<de_result> &objects)
{
  double font_size = 0.6;
  int font_bold = 1;
  int baseLine = 0;
  for(size_t i = 0; i < objects.size(); i++)
  {
    const de_result &obj = objects[i];

    snprintf(text, sizeof(text), "%s %.1f%%", label_name[obj.index].c_str(), obj.complex_prob * 100);
    int x = std::max(static_cast<int>(obj.x - obj.w / 2), 0);
    int y = std::max(static_cast<int>(obj.y - obj.h / 2), 0);
    int x1 = std::min(static_cast<int>(obj.x + obj.w / 2), image.cols);
    int y1 = std::min(static_cast<int>(obj.y + obj.h / 2), image.rows);
    int w = x1 - x - 1;
    int h = y1 - y - 1;

    cv::Rect obj_rect(x, y, w, h);
    cv::rectangle(image, obj_rect, cv::Scalar(0, 255, 0), font_bold);
    cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, font_size, font_bold, &baseLine);

    int tx = obj_rect.x;
    int ty = obj_rect.y - label_size.height - baseLine;
    if (ty < 0)
      ty = 0;

    cv::Point point;
    point = cv::Point(tx, ty + label_size.height);
    cv::rectangle(image, cv::Rect(cv::Point(obj_rect.x, ty), cv::Size(obj_rect.width, label_size.height + baseLine)), cv::Scalar(128, 128, 0), cv::FILLED);
    cv::putText(image, text, point, cv::FONT_HERSHEY_SIMPLEX, font_size, cv::Scalar(0, 255, 0), font_bold);
  }
}

void PedestrianDetection::cut_img(cv::Mat &src, int m, int n, std::vector<cv::Mat> &ceil)
{
  int height = src.rows;
  int width = src.cols;
  int ceil_h = height / m;
  int ceil_w = width / n;
  cv::Mat roi_img;
  for(int i = 0; i < m; i++)
  {
    for(int j = 0; j < n; j++)
    {
      cv::Rect rect(j*ceil_w, i*ceil_h, ceil_w, ceil_h);
      roi_img = cv::Mat(src, rect);
      ceil.push_back(roi_img);
    }
  }
}


Reprinted from: blog.csdn.net/weixin_39538031/article/details/131001772