escribir directorio
Implementación del modelo yolov8: implementación de TensorRT
1. Exportar el modelo al formato onnx.
-
Si desea implementar YOLOv8 con TensorRT, primero debe exportar el modelo al formato onnx usando el siguiente comando:
yolo export model=yolov8n.pt format=onnx
-
Los tres cabezales de detección de YOLOv8 tienen un total de 80x80 + 40x40 + 20x20 = 8400 celdas de salida. Cada celda contiene 4 elementos de x, y, w, h más 80 categorías de confianza, un total de 84 elementos, por lo que a través de Las dimensiones de salida del modelo onnx exportado por el comando anterior son
1x84x8400
. -
Dimensiones de salida del modelo
-
Hay un problema con esta secuencia de disposición de canales, es decir, provocará un acceso discontinuo a la memoria durante el posprocesamiento.
-
Para solucionar este problema podemos modificar el código, el método específico es modificar
ultralytics/nn/modules.py
el código en el archivo de la siguiente manera e intercambiar el orden de los canales del tensor y:
def forward(self, x):
shape = x[0].shape # BCHW
for i in range(self.nl):
x[i] = torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1)
if self.training:
return x
elif self.dynamic or self.shape != shape:
self.anchors, self.strides = (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5))
self.shape = shape
x_cat = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2)
if self.export and self.format in ('saved_model', 'pb', 'tflite', 'edgetpu', 'tfjs'): # avoid TF FlexSplitV ops
box = x_cat[:, :self.reg_max * 4]
cls = x_cat[:, self.reg_max * 4:]
else:
box, cls = x_cat.split((self.reg_max * 4, self.nc), 1)
dbox = dist2bbox(self.dfl(box), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides
y = torch.cat((dbox, cls.sigmoid()), 1)
# 修改模型输出维度
y=y.permute(0,2,1)
return y if self.export else (y, x)
- Después de esta modificación y luego ejecutar el comando de exportación del modelo anterior, la dimensión de salida del modelo se convierte en
1x8400x84
2. Convierta el formato del modelo onnx al motor para su implementación.
- Configurar el
TensorRT
entornoNVIDIA
- Usar
trtexec
formato de conversióntrtexec --onnx=coco/best.onnx --saveEngine=coco/best.onnx.engine --workspace=32
- Código de pieza de implementación del modelo-c++
#ifndef MyController_hpp #define MyController_hpp #include <ctime> #include <chrono> #include <sstream> #include <iomanip> #include <iostream> #include <numeric> #include <vector> #include "oatpp/web/server/api/ApiController.hpp" #include "oatpp/core/macro/codegen.hpp" #include "oatpp/core/macro/component.hpp" #include "opencv2/opencv.hpp" #include "../dto/DTOs.hpp" // 定义数据格式,用于在不同组件之间传输数据 #include "../yoloApp/simple_yolo.hpp" #include "../byteTrackApp/logging.h" #include "../byteTrackApp/BYTETracker.h" // high performance #include "../yoloHighPer/cpm.hpp" #include "../yoloHighPer/infer.hpp" #include "../yoloHighPer/yolo.hpp" # include <dirent.h> # include <sys/types.h> # include <sys/stat.h> # include <unistd.h> # include <stdarg.h> using namespace std; using namespace cv; #include OATPP_CODEGEN_BEGIN(ApiController) //<-- Begin Codegen static bool exists(const string& path){ #ifdef _WIN32 return ::PathFileExistsA(path.c_str()); #else return access(path.c_str(), R_OK) == 0; #endif } static std::vector<std::string> cocolabels = { "car", "excavator", "loader", "dumpTruck", "person" }; class InferInstance{ public: InferInstance(std::string onnx_model_path, std::string trt_model_path){ onnx_model = onnx_model_path; trt_model = trt_model_path; startup(); } bool startup(){ // if(!exists(trt_model)){ // SimpleYolo::compile( // SimpleYolo::Mode::FP32, // FP32、FP16、INT8 // SimpleYolo::Type::V8, // 1, // max batch size // onnx_model, // source // trt_model, // save to // 1 << 30, // "inference" // ); // } infer_ = yolo::load(trt_model, yolo::Type::V8); return infer_ != nullptr; } int inference(const Mat& image_input, yolo::BoxArray& boxarray){ if(infer_ == nullptr){ // INFOE("Not Initialize."); return 1; } if(image_input.empty()){ // INFOE("Image is empty."); return 1; } boxarray = infer_->forward(cvimg(image_input)); return 0; } private: yolo::Image cvimg(const cv::Mat &image) { return yolo::Image(image.data, image.cols, image.rows);} private: std::string onnx_model = "best.onnx"; std::string trt_model = "best.onnx.engine"; shared_ptr<yolo::Infer> infer_; }; /// std::string onnx_model = "coco/best.onnx"; std::string engine_label = "coco/best.onnx.engine"; std::unique_ptr<InferInstance> infer_instance1(new InferInstance(onnx_model, engine_label)); int frame_rate = 10; int track_buffer = 30; std::unique_ptr<BYTETracker> tracker_instance1(new BYTETracker(frame_rate, track_buffer)); /// /** * 建议使用 Api 控制器,而不是使用裸 HttpRequestHandler 为每个新端点创建新的请求处理程序。 * API 控制器通过为您生成样板代码,使添加新端点的过程变得更加容易。 它还有助于组织您的端点, * 将它们分组到不同的 API 控制器中。 */ /** * Sample Api Controller. */ class MyController : public oatpp::web::server::api::ApiController { protected: /** * Constructor with object mapper. * @param objectMapper - default object mapper used to serialize/deserialize DTOs. */ MyController(const std::shared_ptr<ObjectMapper>& objectMapper) : oatpp::web::server::api::ApiController(objectMapper) { } public: static std::shared_ptr<MyController> createShared(OATPP_COMPONENT(std::shared_ptr<ObjectMapper>, objectMapper)){ return std::shared_ptr<MyController>(new MyController(objectMapper)); } // TODO Insert Your endpoints here !!! /--data-- // 多目标追踪 ENDPOINT_ASYNC("POST", "/car1", tracker1){ ENDPOINT_ASYNC_INIT(tracker1) Action act() override { return request->readBodyToStringAsync().callbackTo(&tracker1::returnResponse); } Action returnResponse(const oatpp::String& body_){ auto response = tracker_inference(*infer_instance1, *tracker_instance1, body_, controller); return _return(response); } }; // public: // 多目标追踪 static std::shared_ptr<OutgoingResponse> tracker_inference(InferInstance& infer_, BYTETracker& track_infer, std::string body_, auto* controller){ auto base64Image = base64_decode(body_); if(base64Image.empty()){ return controller->createResponse(Status::CODE_400, "The image is empty!"); } std::vector<char> base64_img(base64Image.begin(), base64Image.end()); cv::Mat image = cv::imdecode(base64_img, 1); // 获取程序开始时间点 auto start_time = std::chrono::high_resolution_clock::now(); // 推理 yolo::BoxArray boxarray; CV_Assert(0 == infer_.inference(image, boxarray)); // 获取程序结束时间点 auto end_time = std::chrono::high_resolution_clock::now(); // 计算运行时间 auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time); // 打印运行时间(以微秒为单位) // std::cout << "程序运行时间: " << duration.count() << " 毫秒" << std::endl; // 结果处理 vector<Objects> objects; objects.resize(boxarray.size()); int index = 0; for(auto& box : boxarray) { objects[index].rect.x = box.left;; objects[index].rect.y = box.top; objects[index].rect.width = box.right - box.left; objects[index].rect.height = box.bottom - box.top; objects[index].prob = box.confidence; objects[index].label = box.class_label; index++; std::cout << "left: " << box.left << ", top: " << box.top << ", right: " << box.right << ", bottom: " << box.bottom << ", confidence: " << box.confidence << ", class_label: " << box.class_label << std::endl; } auto yoloDto = TrackYoloDto::createShared(); auto boxList = TrackBoxList::createShared(); std::vector<STrack> output_stracks = track_infer.update(objects); for (int i = 0; i < output_stracks.size(); i++) { auto trackBoxDto = TrackerBboxes::createShared(); vector<float> tlwh = output_stracks[i].tlwh; // 方框的位置 trackBoxDto->class_id = cocolabels[output_stracks[i].class_id]; trackBoxDto->track_id = output_stracks[i].track_id; trackBoxDto->x = tlwh[0]; trackBoxDto->y = tlwh[1]; trackBoxDto->width = tlwh[2]; trackBoxDto->height = tlwh[3]; boxList->push_back(trackBoxDto); } output_stracks.clear(); yoloDto->data = boxList; yoloDto->status = "successful"; yoloDto->time = currentDateTime(); return controller->createDtoResponse(Status::CODE_200, yoloDto); } static std::string currentDateTime(){ auto now = std::chrono::system_clock::now(); auto now_c = std::chrono::system_clock::to_time_t(now); auto now_ms = std::chrono::duration_cast<std::chrono::milliseconds>(now.time_since_epoch()) % 1000; std::stringstream ss; ss << std::put_time(std::localtime(&now_c), "%Y-%m-%d %H:%M:%S") << '.' << std::setfill('0') << std::setw(3) << now_ms.count(); return ss.str(); } static unsigned char from_b64(unsigned char ch){ /* Inverse lookup map */ static const unsigned char tab[128] = { 255, 255, 255, 255, 255, 255, 255, 255, /* 0 */ 255, 255, 255, 255, 255, 255, 255, 255, /* 8 */ 255, 255, 255, 255, 255, 255, 255, 255, /* 16 */ 255, 255, 255, 255, 255, 255, 255, 255, /* 24 */ 255, 255, 255, 255, 255, 255, 255, 255, /* 32 */ 255, 255, 255, 62, 255, 255, 255, 63, /* 40 */ 52, 53, 54, 55, 56, 57, 58, 59, /* 48 */ 60, 61, 255, 255, 255, 200, 255, 255, /* 56 '=' is 200, on index 61 */ 255, 0, 1, 2, 3, 4, 5, 6, /* 64 */ 7, 8, 9, 10, 11, 12, 13, 14, /* 72 */ 15, 16, 17, 18, 19, 20, 21, 22, /* 80 */ 23, 24, 25, 255, 255, 255, 255, 255, /* 88 */ 255, 26, 27, 28, 29, 30, 31, 32, /* 96 */ 33, 34, 35, 36, 37, 38, 39, 40, /* 104 */ 41, 42, 43, 44, 45, 46, 47, 48, /* 112 */ 49, 50, 51, 255, 255, 255, 255, 255, /* 120 */ }; return tab[ch & 127]; } static std::string base64_decode(const std::string& base64){ if(base64.empty()) return ""; int len = base64.size(); auto s = (const unsigned char*)base64.data(); unsigned char a, b, c, d; int orig_len = len; int dec_len = 0; string out_data; auto end_s = s + base64.size(); int count_eq = 0; while(*--end_s == '='){ count_eq ++; } out_data.resize(len / 4 * 3 - count_eq); char *dst = const_cast<char*>(out_data.data()); char *orig_dst = dst; while (len >= 4 && (a = from_b64(s[0])) != 255 && (b = from_b64(s[1])) != 255 && (c = from_b64(s[2])) != 255 && (d = from_b64(s[3])) != 255) { s += 4; len -= 4; if (a == 200 || b == 200) break; /* '=' can't be there */ *dst++ = a << 2 | b >> 4; if (c == 200) break; *dst++ = b << 4 | c >> 2; if (d == 200) break; *dst++ = c << 6 | d; } dec_len = (dst - orig_dst); // dec_len必定等于out_data.size() return out_data; } }; #include OATPP_CODEGEN_END(ApiController) //<-- End Codegen #endif /* MyController_hpp */
- Iniciar el modelo
- Solicitar interfaz para razonar
prueba de implementación del modelo yolov8