How to deploy a YOLOv5 model with TensorRT, with corresponding C++ code:

To deploy a YOLOv5 model with TensorRT, follow these steps:

1. Install TensorRT: download and install TensorRT from the NVIDIA website, choosing the package that matches your operating system and CUDA version. Make sure your CUDA environment variables are set correctly.

2. Export the YOLOv5 model: export the trained YOLOv5 model to ONNX format. You can use the official export.py script, or download an already-exported ONNX model from GitHub.
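For reference, a typical invocation of the official export script looks like this, run from the root of the ultralytics/yolov5 repository (the yolov5s.pt weights file is an assumption, and exact flag names can vary between repository versions):

```bash
# Export the PyTorch weights to ONNX at the 640x640 input size
python export.py --weights yolov5s.pt --include onnx --imgsz 640
```

You can also pre-convert the ONNX model to a serialized TensorRT engine offline with the trtexec tool that ships with TensorRT. The C++ example below looks for a cached engine at <model>.onnx.engine and deserializes it directly when present, so this step avoids the slow engine build on first run:

```bash
# One-time offline conversion from ONNX to a TensorRT engine
trtexec --onnx=yolov5s.onnx --saveEngine=yolov5s.onnx.engine
```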
3. Write the C++ inference code: below is a simple example. It targets the TensorRT 8.x API and assumes a standard YOLOv5 export at 640x640 with 80 COCO classes and a single fused output tensor of shape 1x25200x85; adjust the constants if your export differs.

```cpp
#include <iostream>
#include <fstream>
#include <sstream>
#include <vector>
#include <string>
#include <cstring>
#include <cassert>
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>  // cv::dnn::NMSBoxes
#include <NvInfer.h>
#include <NvOnnxParser.h>
#include <cuda_runtime_api.h>

const int INPUT_W = 640;
const int INPUT_H = 640;
const int NUM_CLASSES = 80;              // COCO class count; adjust for your model
const int NUM_BOXES = 25200;             // boxes at 640x640: (80*80 + 40*40 + 20*20) * 3 anchors
const int BOX_STRIDE = 5 + NUM_CLASSES;  // cx, cy, w, h, objectness, then one score per class

// TensorRT requires an ILogger implementation; this one prints warnings and errors.
class Logger : public nvinfer1::ILogger {
    void log(Severity severity, const char* msg) noexcept override {
        if (severity <= Severity::kWARNING) {
            std::cerr << msg << std::endl;
        }
    }
} gLogger;

// Read one class name per line from the label file.
std::vector<std::string> getLabels(const std::string& filename) {
    std::ifstream file(filename);
    std::vector<std::string> labels;
    std::string line;
    while (std::getline(file, line)) {
        labels.push_back(line);
    }
    return labels;
}

// Resize to the network input size, scale pixels to [0, 1], and convert BGR HWC to RGB CHW.
// Note: a plain resize distorts the aspect ratio; the reference YOLOv5 pipeline uses
// letterbox padding instead, which preserves it and gives better accuracy.
void preprocessImage(const cv::Mat& image, float* inputData) {
    cv::Mat resizedImage;
    cv::resize(image, resizedImage, cv::Size(INPUT_W, INPUT_H));
    resizedImage.convertTo(resizedImage, CV_32FC3, 1.0 / 255.0);
    cv::Mat channels[3];
    cv::split(resizedImage, channels);
    // OpenCV loads images as BGR, so copy the planes in reverse order to get RGB.
    for (int c = 0; c < 3; ++c) {
        memcpy(inputData + c * INPUT_W * INPUT_H, channels[2 - c].data,
               INPUT_W * INPUT_H * sizeof(float));
    }
}

int main(int argc, char** argv) {
    if (argc != 4) {
        std::cerr << "Usage: " << argv[0] << " <onnx_model_path> <label_path> <image_path>" << std::endl;
        return 1;
    }

    const std::string onnxModelPath = argv[1];
    const std::string labelPath = argv[2];
    const std::string imagePath = argv[3];

    // Load labels
    std::vector<std::string> labels = getLabels(labelPath);

    // Load image
    cv::Mat image = cv::imread(imagePath);
    if (image.empty()) {
        std::cerr << "Failed to load image: " << imagePath << std::endl;
        return 1;
    }

    nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(gLogger);
    assert(runtime != nullptr);

    // Deserialize a cached engine if one exists next to the ONNX file; otherwise build one.
    nvinfer1::ICudaEngine* engine = nullptr;
    {
        std::ifstream engineFile(onnxModelPath + ".engine", std::ios::binary);
        if (engineFile.good()) {
            std::stringstream engineStream;
            engineStream << engineFile.rdbuf();
            std::string engineStr = engineStream.str();
            engine = runtime->deserializeCudaEngine(engineStr.data(), engineStr.size());
        } else {
            nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(gLogger);
            // The ONNX parser only works with explicit-batch networks.
            const auto explicitBatch =
                1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
            nvinfer1::INetworkDefinition* network = builder->createNetworkV2(explicitBatch);
            auto parser = nvonnxparser::createParser(*network, gLogger);
            if (!parser->parseFromFile(onnxModelPath.c_str(),
                                       static_cast<int>(nvinfer1::ILogger::Severity::kWARNING))) {
                std::cerr << "Failed to parse ONNX model: " << onnxModelPath << std::endl;
                return 1;
            }
            nvinfer1::IBuilderConfig* config = builder->createBuilderConfig();
            config->setMaxWorkspaceSize(1 << 30);  // up to 1 GiB of scratch space for tactic selection
            engine = builder->buildEngineWithConfig(*network, *config);
            assert(engine != nullptr);
            // Cache the serialized engine so later runs skip the (slow) build step.
            nvinfer1::IHostMemory* engineBuffer = engine->serialize();
            std::ofstream outFile(onnxModelPath + ".engine", std::ios::binary);
            outFile.write(reinterpret_cast<const char*>(engineBuffer->data()), engineBuffer->size());
            engineBuffer->destroy();
            config->destroy();
            parser->destroy();
            network->destroy();
            builder->destroy();
        }
    }
    assert(engine != nullptr);

    // Create execution context
    nvinfer1::IExecutionContext* context = engine->createExecutionContext();
    assert(context != nullptr);

    // Allocate GPU buffers. This assumes binding 0 is the input (1x3x640x640) and
    // binding 1 is the single fused output (1x25200x85) of a standard YOLOv5 export.
    void* buffers[2];
    cudaMalloc(&buffers[0], 3 * INPUT_W * INPUT_H * sizeof(float));
    cudaMalloc(&buffers[1], NUM_BOXES * BOX_STRIDE * sizeof(float));
    cudaStream_t stream;
    cudaStreamCreate(&stream);

    // Preprocess image
    std::vector<float> inputData(3 * INPUT_W * INPUT_H);
    preprocessImage(image, inputData.data());

    // Copy input data to the GPU
    cudaMemcpyAsync(buffers[0], inputData.data(), inputData.size() * sizeof(float),
                    cudaMemcpyHostToDevice, stream);

    // Run inference; explicit-batch engines use enqueueV2 (no batch-size argument).
    context->enqueueV2(buffers, stream, nullptr);

    // Copy output data back from the GPU
    std::vector<float> outputData(NUM_BOXES * BOX_STRIDE);
    cudaMemcpyAsync(outputData.data(), buffers[1], outputData.size() * sizeof(float),
                    cudaMemcpyDeviceToHost, stream);
    cudaStreamSynchronize(stream);

    // Decode raw predictions. Each row is (cx, cy, w, h, objectness, class scores...)
    // in input-pixel (640x640) coordinates, so rescale boxes to the original image.
    const float confidenceThreshold = 0.5f;
    const float nmsThreshold = 0.4f;
    const float scaleX = static_cast<float>(image.cols) / INPUT_W;
    const float scaleY = static_cast<float>(image.rows) / INPUT_H;
    std::vector<cv::Rect> boxes;
    std::vector<float> scores;
    std::vector<int> classIds;
    for (int i = 0; i < NUM_BOXES; ++i) {
        const float* box = outputData.data() + i * BOX_STRIDE;
        // Final confidence is objectness multiplied by the best class score.
        int classId = 0;
        float classScore = 0.0f;
        for (int c = 0; c < NUM_CLASSES; ++c) {
            if (box[5 + c] > classScore) {
                classScore = box[5 + c];
                classId = c;
            }
        }
        float confidence = box[4] * classScore;
        if (confidence < confidenceThreshold) {
            continue;
        }
        float w = box[2] * scaleX;
        float h = box[3] * scaleY;
        float x1 = box[0] * scaleX - w / 2;
        float y1 = box[1] * scaleY - h / 2;
        boxes.emplace_back(static_cast<int>(x1), static_cast<int>(y1),
                           static_cast<int>(w), static_cast<int>(h));
        scores.push_back(confidence);
        classIds.push_back(classId);
    }

    // Suppress overlapping detections with OpenCV's built-in non-maximum suppression.
    std::vector<int> keep;
    cv::dnn::NMSBoxes(boxes, scores, confidenceThreshold, nmsThreshold, keep);
    for (int idx : keep) {
        cv::rectangle(image, boxes[idx], cv::Scalar(0, 255, 0), 2);
        std::stringstream labelStream;
        labelStream << labels[classIds[idx]] << " (" << scores[idx] << ")";
        cv::putText(image, labelStream.str(), cv::Point(boxes[idx].x, boxes[idx].y - 10),
                    cv::FONT_HERSHEY_SIMPLEX, 0.9, cv::Scalar(0, 255, 0), 2);
    }

    // Save output image
    cv::imwrite("output.jpg", image);

    // Clean up (destroy() is the TensorRT 8.x idiom; TensorRT 10 uses plain delete).
    cudaStreamDestroy(stream);
    cudaFree(buffers[0]);
    cudaFree(buffers[1]);
    context->destroy();
    engine->destroy();
    runtime->destroy();

    return 0;
}
```

4. Build the code: compile the C++ code into an executable with CMake or another build tool. Make sure the TensorRT and OpenCV headers and libraries are correctly included and linked; a minimal CMakeLists.txt sketch is given at the end of this answer.

5. Run the code: pass the paths of the exported ONNX model, the label file, and the input image as command-line arguments. For example:

```
./yolov5_trt yolov5.onnx labels.txt input.jpg
```

This runs inference and writes an output image named output.jpg to the current directory.

Note that this is only a simple example; you may need to modify and tune it for your own environment and requirements.
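As mentioned in step 4, here is a minimal CMakeLists.txt sketch for building the example. The TensorRT and CUDA install prefixes, the source file name main.cpp, and the target name are assumptions; adjust them to your setup:

```cmake
cmake_minimum_required(VERSION 3.10)
project(yolov5_trt)

set(CMAKE_CXX_STANDARD 14)

# Assumed install locations -- adjust for your system.
set(TENSORRT_DIR /usr/local/TensorRT)
set(CUDA_DIR /usr/local/cuda)

find_package(OpenCV REQUIRED)

include_directories(
    ${OpenCV_INCLUDE_DIRS}
    ${CUDA_DIR}/include
    ${TENSORRT_DIR}/include)
link_directories(${CUDA_DIR}/lib64 ${TENSORRT_DIR}/lib)

add_executable(yolov5_trt main.cpp)
target_link_libraries(yolov5_trt ${OpenCV_LIBS} nvinfer nvonnxparser cudart)
```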


