要使用TensorRT来部署Yolov5模型,您需要按照以下步骤进行操作:

  1. 安装TensorRT:根据您的操作系统和CUDA版本,从NVIDIA官方网站下载并安装TensorRT。确保您已正确设置CUDA环境变量。

  2. 导出Yolov5模型:将训练好的Yolov5模型导出为ONNX格式。可以使用官方提供的导出脚本export.py,也可以从GitHub上下载已导出的ONNX模型。

  3. 编写C++代码:使用C++编写TensorRT推理代码。以下是一个简单的示例:

#include <algorithm>
#include <cstddef>
#include <cstring>
#include <fstream>
#include <iostream>
#include <memory>
#include <sstream>
#include <string>
#include <vector>

#include <opencv2/opencv.hpp>

#include <NvInfer.h>
#include <NvOnnxParser.h>
#include <cuda_runtime_api.h>

// Network input resolution in pixels (width, then height).
constexpr int INPUT_W = 640;
constexpr int INPUT_H = 640;

// Per-box value count of the detection tensor.
// NOTE(review): 85 looks like 4 box coordinates + 1 objectness + 80 class
// scores — confirm against the exported model.
constexpr int OUTPUT_SIZE = 85;

// Upper bound on the number of boxes handled for a single image.
constexpr int MAX_OUTPUT_BOXES = 1000;

/**
 * Reads class labels from a text file, one label per line.
 *
 * Line i of the file becomes labels[i], so blank lines are kept on purpose:
 * dropping them would shift every following class index.
 *
 * @param filename Path of the label file.
 * @return The labels in file order; empty when the file cannot be opened
 *         (a diagnostic is printed to stderr in that case).
 */
std::vector<std::string> getLabels(const std::string& filename) {
    std::vector<std::string> labels;

    std::ifstream file(filename);
    if (!file.is_open()) {
        std::cerr << "Failed to open label file: " << filename << std::endl;
        return labels;
    }

    std::string line;
    while (std::getline(file, line)) {
        // Files saved with CRLF line endings leave a '\r' at the end of each
        // line; strip it so it does not end up in the rendered label text.
        if (!line.empty() && line.back() == '\r') {
            line.pop_back();
        }
        labels.push_back(line);
    }
    return labels;
}

/**
 * Fills inputData with a planar float copy of the image at network resolution.
 *
 * The image is resized to INPUT_W x INPUT_H (plain resize, aspect ratio is not
 * preserved), converted to 32-bit float scaled by 1/255, and split into its
 * three channels. The channels are written back to back in reverse order
 * (channel 2, then 1, then 0).
 * NOTE(review): for a BGR image as loaded by cv::imread this yields RGB plane
 * order — confirm the caller always passes a 3-channel BGR image.
 *
 * @param image     Source image.
 * @param inputData Destination with room for 3 * INPUT_W * INPUT_H floats.
 */
void preprocessImage(const cv::Mat& image, float* inputData) {
    const size_t planeElems = static_cast<size_t>(INPUT_W) * INPUT_H;
    const size_t planeBytes = planeElems * sizeof(float);

    cv::Mat scaled;
    cv::resize(image, scaled, cv::Size(INPUT_W, INPUT_H));
    scaled.convertTo(scaled, CV_32FC3, 1.0 / 255.0);

    cv::Mat planes[3];
    cv::split(scaled, planes);

    // Destination plane p receives source channel (2 - p).
    for (int p = 0; p < 3; ++p) {
        memcpy(inputData + p * planeElems, planes[2 - p].data, planeBytes);
    }
}

int main(int argc, char** argv) {
    if (argc != 4) {
        std::cerr << "Usage: " << argv[0] << " <onnx_model_path> <label_path> <image_path>" << std::endl;
        return 1;
    }

    const std::string onnxModelPath = argv[1];
    const std::string labelPath = argv[2];
    const std::string imagePath = argv[3];

    // Load labels
    std::vector<std::string> labels = getLabels(labelPath);

    // Load image
    cv::Mat image = cv::imread(imagePath);
    if (image.empty()) {
        std::cerr << "Failed to load image: " << imagePath << std::endl;
        return 1;
    }

    // Create TensorRT engine
    nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(gLogger);
    assert(runtime != nullptr);
    
    nvinfer1::ICudaEngine* engine = nullptr;
    nvinfer1::IHostMemory* engineBuffer = nullptr;
    {
        std::ifstream engineFile(onnxModelPath + ".engine", std::ios::binary);
        if (engineFile.good()) {
            std::stringstream engineStream;
            engineStream << engineFile.rdbuf();
            std::string engineStr = engineStream.str();
            engine = runtime->deserializeCudaEngine(engineStr.c_str(), engineStr.size(), nullptr);
        }
        else {
            nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(gLogger);
            nvinfer1::INetworkDefinition* network = builder->createNetworkV2(0U);
            auto parser = nvonnxparser::createParser(*network, gLogger);
            parser->parseFromFile(onnxModelPath.c_str(), static_cast<int>(nvinfer1::ILogger::Severity::kWARNING));
            builder->setMaxBatchSize(1);
            builder->setMaxWorkspaceSize(1 << 30);
            engine = builder->buildCudaEngine(*network);
            engineBuffer = engine->serialize();
            std::ofstream engineFile(onnxModelPath + ".engine", std::ios::binary);
            engineFile.write(reinterpret_cast<const char*>(engineBuffer->data()), engineBuffer->size());
        }
    }
    assert(engine != nullptr);

    // Create execution context
    nvinfer1::IExecutionContext* context = engine->createExecutionContext();
    assert(context != nullptr);

    // Allocate buffers
    void* buffers[2];
    cudaMalloc(&buffers[0], 3 * INPUT_W * INPUT_H * sizeof(float)); // Input
    cudaMalloc(&buffers[1], MAX_OUTPUT_BOXES * sizeof(float) * (4 + 1 + OUTPUT_SIZE)); // Output
    cudaStream_t stream;
    cudaStreamCreate(&stream);

    // Preprocess image
    float* inputData = new float[3 * INPUT_W * INPUT_H];
    preprocessImage(image, inputData);

    // Copy input data to GPU
    cudaMemcpyAsync(buffers[0], inputData, 3 * INPUT_W * INPUT_H * sizeof(float), cudaMemcpyHostToDevice, stream);
    
    // Run inference
    context->enqueue(1, buffers, stream, nullptr);

    // Copy output data from GPU
    float* outputData = new float[MAX_OUTPUT_BOXES * (4 + 1 + OUTPUT_SIZE)];
    cudaMemcpyAsync(outputData, buffers[1], MAX_OUTPUT_BOXES * sizeof(float) * (4 + 1 + OUTPUT_SIZE), cudaMemcpyDeviceToHost, stream);
    cudaStreamSynchronize(stream);

    // Process output data
    const float confidenceThreshold = 0.5;
    const float nmsThreshold = 0.4;
    for (int i = 0; i < MAX_OUTPUT_BOXES; ++i) {
        float* boxData = outputData + i * (4 + 1 + OUTPUT_SIZE);
        float confidence = boxData[4];
        if (confidence < confidenceThreshold) {
            break;
        }
        float x1 = boxData[0] * image.cols;
        float y1 = boxData[1] * image.rows;
        float x2 = boxData[2] * image.cols;
        float y2 = boxData[3] * image.rows;
        cv::rectangle(image, cv::Point(x1, y1), cv::Point(x2, y2), cv::Scalar(0, 255, 0), 2);
        std::stringstream labelStream;
        labelStream << labels[static_cast<int>(boxData[5])] << " (" << confidence << ")";
        cv::putText(image, labelStream.str(), cv::Point(x1, y1 - 10), cv::FONT_HERSHEY_SIMPLEX, 0.9, cv::Scalar(0, 255, 0), 2);
    }

    // Save output image
    cv::imwrite("output.jpg", image);

    // Clean up
    delete[] inputData;
    delete[] outputData;
    cudaStreamDestroy(stream);
    cudaFree(buffers[0]);
    cudaFree(buffers[1]);
    context->destroy();
    engine->destroy();
    runtime->destroy();

    return 0;
}
  4. 编译代码:使用CMake或其他构建工具将C++代码编译为可执行文件。确保将TensorRT和OpenCV的头文件和库文件正确包含。

  5. 运行代码:将导出的ONNX模型、标签文件和输入图像的路径作为命令行参数传递给可执行文件。例如:

./yolov5_trt yolov5.onnx labels.txt input.jpg

这将运行推理并在当前目录下生成名为output.jpg的输出图像。

请注意,这只是一个简单的示例代码,您可能需要根据自己的环境和需求进行适当的修改和调整。

如何使用TensorRT部署YOLOv5模型并给出相应的C++代码

原文地址: https://www.cveoy.top/t/topic/inUo 著作权归作者所有。请勿转载和采集!

免费AI点我,无需注册和登录