如何使用TensorRT部署YOLOv5模型并给出相应的C++代码
要使用TensorRT来部署Yolov5模型,您需要按照以下步骤进行操作:
- 安装TensorRT:根据您的操作系统和CUDA版本,从NVIDIA官方网站下载并安装TensorRT。确保您已正确设置CUDA环境变量。
- 导出YOLOv5模型:将训练好的YOLOv5模型导出为ONNX格式。可以使用官方提供的导出脚本 export.py,也可以从GitHub上下载已导出的ONNX模型。
- 编写C++代码:使用C++编写TensorRT推理代码。以下是一个简单的示例:
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <fstream>
#include <iostream>
#include <memory>
#include <sstream>
#include <string>
#include <vector>

#include <cuda_runtime_api.h>
#include <NvInfer.h>
#include <NvOnnxParser.h>
#include <opencv2/opencv.hpp>
// Width, in pixels, that preprocessImage() resizes every image to.
constexpr int INPUT_W = 640;
// Height, in pixels, that preprocessImage() resizes every image to.
constexpr int INPUT_H = 640;
// NOTE(review): presumably the number of floats per predicted box for an
// 80-class YOLOv5 model (4 box values + 1 objectness + 80 class scores);
// confirm against the exported model before relying on it.
constexpr int OUTPUT_SIZE = 85;
// Upper bound on the number of detection boxes main() will draw.
constexpr int MAX_OUTPUT_BOXES = 1000;
// Reads class labels from a text file, one label per line.
//
// The position of a line in the file is the class index, so blank lines are
// kept (dropping them would shift every following index). A trailing '\r' is
// stripped from each line so label files saved with Windows (CRLF) line
// endings yield the same labels as LF files.
//
// Returns an empty vector, after printing a warning to stderr, when the file
// cannot be opened.
std::vector<std::string> getLabels(const std::string& filename) {
    std::vector<std::string> labels;
    std::ifstream file(filename);
    if (!file.is_open()) {
        std::cerr << "Failed to open label file: " << filename << std::endl;
        return labels;
    }
    std::string line;
    while (std::getline(file, line)) {
        if (!line.empty() && line.back() == '\r') {
            line.pop_back();
        }
        labels.push_back(line);
    }
    return labels;
}
// Converts `image` into the planar float tensor the network consumes.
//
// The image is resized to INPUT_W x INPUT_H (plain resize, aspect ratio is
// not preserved), scaled by 1/255 into 32-bit floats, and written to
// `inputData` as three consecutive planes taken from split channels 2, 1, 0
// in that order (i.e. the channel order of the source image is reversed).
//
// `inputData` must have room for 3 * INPUT_W * INPUT_H floats.
void preprocessImage(const cv::Mat& image, float* inputData) {
    const int planeElems = INPUT_W * INPUT_H;

    cv::Mat scaled;
    cv::resize(image, scaled, cv::Size(INPUT_W, INPUT_H));
    scaled.convertTo(scaled, CV_32FC3, 1.0 / 255.0);

    cv::Mat planes[3];
    cv::split(scaled, planes);

    // Destination plane p receives source channel (2 - p).
    float* dst = inputData;
    for (int srcChannel = 2; srcChannel >= 0; --srcChannel) {
        memcpy(dst, planes[srcChannel].data, planeElems * sizeof(float));
        dst += planeElems;
    }
}
int main(int argc, char** argv) {
if (argc != 4) {
std::cerr << "Usage: " << argv[0] << " <onnx_model_path> <label_path> <image_path>" << std::endl;
return 1;
}
const std::string onnxModelPath = argv[1];
const std::string labelPath = argv[2];
const std::string imagePath = argv[3];
// Load labels
std::vector<std::string> labels = getLabels(labelPath);
// Load image
cv::Mat image = cv::imread(imagePath);
if (image.empty()) {
std::cerr << "Failed to load image: " << imagePath << std::endl;
return 1;
}
// Create TensorRT engine
nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(gLogger);
assert(runtime != nullptr);
nvinfer1::ICudaEngine* engine = nullptr;
nvinfer1::IHostMemory* engineBuffer = nullptr;
{
std::ifstream engineFile(onnxModelPath + ".engine", std::ios::binary);
if (engineFile.good()) {
std::stringstream engineStream;
engineStream << engineFile.rdbuf();
std::string engineStr = engineStream.str();
engine = runtime->deserializeCudaEngine(engineStr.c_str(), engineStr.size(), nullptr);
}
else {
nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(gLogger);
nvinfer1::INetworkDefinition* network = builder->createNetworkV2(0U);
auto parser = nvonnxparser::createParser(*network, gLogger);
parser->parseFromFile(onnxModelPath.c_str(), static_cast<int>(nvinfer1::ILogger::Severity::kWARNING));
builder->setMaxBatchSize(1);
builder->setMaxWorkspaceSize(1 << 30);
engine = builder->buildCudaEngine(*network);
engineBuffer = engine->serialize();
std::ofstream engineFile(onnxModelPath + ".engine", std::ios::binary);
engineFile.write(reinterpret_cast<const char*>(engineBuffer->data()), engineBuffer->size());
}
}
assert(engine != nullptr);
// Create execution context
nvinfer1::IExecutionContext* context = engine->createExecutionContext();
assert(context != nullptr);
// Allocate buffers
void* buffers[2];
cudaMalloc(&buffers[0], 3 * INPUT_W * INPUT_H * sizeof(float)); // Input
cudaMalloc(&buffers[1], MAX_OUTPUT_BOXES * sizeof(float) * (4 + 1 + OUTPUT_SIZE)); // Output
cudaStream_t stream;
cudaStreamCreate(&stream);
// Preprocess image
float* inputData = new float[3 * INPUT_W * INPUT_H];
preprocessImage(image, inputData);
// Copy input data to GPU
cudaMemcpyAsync(buffers[0], inputData, 3 * INPUT_W * INPUT_H * sizeof(float), cudaMemcpyHostToDevice, stream);
// Run inference
context->enqueue(1, buffers, stream, nullptr);
// Copy output data from GPU
float* outputData = new float[MAX_OUTPUT_BOXES * (4 + 1 + OUTPUT_SIZE)];
cudaMemcpyAsync(outputData, buffers[1], MAX_OUTPUT_BOXES * sizeof(float) * (4 + 1 + OUTPUT_SIZE), cudaMemcpyDeviceToHost, stream);
cudaStreamSynchronize(stream);
// Process output data
const float confidenceThreshold = 0.5;
const float nmsThreshold = 0.4;
for (int i = 0; i < MAX_OUTPUT_BOXES; ++i) {
float* boxData = outputData + i * (4 + 1 + OUTPUT_SIZE);
float confidence = boxData[4];
if (confidence < confidenceThreshold) {
break;
}
float x1 = boxData[0] * image.cols;
float y1 = boxData[1] * image.rows;
float x2 = boxData[2] * image.cols;
float y2 = boxData[3] * image.rows;
cv::rectangle(image, cv::Point(x1, y1), cv::Point(x2, y2), cv::Scalar(0, 255, 0), 2);
std::stringstream labelStream;
labelStream << labels[static_cast<int>(boxData[5])] << " (" << confidence << ")";
cv::putText(image, labelStream.str(), cv::Point(x1, y1 - 10), cv::FONT_HERSHEY_SIMPLEX, 0.9, cv::Scalar(0, 255, 0), 2);
}
// Save output image
cv::imwrite("output.jpg", image);
// Clean up
delete[] inputData;
delete[] outputData;
cudaStreamDestroy(stream);
cudaFree(buffers[0]);
cudaFree(buffers[1]);
context->destroy();
engine->destroy();
runtime->destroy();
return 0;
}
- 编译代码:使用CMake或其他构建工具将C++代码编译为可执行文件。确保正确配置TensorRT、CUDA和OpenCV的头文件路径,并链接相应的库文件。
- 运行代码:将导出的ONNX模型、标签文件和输入图像的路径作为命令行参数传递给可执行文件。例如:
./yolov5_trt yolov5.onnx labels.txt input.jpg
这将运行推理并在当前目录下生成名为output.jpg的输出图像。
请注意,这只是一个简单的示例代码,您可能需要根据自己的环境(例如所安装的TensorRT版本)和需求进行适当的修改和调整。
原文地址: https://www.cveoy.top/t/topic/inUo 著作权归作者所有。请勿转载和采集!