检测网络重映射

Supported on:RVC2 RVC4

此示例演示了在处理立体深度数据的同时，在 RGB 流上运行 YOLOv6 对象检测，并在 RGB 和彩色深度帧上显示边界框。此示例需要 DepthAI v3 API，请参阅安装说明。

管道

源代码

Python

1#!/usr/bin/env python3
2
3import cv2
4import depthai as dai
5import numpy as np
6
7def colorizeDepth(frameDepth):
8    invalidMask = frameDepth == 0
9    # Log the depth, minDepth and maxDepth
10    try:
11        minDepth = np.percentile(frameDepth[frameDepth != 0], 3)
12        maxDepth = np.percentile(frameDepth[frameDepth != 0], 95)
13        logDepth = np.log(frameDepth, where=frameDepth != 0)
14        logMinDepth = np.log(minDepth)
15        logMaxDepth = np.log(maxDepth)
16        np.nan_to_num(logDepth, copy=False, nan=logMinDepth)
17        # Clip the values to be in the 0-255 range
18        logDepth = np.clip(logDepth, logMinDepth, logMaxDepth)
19
20        # Interpolate only valid logDepth values, setting the rest based on the mask
21        depthFrameColor = np.interp(logDepth, (logMinDepth, logMaxDepth), (0, 255))
22        depthFrameColor = np.nan_to_num(depthFrameColor)
23        depthFrameColor = depthFrameColor.astype(np.uint8)
24        depthFrameColor = cv2.applyColorMap(depthFrameColor, cv2.COLORMAP_JET)
25        # Set invalid depth pixels to black
26        depthFrameColor[invalidMask] = 0
27    except IndexError:
28        # Frame is likely empty
29        depthFrameColor = np.zeros((frameDepth.shape[0], frameDepth.shape[1], 3), dtype=np.uint8)
30    except Exception as e:
31        raise e
32    return depthFrameColor
33
34# Create pipeline
35with dai.Pipeline() as pipeline:
36    cameraNode = pipeline.create(dai.node.Camera).build()
37    detectionNetwork = pipeline.create(dai.node.DetectionNetwork).build(cameraNode, dai.NNModelDescription("yolov6-nano"))
38    labelMap = detectionNetwork.getClasses()
39    monoLeft = pipeline.create(dai.node.Camera).build(dai.CameraBoardSocket.CAM_B)
40    monoRight = pipeline.create(dai.node.Camera).build(dai.CameraBoardSocket.CAM_C)
41    stereo = pipeline.create(dai.node.StereoDepth)
42
43    # Linking
44    monoLeftOut = monoLeft.requestOutput((1280, 720))
45    monoRightOut = monoRight.requestOutput((1280, 720))
46    monoLeftOut.link(stereo.left)
47    monoRightOut.link(stereo.right)
48
49    stereo.setRectification(True)
50    stereo.setExtendedDisparity(True)
51    stereo.setLeftRightCheck(True)
52    stereo.setSubpixel(True)
53
54    qRgb = detectionNetwork.passthrough.createOutputQueue()
55    qDet = detectionNetwork.out.createOutputQueue()
56    qDepth = stereo.disparity.createOutputQueue()
57
58    pipeline.start()
59
60    def displayFrame(name: str, frame: dai.ImgFrame, imgDetections: dai.ImgDetections):
61        color = (0, 255, 0)
62        assert imgDetections.getTransformation() is not None
63        cvFrame = frame.getFrame() if frame.getType() == dai.ImgFrame.Type.RAW16 else frame.getCvFrame()
64        if(frame.getType() == dai.ImgFrame.Type.RAW16):
65            cvFrame = colorizeDepth(cvFrame)
66        for detection in imgDetections.detections:
67            # Get the shape of the frame from which the detections originated for denormalization
68            normShape = imgDetections.getTransformation().getSize()
69
70            # Create rotated rectangle to remap
71            # Here we use an intermediate dai.Rect to create a dai.RotatedRect to simplify construction and denormalization
72            rotRect = dai.RotatedRect(dai.Rect(dai.Point2f(detection.xmin, detection.ymin), dai.Point2f(detection.xmax, detection.ymax)).denormalize(normShape[0], normShape[1]), 0)
73            # Remap the detection rectangle to target frame
74            remapped = imgDetections.getTransformation().remapRectTo(frame.getTransformation(), rotRect)
75            # Remapped rectangle could be rotated, so we get the bounding box
76            bbox = [int(l) for l in remapped.getOuterRect()]
77            cv2.putText(
78                cvFrame,
79                labelMap[detection.label],
80                (bbox[0] + 10, bbox[1] + 20),
81                cv2.FONT_HERSHEY_TRIPLEX,
82                0.5,
83                255,
84            )
85            cv2.putText(
86                cvFrame,
87                f"{int(detection.confidence * 100)}%",
88                (bbox[0] + 10, bbox[1] + 40),
89                cv2.FONT_HERSHEY_TRIPLEX,
90                0.5,
91                255,
92            )
93            cv2.rectangle(cvFrame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2)
94        # Show the frame
95        cv2.imshow(name, cvFrame)
96
97    while pipeline.isRunning():
98        inRgb: dai.ImgFrame = qRgb.get()
99        inDet: dai.ImgDetections = qDet.get()
100        inDepth: dai.ImgFrame = qDepth.get()
101        hasRgb = inRgb is not None
102        hasDepth = inDepth is not None
103        hasDet = inDet is not None
104        if hasRgb:
105            displayFrame("rgb", inRgb, inDet)
106        if hasDepth:
107            displayFrame("depth", inDepth, inDet)
108        if cv2.waitKey(1) == ord("q"):
109            pipeline.stop()
110            break

C++

GitHub

1#include <algorithm>  // Required for std::sort and std::unique
2#include <cmath>      // Required for std::log, std::isnan, std::isinf
3#include <iostream>
4#include <opencv2/opencv.hpp>
5#include <string>
6#include <vector>
7
8#include "depthai/depthai.hpp"
9#include "xtensor/containers/xadapt.hpp"
10#include "xtensor/core/xmath.hpp"
11
12cv::Mat colorizeDepth(cv::Mat frameDepth) {
13    cv::Mat invalidMask = frameDepth == 0;
14    cv::Mat depthFrameColor;
15
16    try {
17        cv::Mat frameDepthFloat;
18        frameDepth.convertTo(frameDepthFloat, CV_32F);
19        xt::xtensor<float, 2> depth =
20            xt::adapt((float*)frameDepthFloat.data, {static_cast<size_t>(frameDepthFloat.rows), static_cast<size_t>(frameDepthFloat.cols)});
21
22        // Get valid depth values (non-zero)
23        std::vector<float> validDepth;
24        validDepth.reserve(depth.size());
25        std::copy_if(depth.begin(), depth.end(), std::back_inserter(validDepth), [](float x) { return x != 0; });
26
27        if(validDepth.size() == 0) {
28            return cv::Mat::zeros(frameDepth.rows, frameDepth.cols, CV_8UC3);
29        }
30
31        // Calculate percentiles
32        std::sort(validDepth.begin(), validDepth.end());
33        float minDepth = validDepth[static_cast<size_t>(validDepth.size() * 0.03)];
34        float maxDepth = validDepth[static_cast<size_t>(validDepth.size() * 0.95)];
35
36        // Take log of depth values
37        auto logDepth = xt::eval(xt::log(depth));
38        float logMinDepth = std::log(minDepth);
39        float logMaxDepth = std::log(maxDepth);
40
41        // Replace invalid values with logMinDepth using a naive implementation
42        auto logDepthData = logDepth.data();
43        auto depthData = depth.data();
44        const size_t size = depth.size();
45        for(size_t i = 0; i < size; i++) {
46            if(std::isnan(logDepthData[i]) || std::isinf(logDepthData[i]) || depthData[i] == 0.0f) {
47                logDepthData[i] = logMinDepth;
48            }
49        }
50
51        // Clip values
52        logDepth = xt::clip(logDepth, logMinDepth, logMaxDepth);
53
54        // Normalize to 0-255 range
55        auto normalizedDepth = (logDepth - logMinDepth) / (logMaxDepth - logMinDepth) * 255.0f;
56
57        // Convert to CV_8UC1
58        cv::Mat depthMat(frameDepth.rows, frameDepth.cols, CV_8UC1);
59        std::transform(normalizedDepth.begin(), normalizedDepth.end(), depthMat.data, [](float x) { return static_cast<uchar>(x); });
60
61        // Apply colormap
62        cv::applyColorMap(depthMat, depthFrameColor, cv::COLORMAP_JET);
63
64        // Set invalid pixels to black
65        depthFrameColor.setTo(cv::Scalar(0, 0, 0), invalidMask);
66
67    } catch(const std::exception& e) {
68        std::cerr << "Error in colorizeDepth: " << e.what() << std::endl;
69        return cv::Mat::zeros(frameDepth.rows, frameDepth.cols, CV_8UC3);
70    }
71
72    return depthFrameColor;
73}
74
75// Helper function to display frames with detections
76void displayFrame(const std::string& name,
77                  std::shared_ptr<dai::ImgFrame> frame,
78                  std::shared_ptr<dai::ImgDetections> imgDetections,
79                  const std::vector<std::string>& labelMap) {
80    cv::Scalar color(0, 255, 0);
81    cv::Mat cvFrame;
82
83    if(frame->getType() == dai::ImgFrame::Type::RAW16) {
84        cvFrame = colorizeDepth(frame->getFrame());
85    } else {
86        cvFrame = frame->getCvFrame();
87    }
88
89    if(!imgDetections || !imgDetections->transformation.has_value()) {
90        // std::cout << "No detections or transformation data for " << name << std::endl;
91        cv::imshow(name, cvFrame);
92        return;
93    }
94
95    const auto& sourceTransform = *(imgDetections->transformation);
96    const auto& targetTransform = frame->transformation;
97
98    for(const auto& detection : imgDetections->detections) {
99        auto normShape = sourceTransform.getSize();
100
101        dai::Rect rect(dai::Point2f(detection.xmin, detection.ymin), dai::Point2f(detection.xmax, detection.ymax));
102        rect = rect.denormalize(static_cast<float>(normShape.first), static_cast<float>(normShape.second));
103        dai::RotatedRect rotRect(rect, 0);
104
105        auto remapped = sourceTransform.remapRectTo(targetTransform, rotRect);
106        auto bbox = remapped.getOuterRect();
107
108        cv::putText(cvFrame,
109                    labelMap[detection.label],
110                    cv::Point(static_cast<int>(bbox[0]) + 10, static_cast<int>(bbox[1]) + 20),
111                    cv::FONT_HERSHEY_TRIPLEX,
112                    0.5,
113                    cv::Scalar(255, 255, 255));
114        cv::putText(cvFrame,
115                    std::to_string(static_cast<int>(detection.confidence * 100)) + "%",
116                    cv::Point(static_cast<int>(bbox[0]) + 10, static_cast<int>(bbox[1]) + 40),
117                    cv::FONT_HERSHEY_TRIPLEX,
118                    0.5,
119                    cv::Scalar(255, 255, 255));
120        cv::rectangle(cvFrame,
121                      cv::Point(static_cast<int>(bbox[0]), static_cast<int>(bbox[1])),
122                      cv::Point(static_cast<int>(bbox[2]), static_cast<int>(bbox[3])),
123                      color,
124                      2);
125    }
126    cv::imshow(name, cvFrame);
127}
128
129int main() {
130    dai::Pipeline pipeline;
131
132    auto cameraNode = pipeline.create<dai::node::Camera>();
133    cameraNode->build();
134
135    auto detectionNetwork = pipeline.create<dai::node::DetectionNetwork>();
136    dai::NNModelDescription modelDescription;
137    modelDescription.model = "yolov6-nano";
138    detectionNetwork->build(cameraNode, modelDescription);
139    auto labelMap = detectionNetwork->getClasses().value_or(std::vector<std::string>{});
140
141    auto monoLeft = pipeline.create<dai::node::Camera>();
142    monoLeft->build(dai::CameraBoardSocket::CAM_B);
143    auto monoRight = pipeline.create<dai::node::Camera>();
144    monoRight->build(dai::CameraBoardSocket::CAM_C);
145    auto stereo = pipeline.create<dai::node::StereoDepth>();
146
147    // Linking
148    auto monoLeftOut = monoLeft->requestOutput(std::make_pair(1280, 720));
149    auto monoRightOut = monoRight->requestOutput(std::make_pair(1280, 720));
150    monoLeftOut->link(stereo->left);
151    monoRightOut->link(stereo->right);
152
153    stereo->setRectification(true);
154    stereo->setExtendedDisparity(true);
155    stereo->setLeftRightCheck(true);
156    stereo->setSubpixel(true);
157
158    auto qRgb = detectionNetwork->passthrough.createOutputQueue();
159    auto qDet = detectionNetwork->out.createOutputQueue();
160    auto qDepth = stereo->disparity.createOutputQueue();
161
162    pipeline.start();
163
164    while(pipeline.isRunning()) {
165        auto inRgb = qRgb->tryGet<dai::ImgFrame>();
166        auto inDet = qDet->tryGet<dai::ImgDetections>();
167        auto inDepth = qDepth->tryGet<dai::ImgFrame>();
168
169        bool hasRgb = inRgb != nullptr;
170        bool hasDepth = inDepth != nullptr;
171        bool hasDet = inDet != nullptr;
172
173        if(hasRgb && hasDet) {
174            displayFrame("rgb", inRgb, inDet, labelMap);
175        }
176        if(hasDepth && hasDet) {
177            displayFrame("depth", inDepth, inDet, labelMap);
178        }
179
180        if(cv::waitKey(1) == 'q') {
181            pipeline.stop();
182            break;
183        }
184    }
185
186    return 0;
187}

需要帮助？

请前往 Discussion Forum 获取技术支持或提出您可能有的任何其他问题。

本页目录

检测网络重映射View as Markdown

管道

源代码

Python

C++

需要帮助？

检测网络重映射