Detection network Remap

Supported on:RVC2 RVC4

This example demonstrates running YOLOv6 object detection on an RGB stream while simultaneously processing stereo depth data, displaying bounding boxes on both the RGB and colorized depth frames.This example requires the DepthAI v3 API, see installation instructions.

Pipeline

Source code

Python

GitHub

1#!/usr/bin/env python3
2
3import cv2
4import depthai as dai
5import numpy as np
6
7def colorizeDepth(frameDepth):
8    invalidMask = frameDepth == 0
9    # Log the depth, minDepth and maxDepth
10    try:
11        minDepth = np.percentile(frameDepth[frameDepth != 0], 3)
12        maxDepth = np.percentile(frameDepth[frameDepth != 0], 95)
13        logDepth = np.log(frameDepth, where=frameDepth != 0)
14        logMinDepth = np.log(minDepth)
15        logMaxDepth = np.log(maxDepth)
16        np.nan_to_num(logDepth, copy=False, nan=logMinDepth)
17        # Clip the values to be in the 0-255 range
18        logDepth = np.clip(logDepth, logMinDepth, logMaxDepth)
19
20        # Interpolate only valid logDepth values, setting the rest based on the mask
21        depthFrameColor = np.interp(logDepth, (logMinDepth, logMaxDepth), (0, 255))
22        depthFrameColor = np.nan_to_num(depthFrameColor)
23        depthFrameColor = depthFrameColor.astype(np.uint8)
24        depthFrameColor = cv2.applyColorMap(depthFrameColor, cv2.COLORMAP_JET)
25        # Set invalid depth pixels to black
26        depthFrameColor[invalidMask] = 0
27    except IndexError:
28        # Frame is likely empty
29        depthFrameColor = np.zeros((frameDepth.shape[0], frameDepth.shape[1], 3), dtype=np.uint8)
30    except Exception as e:
31        raise e
32    return depthFrameColor
33
34# Create pipeline
35with dai.Pipeline() as pipeline:
36    cameraNode = pipeline.create(dai.node.Camera).build()
37    detectionNetwork = pipeline.create(dai.node.DetectionNetwork).build(cameraNode, dai.NNModelDescription("yolov6-nano"))
38    labelMap = detectionNetwork.getClasses()
39    monoLeft = pipeline.create(dai.node.Camera).build(dai.CameraBoardSocket.CAM_B)
40    monoRight = pipeline.create(dai.node.Camera).build(dai.CameraBoardSocket.CAM_C)
41    stereo = pipeline.create(dai.node.StereoDepth)
42
43    # Linking
44    monoLeftOut = monoLeft.requestOutput((1280, 720))
45    monoRightOut = monoRight.requestOutput((1280, 720))
46    monoLeftOut.link(stereo.left)
47    monoRightOut.link(stereo.right)
48
49    stereo.setRectification(True)
50    stereo.setExtendedDisparity(True)
51    stereo.setLeftRightCheck(True)
52    stereo.setSubpixel(True)
53
54    qRgb = detectionNetwork.passthrough.createOutputQueue()
55    qDet = detectionNetwork.out.createOutputQueue()
56    qDepth = stereo.disparity.createOutputQueue()
57
58    pipeline.start()
59
60    def displayFrame(name: str, frame: dai.ImgFrame, imgDetections: dai.ImgDetections):
61        color = (0, 255, 0)
62        assert imgDetections.getTransformation() is not None
63        cvFrame = frame.getFrame() if frame.getType() == dai.ImgFrame.Type.RAW16 else frame.getCvFrame()
64        if(frame.getType() == dai.ImgFrame.Type.RAW16):
65            cvFrame = colorizeDepth(cvFrame)
66        for detection in imgDetections.detections:
67            # Get the shape of the frame from which the detections originated for denormalization
68            normShape = imgDetections.getTransformation().getSize()
69
70            # Create rotated rectangle to remap
71            # Here we use an intermediate dai.Rect to create a dai.RotatedRect to simplify construction and denormalization
72            rotRect = dai.RotatedRect(dai.Rect(dai.Point2f(detection.xmin, detection.ymin), dai.Point2f(detection.xmax, detection.ymax)).denormalize(normShape[0], normShape[1]), 0)
73            # Remap the detection rectangle to target frame
74            remapped = imgDetections.getTransformation().remapRectTo(frame.getTransformation(), rotRect)
75            # Remapped rectangle could be rotated, so we get the bounding box
76            bbox = [int(l) for l in remapped.getOuterRect()]
77            cv2.putText(
78                cvFrame,
79                labelMap[detection.label],
80                (bbox[0] + 10, bbox[1] + 20),
81                cv2.FONT_HERSHEY_TRIPLEX,
82                0.5,
83                255,
84            )
85            cv2.putText(
86                cvFrame,
87                f"{int(detection.confidence * 100)}%",
88                (bbox[0] + 10, bbox[1] + 40),
89                cv2.FONT_HERSHEY_TRIPLEX,
90                0.5,
91                255,
92            )
93            cv2.rectangle(cvFrame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2)
94        # Show the frame
95        cv2.imshow(name, cvFrame)
96
97    while pipeline.isRunning():
98        inRgb: dai.ImgFrame = qRgb.get()
99        inDet: dai.ImgDetections = qDet.get()
100        inDepth: dai.ImgFrame = qDepth.get()
101        hasRgb = inRgb is not None
102        hasDepth = inDepth is not None
103        hasDet = inDet is not None
104        if hasRgb:
105            displayFrame("rgb", inRgb, inDet)
106        if hasDepth:
107            displayFrame("depth", inDepth, inDet)
108        if cv2.waitKey(1) == ord("q"):
109            pipeline.stop()
110            break

C++

GitHub

1#include <algorithm>  // Required for std::sort and std::unique
2#include <cmath>      // Required for std::log, std::isnan, std::isinf
3#include <csignal>
4#include <iostream>
5#include <opencv2/opencv.hpp>
6#include <string>
7#include <vector>
8
9#include "depthai/depthai.hpp"
10#include "xtensor/containers/xadapt.hpp"
11#include "xtensor/core/xmath.hpp"
12
13std::atomic<bool> quitEvent(false);
14
15void signalHandler(int) {
16    quitEvent = true;
17}
18
19cv::Mat colorizeDepth(cv::Mat frameDepth) {
20    cv::Mat invalidMask = frameDepth == 0;
21    cv::Mat depthFrameColor;
22
23    try {
24        cv::Mat frameDepthFloat;
25        frameDepth.convertTo(frameDepthFloat, CV_32F);
26        xt::xtensor<float, 2> depth =
27            xt::adapt((float*)frameDepthFloat.data, {static_cast<size_t>(frameDepthFloat.rows), static_cast<size_t>(frameDepthFloat.cols)});
28
29        // Get valid depth values (non-zero)
30        std::vector<float> validDepth;
31        validDepth.reserve(depth.size());
32        std::copy_if(depth.begin(), depth.end(), std::back_inserter(validDepth), [](float x) { return x != 0; });
33
34        if(validDepth.size() == 0) {
35            return cv::Mat::zeros(frameDepth.rows, frameDepth.cols, CV_8UC3);
36        }
37
38        // Calculate percentiles
39        std::sort(validDepth.begin(), validDepth.end());
40        float minDepth = validDepth[static_cast<size_t>(validDepth.size() * 0.03)];
41        float maxDepth = validDepth[static_cast<size_t>(validDepth.size() * 0.95)];
42
43        // Take log of depth values
44        auto logDepth = xt::eval(xt::log(depth));
45        float logMinDepth = std::log(minDepth);
46        float logMaxDepth = std::log(maxDepth);
47
48        // Replace invalid values with logMinDepth using a naive implementation
49        auto logDepthData = logDepth.data();
50        auto depthData = depth.data();
51        const size_t size = depth.size();
52        for(size_t i = 0; i < size; i++) {
53            if(std::isnan(logDepthData[i]) || std::isinf(logDepthData[i]) || depthData[i] == 0.0f) {
54                logDepthData[i] = logMinDepth;
55            }
56        }
57
58        // Clip values
59        logDepth = xt::clip(logDepth, logMinDepth, logMaxDepth);
60
61        // Normalize to 0-255 range
62        auto normalizedDepth = (logDepth - logMinDepth) / (logMaxDepth - logMinDepth) * 255.0f;
63
64        // Convert to CV_8UC1
65        cv::Mat depthMat(frameDepth.rows, frameDepth.cols, CV_8UC1);
66        std::transform(normalizedDepth.begin(), normalizedDepth.end(), depthMat.data, [](float x) { return static_cast<uchar>(x); });
67
68        // Apply colormap
69        cv::applyColorMap(depthMat, depthFrameColor, cv::COLORMAP_JET);
70
71        // Set invalid pixels to black
72        depthFrameColor.setTo(cv::Scalar(0, 0, 0), invalidMask);
73
74    } catch(const std::exception& e) {
75        std::cerr << "Error in colorizeDepth: " << e.what() << std::endl;
76        return cv::Mat::zeros(frameDepth.rows, frameDepth.cols, CV_8UC3);
77    }
78
79    return depthFrameColor;
80}
81
82// Helper function to display frames with detections
83void displayFrame(const std::string& name,
84                  std::shared_ptr<dai::ImgFrame> frame,
85                  std::shared_ptr<dai::ImgDetections> imgDetections,
86                  const std::vector<std::string>& labelMap) {
87    cv::Scalar color(0, 255, 0);
88    cv::Mat cvFrame;
89
90    if(frame->getType() == dai::ImgFrame::Type::RAW16) {
91        cvFrame = colorizeDepth(frame->getFrame());
92    } else {
93        cvFrame = frame->getCvFrame();
94    }
95
96    if(!imgDetections || !imgDetections->transformation.has_value()) {
97        // std::cout << "No detections or transformation data for " << name << std::endl;
98        cv::imshow(name, cvFrame);
99        return;
100    }
101
102    const auto& sourceTransform = *(imgDetections->transformation);
103    const auto& targetTransform = frame->transformation;
104
105    for(const auto& detection : imgDetections->detections) {
106        auto normShape = sourceTransform.getSize();
107
108        dai::Rect rect(dai::Point2f(detection.xmin, detection.ymin), dai::Point2f(detection.xmax, detection.ymax));
109        rect = rect.denormalize(static_cast<float>(normShape.first), static_cast<float>(normShape.second));
110        dai::RotatedRect rotRect(rect, 0);
111
112        auto remapped = sourceTransform.remapRectTo(targetTransform, rotRect);
113        auto bbox = remapped.getOuterRect();
114
115        cv::putText(cvFrame,
116                    labelMap[detection.label],
117                    cv::Point(static_cast<int>(bbox[0]) + 10, static_cast<int>(bbox[1]) + 20),
118                    cv::FONT_HERSHEY_TRIPLEX,
119                    0.5,
120                    cv::Scalar(255, 255, 255));
121        cv::putText(cvFrame,
122                    std::to_string(static_cast<int>(detection.confidence * 100)) + "%",
123                    cv::Point(static_cast<int>(bbox[0]) + 10, static_cast<int>(bbox[1]) + 40),
124                    cv::FONT_HERSHEY_TRIPLEX,
125                    0.5,
126                    cv::Scalar(255, 255, 255));
127        cv::rectangle(cvFrame,
128                      cv::Point(static_cast<int>(bbox[0]), static_cast<int>(bbox[1])),
129                      cv::Point(static_cast<int>(bbox[2]), static_cast<int>(bbox[3])),
130                      color,
131                      2);
132    }
133    cv::imshow(name, cvFrame);
134}
135
136int main() {
137    signal(SIGTERM, signalHandler);
138    signal(SIGINT, signalHandler);
139
140    dai::Pipeline pipeline;
141
142    auto cameraNode = pipeline.create<dai::node::Camera>();
143    cameraNode->build();
144
145    auto detectionNetwork = pipeline.create<dai::node::DetectionNetwork>();
146    dai::NNModelDescription modelDescription;
147    modelDescription.model = "yolov6-nano";
148    detectionNetwork->build(cameraNode, modelDescription);
149    auto labelMap = detectionNetwork->getClasses().value_or(std::vector<std::string>{});
150
151    auto monoLeft = pipeline.create<dai::node::Camera>();
152    monoLeft->build(dai::CameraBoardSocket::CAM_B);
153    auto monoRight = pipeline.create<dai::node::Camera>();
154    monoRight->build(dai::CameraBoardSocket::CAM_C);
155    auto stereo = pipeline.create<dai::node::StereoDepth>();
156
157    // Linking
158    auto monoLeftOut = monoLeft->requestOutput(std::make_pair(1280, 720));
159    auto monoRightOut = monoRight->requestOutput(std::make_pair(1280, 720));
160    monoLeftOut->link(stereo->left);
161    monoRightOut->link(stereo->right);
162
163    stereo->setRectification(true);
164    stereo->setExtendedDisparity(true);
165    stereo->setLeftRightCheck(true);
166    stereo->setSubpixel(true);
167
168    auto qRgb = detectionNetwork->passthrough.createOutputQueue();
169    auto qDet = detectionNetwork->out.createOutputQueue();
170    auto qDepth = stereo->disparity.createOutputQueue();
171
172    pipeline.start();
173
174    while(pipeline.isRunning() && !quitEvent) {
175        auto inRgb = qRgb->tryGet<dai::ImgFrame>();
176        auto inDet = qDet->tryGet<dai::ImgDetections>();
177        auto inDepth = qDepth->tryGet<dai::ImgFrame>();
178
179        bool hasRgb = inRgb != nullptr;
180        bool hasDepth = inDepth != nullptr;
181        bool hasDet = inDet != nullptr;
182
183        if(hasRgb && hasDet) {
184            displayFrame("rgb", inRgb, inDet, labelMap);
185        }
186        if(hasDepth && hasDet) {
187            displayFrame("depth", inDepth, inDet, labelMap);
188        }
189
190        if(cv::waitKey(1) == 'q') {
191            pipeline.stop();
192            break;
193        }
194    }
195
196    return 0;
197}

Need assistance?

Head over to Discussion Forum for technical support or any other questions you might have.

ON THIS PAGE

Detection network Remap

Pipeline

Source code

Python

C++

Need assistance?