DepthAI
Software Stack

ON THIS PAGE

  • Pipeline
  • Source code

Detection network Remap

Supported on: RVC2, RVC4
This example demonstrates running YOLOv6 object detection on an RGB stream while simultaneously processing stereo depth data, displaying bounding boxes on both the RGB and colorized depth frames. This example requires the DepthAI v3 API; see the installation instructions.

Pipeline

Source code

Python

Python
GitHub
1#!/usr/bin/env python3
2
3import cv2
4import depthai as dai
5import numpy as np
6
def colorizeDepth(frameDepth):
    """Colorize a depth frame for display.

    Valid (non-zero) pixels are log-scaled between the 3rd and 95th
    percentile of the valid depth values and mapped through the JET
    colormap; invalid (zero) pixels are painted black.

    :param frameDepth: 2D array of depth values (0 = invalid).
    :return: HxWx3 uint8 BGR image.
    """
    invalidMask = frameDepth == 0
    try:
        # Robust normalization bounds from valid pixels only.
        minDepth = np.percentile(frameDepth[frameDepth != 0], 3)
        maxDepth = np.percentile(frameDepth[frameDepth != 0], 95)
        logMinDepth = np.log(minDepth)
        logMaxDepth = np.log(maxDepth)
        # BUG FIX: np.log(x, where=mask) WITHOUT `out=` leaves the masked-out
        # entries uninitialized (arbitrary memory), not NaN, so the previous
        # nan_to_num call did not sanitize them. Pre-fill the output buffer
        # with logMinDepth so invalid pixels have a defined value.
        logDepth = np.full(frameDepth.shape, logMinDepth, dtype=np.float32)
        np.log(frameDepth, out=logDepth, where=frameDepth != 0)
        # Clip the values into the [logMinDepth, logMaxDepth] range.
        logDepth = np.clip(logDepth, logMinDepth, logMaxDepth)

        # Map the log-depth range onto 0-255. Guard against a degenerate
        # range (near-uniform depth), where np.interp's xp would not be
        # strictly increasing.
        if logMaxDepth > logMinDepth:
            depthFrameColor = np.interp(logDepth, (logMinDepth, logMaxDepth), (0, 255))
        else:
            depthFrameColor = np.zeros_like(logDepth)
        depthFrameColor = depthFrameColor.astype(np.uint8)
        depthFrameColor = cv2.applyColorMap(depthFrameColor, cv2.COLORMAP_JET)
        # Set invalid depth pixels to black
        depthFrameColor[invalidMask] = 0
    except IndexError:
        # Frame is likely empty (no valid depth values for np.percentile)
        depthFrameColor = np.zeros((frameDepth.shape[0], frameDepth.shape[1], 3), dtype=np.uint8)
    return depthFrameColor
33
# Create pipeline: RGB camera -> YOLOv6-nano detections, plus a stereo pair
# producing disparity; detections are remapped onto both output streams.
with dai.Pipeline() as pipeline:
    cameraNode = pipeline.create(dai.node.Camera).build()
    detectionNetwork = pipeline.create(dai.node.DetectionNetwork).build(cameraNode, dai.NNModelDescription("yolov6-nano"))
    labelMap = detectionNetwork.getClasses()
    monoLeft = pipeline.create(dai.node.Camera).build(dai.CameraBoardSocket.CAM_B)
    monoRight = pipeline.create(dai.node.Camera).build(dai.CameraBoardSocket.CAM_C)
    stereo = pipeline.create(dai.node.StereoDepth)

    # Linking
    monoLeftOut = monoLeft.requestOutput((1280, 720))
    monoRightOut = monoRight.requestOutput((1280, 720))
    monoLeftOut.link(stereo.left)
    monoRightOut.link(stereo.right)

    stereo.setRectification(True)
    stereo.setExtendedDisparity(True)
    stereo.setLeftRightCheck(True)
    stereo.setSubpixel(True)

    qRgb = detectionNetwork.passthrough.createOutputQueue()
    qDet = detectionNetwork.out.createOutputQueue()
    qDepth = stereo.disparity.createOutputQueue()

    pipeline.start()

    def displayFrame(name: str, frame: dai.ImgFrame, imgDetections: dai.ImgDetections):
        """Draw detections (remapped into `frame`'s coordinate space) and show the frame."""
        color = (0, 255, 0)
        assert imgDetections.getTransformation() is not None
        # RAW16 frames are depth/disparity: colorize them before drawing.
        cvFrame = frame.getFrame() if frame.getType() == dai.ImgFrame.Type.RAW16 else frame.getCvFrame()
        if(frame.getType() == dai.ImgFrame.Type.RAW16):
            cvFrame = colorizeDepth(cvFrame)
        for detection in imgDetections.detections:
            # Get the shape of the frame from which the detections originated for denormalization
            normShape = imgDetections.getTransformation().getSize()

            # Create rotated rectangle to remap
            # Here we use an intermediate dai.Rect to create a dai.RotatedRect to simplify construction and denormalization
            rotRect = dai.RotatedRect(dai.Rect(dai.Point2f(detection.xmin, detection.ymin), dai.Point2f(detection.xmax, detection.ymax)).denormalize(normShape[0], normShape[1]), 0)
            # Remap the detection rectangle to target frame
            remapped = imgDetections.getTransformation().remapRectTo(frame.getTransformation(), rotRect)
            # Remapped rectangle could be rotated, so we get the bounding box
            bbox = [int(l) for l in remapped.getOuterRect()]
            cv2.putText(
                cvFrame,
                labelMap[detection.label],
                (bbox[0] + 10, bbox[1] + 20),
                cv2.FONT_HERSHEY_TRIPLEX,
                0.5,
                255,
            )
            cv2.putText(
                cvFrame,
                f"{int(detection.confidence * 100)}%",
                (bbox[0] + 10, bbox[1] + 40),
                cv2.FONT_HERSHEY_TRIPLEX,
                0.5,
                255,
            )
            cv2.rectangle(cvFrame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2)
        # Show the frame
        cv2.imshow(name, cvFrame)

    while pipeline.isRunning():
        inRgb: dai.ImgFrame = qRgb.get()
        inDet: dai.ImgDetections = qDet.get()
        inDepth: dai.ImgFrame = qDepth.get()
        hasRgb = inRgb is not None
        hasDepth = inDepth is not None
        hasDet = inDet is not None
        # BUG FIX: hasDet was computed but never checked, so displayFrame could
        # be called with inDet=None and crash on .getTransformation(). Require
        # detections as well, matching the C++ version of this example.
        if hasRgb and hasDet:
            displayFrame("rgb", inRgb, inDet)
        if hasDepth and hasDet:
            displayFrame("depth", inDepth, inDet)
        if cv2.waitKey(1) == ord("q"):
            pipeline.stop()
            break

C++

1#include <algorithm>  // Required for std::sort and std::unique
2#include <cmath>      // Required for std::log, std::isnan, std::isinf
3#include <iostream>
4#include <opencv2/opencv.hpp>
5#include <string>
6#include <vector>
7
8#include "depthai/depthai.hpp"
9#include "xtensor/containers/xadapt.hpp"
10#include "xtensor/core/xmath.hpp"
11
12cv::Mat colorizeDepth(cv::Mat frameDepth) {
13    cv::Mat invalidMask = frameDepth == 0;
14    cv::Mat depthFrameColor;
15
16    try {
17        cv::Mat frameDepthFloat;
18        frameDepth.convertTo(frameDepthFloat, CV_32F);
19        xt::xtensor<float, 2> depth =
20            xt::adapt((float*)frameDepthFloat.data, {static_cast<size_t>(frameDepthFloat.rows), static_cast<size_t>(frameDepthFloat.cols)});
21
22        // Get valid depth values (non-zero)
23        std::vector<float> validDepth;
24        validDepth.reserve(depth.size());
25        std::copy_if(depth.begin(), depth.end(), std::back_inserter(validDepth), [](float x) { return x != 0; });
26
27        if(validDepth.size() == 0) {
28            return cv::Mat::zeros(frameDepth.rows, frameDepth.cols, CV_8UC3);
29        }
30
31        // Calculate percentiles
32        std::sort(validDepth.begin(), validDepth.end());
33        float minDepth = validDepth[static_cast<size_t>(validDepth.size() * 0.03)];
34        float maxDepth = validDepth[static_cast<size_t>(validDepth.size() * 0.95)];
35
36        // Take log of depth values
37        auto logDepth = xt::eval(xt::log(depth));
38        float logMinDepth = std::log(minDepth);
39        float logMaxDepth = std::log(maxDepth);
40
41        // Replace invalid values with logMinDepth using a naive implementation
42        auto logDepthData = logDepth.data();
43        auto depthData = depth.data();
44        const size_t size = depth.size();
45        for(size_t i = 0; i < size; i++) {
46            if(std::isnan(logDepthData[i]) || std::isinf(logDepthData[i]) || depthData[i] == 0.0f) {
47                logDepthData[i] = logMinDepth;
48            }
49        }
50
51        // Clip values
52        logDepth = xt::clip(logDepth, logMinDepth, logMaxDepth);
53
54        // Normalize to 0-255 range
55        auto normalizedDepth = (logDepth - logMinDepth) / (logMaxDepth - logMinDepth) * 255.0f;
56
57        // Convert to CV_8UC1
58        cv::Mat depthMat(frameDepth.rows, frameDepth.cols, CV_8UC1);
59        std::transform(normalizedDepth.begin(), normalizedDepth.end(), depthMat.data, [](float x) { return static_cast<uchar>(x); });
60
61        // Apply colormap
62        cv::applyColorMap(depthMat, depthFrameColor, cv::COLORMAP_JET);
63
64        // Set invalid pixels to black
65        depthFrameColor.setTo(cv::Scalar(0, 0, 0), invalidMask);
66
67    } catch(const std::exception& e) {
68        std::cerr << "Error in colorizeDepth: " << e.what() << std::endl;
69        return cv::Mat::zeros(frameDepth.rows, frameDepth.cols, CV_8UC3);
70    }
71
72    return depthFrameColor;
73}
74
75// Helper function to display frames with detections
76void displayFrame(const std::string& name,
77                  std::shared_ptr<dai::ImgFrame> frame,
78                  std::shared_ptr<dai::ImgDetections> imgDetections,
79                  const std::vector<std::string>& labelMap) {
80    cv::Scalar color(0, 255, 0);
81    cv::Mat cvFrame;
82
83    if(frame->getType() == dai::ImgFrame::Type::RAW16) {
84        cvFrame = colorizeDepth(frame->getFrame());
85    } else {
86        cvFrame = frame->getCvFrame();
87    }
88
89    if(!imgDetections || !imgDetections->transformation.has_value()) {
90        // std::cout << "No detections or transformation data for " << name << std::endl;
91        cv::imshow(name, cvFrame);
92        return;
93    }
94
95    const auto& sourceTransform = *(imgDetections->transformation);
96    const auto& targetTransform = frame->transformation;
97
98    for(const auto& detection : imgDetections->detections) {
99        auto normShape = sourceTransform.getSize();
100
101        dai::Rect rect(dai::Point2f(detection.xmin, detection.ymin), dai::Point2f(detection.xmax, detection.ymax));
102        rect = rect.denormalize(static_cast<float>(normShape.first), static_cast<float>(normShape.second));
103        dai::RotatedRect rotRect(rect, 0);
104
105        auto remapped = sourceTransform.remapRectTo(targetTransform, rotRect);
106        auto bbox = remapped.getOuterRect();
107
108        cv::putText(cvFrame,
109                    labelMap[detection.label],
110                    cv::Point(static_cast<int>(bbox[0]) + 10, static_cast<int>(bbox[1]) + 20),
111                    cv::FONT_HERSHEY_TRIPLEX,
112                    0.5,
113                    cv::Scalar(255, 255, 255));
114        cv::putText(cvFrame,
115                    std::to_string(static_cast<int>(detection.confidence * 100)) + "%",
116                    cv::Point(static_cast<int>(bbox[0]) + 10, static_cast<int>(bbox[1]) + 40),
117                    cv::FONT_HERSHEY_TRIPLEX,
118                    0.5,
119                    cv::Scalar(255, 255, 255));
120        cv::rectangle(cvFrame,
121                      cv::Point(static_cast<int>(bbox[0]), static_cast<int>(bbox[1])),
122                      cv::Point(static_cast<int>(bbox[2]), static_cast<int>(bbox[3])),
123                      color,
124                      2);
125    }
126    cv::imshow(name, cvFrame);
127}
128
129int main() {
130    dai::Pipeline pipeline;
131
132    auto cameraNode = pipeline.create<dai::node::Camera>();
133    cameraNode->build();
134
135    auto detectionNetwork = pipeline.create<dai::node::DetectionNetwork>();
136    dai::NNModelDescription modelDescription;
137    modelDescription.model = "yolov6-nano";
138    detectionNetwork->build(cameraNode, modelDescription);
139    auto labelMap = detectionNetwork->getClasses().value_or(std::vector<std::string>{});
140
141    auto monoLeft = pipeline.create<dai::node::Camera>();
142    monoLeft->build(dai::CameraBoardSocket::CAM_B);
143    auto monoRight = pipeline.create<dai::node::Camera>();
144    monoRight->build(dai::CameraBoardSocket::CAM_C);
145    auto stereo = pipeline.create<dai::node::StereoDepth>();
146
147    // Linking
148    auto monoLeftOut = monoLeft->requestOutput(std::make_pair(1280, 720));
149    auto monoRightOut = monoRight->requestOutput(std::make_pair(1280, 720));
150    monoLeftOut->link(stereo->left);
151    monoRightOut->link(stereo->right);
152
153    stereo->setRectification(true);
154    stereo->setExtendedDisparity(true);
155    stereo->setLeftRightCheck(true);
156    stereo->setSubpixel(true);
157
158    auto qRgb = detectionNetwork->passthrough.createOutputQueue();
159    auto qDet = detectionNetwork->out.createOutputQueue();
160    auto qDepth = stereo->disparity.createOutputQueue();
161
162    pipeline.start();
163
164    while(pipeline.isRunning()) {
165        auto inRgb = qRgb->tryGet<dai::ImgFrame>();
166        auto inDet = qDet->tryGet<dai::ImgDetections>();
167        auto inDepth = qDepth->tryGet<dai::ImgFrame>();
168
169        bool hasRgb = inRgb != nullptr;
170        bool hasDepth = inDepth != nullptr;
171        bool hasDet = inDet != nullptr;
172
173        if(hasRgb && hasDet) {
174            displayFrame("rgb", inRgb, inDet, labelMap);
175        }
176        if(hasDepth && hasDet) {
177            displayFrame("depth", inDepth, inDet, labelMap);
178        }
179
180        if(cv::waitKey(1) == 'q') {
181            pipeline.stop();
182            break;
183        }
184    }
185
186    return 0;
187}

Need assistance?

Head over to Discussion Forum for technical support or any other questions you might have.