DepthAI
Software Stack

ON THIS PAGE

  • Demo
  • Pipeline
  • Source code

Object Tracker Remap

Supported on: RVC2, RVC4
This example demonstrates running YOLOv6-nano detection with the ObjectTracker node and remapping bounding boxes onto a colorized depth frame using transformation metadata, ensuring accurate alignment across streams (RGB ↔ Depth).

Demo

This example requires the DepthAI v3 API, see installation instructions.

Pipeline

Source code

Python

Python
GitHub
1#!/usr/bin/env python3
2
3import cv2
4import depthai as dai
5import numpy as np
6
def colorizeDepth(frameDepth):
    """Colorize a depth frame for display.

    Valid (non-zero) depth values are mapped through a log scale between
    their 3rd and 95th percentiles, colorized with the JET colormap, and
    invalid (zero-depth) pixels are blacked out.

    Args:
        frameDepth: 2D numpy array of depth values; 0 marks invalid pixels.

    Returns:
        BGR uint8 image with the same height/width as ``frameDepth``.
    """
    invalidMask = frameDepth == 0
    # Log the depth, minDepth and maxDepth
    try:
        # Select valid pixels once; percentile clipping makes the
        # visualization robust to outliers.
        validDepth = frameDepth[frameDepth != 0]
        minDepth = np.percentile(validDepth, 3)
        maxDepth = np.percentile(validDepth, 95)
        logDepth = np.zeros_like(frameDepth, dtype=np.float32)
        np.log(frameDepth, where=frameDepth != 0, out=logDepth)
        logMinDepth = np.log(minDepth)
        logMaxDepth = np.log(maxDepth)
        np.nan_to_num(logDepth, copy=False, nan=logMinDepth)
        # Clip the values to be in the 0-255 range
        logDepth = np.clip(logDepth, logMinDepth, logMaxDepth)

        # Interpolate only valid logDepth values, setting the rest based on the mask
        depthFrameColor = np.interp(logDepth, (logMinDepth, logMaxDepth), (0, 255))
        depthFrameColor = np.nan_to_num(depthFrameColor)
        depthFrameColor = depthFrameColor.astype(np.uint8)
        depthFrameColor = cv2.applyColorMap(depthFrameColor, cv2.COLORMAP_JET)
        # Set invalid depth pixels to black
        depthFrameColor[invalidMask] = 0
    except IndexError:
        # Frame has no valid depth values (np.percentile raises on an empty
        # selection) — return an all-black image of the same size.
        depthFrameColor = np.zeros((frameDepth.shape[0], frameDepth.shape[1], 3), dtype=np.uint8)
    return depthFrameColor
34
# Create pipeline
with dai.Pipeline() as pipeline:
    cameraNode = pipeline.create(dai.node.Camera).build()
    detectionNetwork = pipeline.create(dai.node.DetectionNetwork).build(cameraNode, dai.NNModelDescription("yolov6-nano"))
    objectTracker = pipeline.create(dai.node.ObjectTracker)
    labelMap = detectionNetwork.getClasses()
    # Stereo pair for depth.
    monoLeft = pipeline.create(dai.node.Camera).build(dai.CameraBoardSocket.CAM_B)
    monoRight = pipeline.create(dai.node.Camera).build(dai.CameraBoardSocket.CAM_C)
    stereo = pipeline.create(dai.node.StereoDepth)

    # Linking
    monoLeftOut = monoLeft.requestOutput((1280, 720))
    monoRightOut = monoRight.requestOutput((1280, 720))
    monoLeftOut.link(stereo.left)
    monoRightOut.link(stereo.right)

    detectionNetwork.out.link(objectTracker.inputDetections)
    detectionNetwork.passthrough.link(objectTracker.inputDetectionFrame)
    detectionNetwork.passthrough.link(objectTracker.inputTrackerFrame)

    stereo.setRectification(True)
    stereo.setExtendedDisparity(True)
    stereo.setLeftRightCheck(True)
    stereo.setSubpixel(True)

    qRgb = detectionNetwork.passthrough.createOutputQueue()
    qTrack = objectTracker.out.createOutputQueue()
    qDepth = stereo.disparity.createOutputQueue()

    pipeline.start()

    def displayFrame(name: str, frame: dai.ImgFrame, tracklets: dai.Tracklets):
        """Draw tracked bounding boxes remapped onto `frame` and show it.

        Tracklet ROIs are normalized with respect to the detection frame;
        each one is denormalized and remapped into `frame`'s coordinate
        space using the transformation metadata carried by both messages.
        """
        color = (0, 255, 0)
        assert tracklets.getTransformation() is not None
        cvFrame = frame.getFrame() if frame.getType() == dai.ImgFrame.Type.RAW16 else frame.getCvFrame()
        if frame.getType() == dai.ImgFrame.Type.RAW16:
            # RAW16 frames carry raw depth values; colorize them for display.
            cvFrame = colorizeDepth(cvFrame)
        for tracklet in tracklets.tracklets:
            # Get the shape of the frame from which the detections originated for denormalization
            normShape = tracklets.getTransformation().getSize()

            # Create rotated rectangle to remap
            # Here we use an intermediate dai.Rect to create a dai.RotatedRect to simplify construction and denormalization
            rotRect = dai.RotatedRect(tracklet.roi.denormalize(normShape[0], normShape[1]), 0)
            # Remap the detection rectangle to target frame
            remapped = tracklets.getTransformation().remapRectTo(frame.getTransformation(), rotRect)
            # Remapped rectangle could be rotated, so we get the bounding box
            bbox = [int(l) for l in remapped.getOuterRect()]
            cv2.putText(
                cvFrame,
                labelMap[tracklet.label],
                (bbox[0] + 10, bbox[1] + 20),
                cv2.FONT_HERSHEY_TRIPLEX,
                0.5,
                255,
            )
            cv2.putText(
                cvFrame,
                f"{int(tracklet.srcImgDetection.confidence * 100)}%",
                (bbox[0] + 10, bbox[1] + 40),
                cv2.FONT_HERSHEY_TRIPLEX,
                0.5,
                255,
            )
            cv2.rectangle(cvFrame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2)
        # Show the frame
        cv2.imshow(name, cvFrame)

    while pipeline.isRunning():
        inRgb: dai.ImgFrame = qRgb.get()
        inTrack: dai.Tracklets = qTrack.get()
        inDepth: dai.ImgFrame = qDepth.get()
        hasRgb = inRgb is not None
        hasDepth = inDepth is not None
        hasTrack = inTrack is not None
        # displayFrame dereferences the tracklets message, so only draw when
        # tracking data arrived alongside the frame (matches the C++ sample,
        # which guards on both conditions; previously hasTrack was unused).
        if hasRgb and hasTrack:
            displayFrame("rgb", inRgb, inTrack)
        if hasDepth and hasTrack:
            displayFrame("depth", inDepth, inTrack)
        if cv2.waitKey(1) == ord("q"):
            pipeline.stop()
            break

C++

1#include <algorithm>  // Required for std::sort and std::unique
2#include <cmath>      // Required for std::log, std::isnan, std::isinf
3#include <iostream>
4#include <opencv2/opencv.hpp>
5#include <string>
6#include <vector>
7
8#include "depthai/depthai.hpp"
9#include "xtensor/containers/xadapt.hpp"
10#include "xtensor/core/xmath.hpp"
11
// Colorize a depth frame for display.
// Valid (non-zero) depth values are mapped through a log scale between their
// 3rd and 95th percentiles, colorized with the JET colormap, and invalid
// (zero-depth) pixels are set to black.
// NOTE(review): assumes frameDepth is a single-channel depth image — confirm
// against the caller (displayFrame passes a RAW16 frame's data).
cv::Mat colorizeDepth(cv::Mat frameDepth) {
    // Pixels with depth == 0 carry no measurement; they are blacked out at the end.
    cv::Mat invalidMask = frameDepth == 0;
    cv::Mat depthFrameColor;

    try {
        cv::Mat frameDepthFloat;
        frameDepth.convertTo(frameDepthFloat, CV_32F);
        // Bring the OpenCV buffer into an xtensor for elementwise math.
        xt::xtensor<float, 2> depth =
            xt::adapt((float*)frameDepthFloat.data, {static_cast<size_t>(frameDepthFloat.rows), static_cast<size_t>(frameDepthFloat.cols)});

        // Get valid depth values (non-zero)
        std::vector<float> validDepth;
        validDepth.reserve(depth.size());
        std::copy_if(depth.begin(), depth.end(), std::back_inserter(validDepth), [](float x) { return x != 0; });

        if(validDepth.size() == 0) {
            // Nothing valid to visualize — return an all-black image.
            return cv::Mat::zeros(frameDepth.rows, frameDepth.cols, CV_8UC3);
        }

        // Calculate percentiles (percentile clipping keeps the visualization
        // robust to outliers); sort once and index at the 3%/95% positions.
        std::sort(validDepth.begin(), validDepth.end());
        float minDepth = validDepth[static_cast<size_t>(validDepth.size() * 0.03)];
        float maxDepth = validDepth[static_cast<size_t>(validDepth.size() * 0.95)];

        // Take log of depth values
        auto logDepth = xt::eval(xt::log(depth));
        float logMinDepth = std::log(minDepth);
        float logMaxDepth = std::log(maxDepth);

        // Replace invalid values with logMinDepth using a naive implementation
        // (log of zero-depth pixels produced -inf/NaN above).
        auto logDepthData = logDepth.data();
        auto depthData = depth.data();
        const size_t size = depth.size();
        for(size_t i = 0; i < size; i++) {
            if(std::isnan(logDepthData[i]) || std::isinf(logDepthData[i]) || depthData[i] == 0.0f) {
                logDepthData[i] = logMinDepth;
            }
        }

        // Clip values
        logDepth = xt::clip(logDepth, logMinDepth, logMaxDepth);

        // Normalize to 0-255 range
        auto normalizedDepth = (logDepth - logMinDepth) / (logMaxDepth - logMinDepth) * 255.0f;

        // Convert to CV_8UC1
        cv::Mat depthMat(frameDepth.rows, frameDepth.cols, CV_8UC1);
        std::transform(normalizedDepth.begin(), normalizedDepth.end(), depthMat.data, [](float x) { return static_cast<uchar>(x); });

        // Apply colormap
        cv::applyColorMap(depthMat, depthFrameColor, cv::COLORMAP_JET);

        // Set invalid pixels to black
        depthFrameColor.setTo(cv::Scalar(0, 0, 0), invalidMask);

    } catch(const std::exception& e) {
        // Best-effort: log and fall back to an all-black image.
        std::cerr << "Error in colorizeDepth: " << e.what() << std::endl;
        return cv::Mat::zeros(frameDepth.rows, frameDepth.cols, CV_8UC3);
    }

    return depthFrameColor;
}
74
75// Helper function to display frames with detections
76void displayFrame(const std::string& name,
77                  std::shared_ptr<dai::ImgFrame> frame,
78                  std::shared_ptr<dai::Tracklets> tracklets,
79                  const std::vector<std::string>& labelMap) {
80    cv::Scalar color(0, 255, 0);
81    cv::Mat cvFrame;
82
83    if(frame->getType() == dai::ImgFrame::Type::RAW16) {
84        cvFrame = colorizeDepth(frame->getFrame());
85    } else {
86        cvFrame = frame->getCvFrame();
87    }
88
89    if(!tracklets) {
90        // std::cout << "No detections or transformation data for " << name << std::endl;
91        cv::imshow(name, cvFrame);
92        return;
93    }
94
95    const auto& sourceTransform = tracklets->transformation;
96    const auto& targetTransform = frame->transformation;
97
98    for(const auto& tracklet : tracklets->tracklets) {
99        auto normShape = sourceTransform.getSize();
100
101        dai::Rect rect = tracklet.roi;
102        rect = rect.denormalize(static_cast<float>(normShape.first), static_cast<float>(normShape.second));
103        dai::RotatedRect rotRect(rect, 0);
104
105        auto remapped = sourceTransform.remapRectTo(targetTransform, rotRect);
106        auto bbox = remapped.getOuterRect();
107
108        cv::putText(cvFrame,
109                    labelMap[tracklet.label],
110                    cv::Point(static_cast<int>(bbox[0]) + 10, static_cast<int>(bbox[1]) + 20),
111                    cv::FONT_HERSHEY_TRIPLEX,
112                    0.5,
113                    cv::Scalar(255, 255, 255));
114        cv::putText(cvFrame,
115                    std::to_string(static_cast<int>(tracklet.srcImgDetection.confidence * 100)) + "%",
116                    cv::Point(static_cast<int>(bbox[0]) + 10, static_cast<int>(bbox[1]) + 40),
117                    cv::FONT_HERSHEY_TRIPLEX,
118                    0.5,
119                    cv::Scalar(255, 255, 255));
120        cv::rectangle(cvFrame,
121                      cv::Point(static_cast<int>(bbox[0]), static_cast<int>(bbox[1])),
122                      cv::Point(static_cast<int>(bbox[2]), static_cast<int>(bbox[3])),
123                      color,
124                      2);
125    }
126    cv::imshow(name, cvFrame);
127}
128
129int main() {
130    dai::Pipeline pipeline;
131
132    auto cameraNode = pipeline.create<dai::node::Camera>();
133    cameraNode->build();
134
135    auto detectionNetwork = pipeline.create<dai::node::DetectionNetwork>();
136    dai::NNModelDescription modelDescription;
137    modelDescription.model = "yolov6-nano";
138    detectionNetwork->build(cameraNode, modelDescription);
139    auto objectTracker = pipeline.create<dai::node::ObjectTracker>();
140    auto labelMap = detectionNetwork->getClasses().value_or(std::vector<std::string>{});
141
142    auto monoLeft = pipeline.create<dai::node::Camera>();
143    monoLeft->build(dai::CameraBoardSocket::CAM_B);
144    auto monoRight = pipeline.create<dai::node::Camera>();
145    monoRight->build(dai::CameraBoardSocket::CAM_C);
146    auto stereo = pipeline.create<dai::node::StereoDepth>();
147
148    // Linking
149    auto monoLeftOut = monoLeft->requestOutput(std::make_pair(1280, 720));
150    auto monoRightOut = monoRight->requestOutput(std::make_pair(1280, 720));
151    monoLeftOut->link(stereo->left);
152    monoRightOut->link(stereo->right);
153
154    detectionNetwork->out.link(objectTracker->inputDetections);
155    detectionNetwork->passthrough.link(objectTracker->inputDetectionFrame);
156    detectionNetwork->passthrough.link(objectTracker->inputTrackerFrame);
157
158    stereo->setRectification(true);
159    stereo->setExtendedDisparity(true);
160    stereo->setLeftRightCheck(true);
161    stereo->setSubpixel(true);
162
163    auto qRgb = detectionNetwork->passthrough.createOutputQueue();
164    auto qTrack = objectTracker->out.createOutputQueue();
165    auto qDepth = stereo->disparity.createOutputQueue();
166
167    pipeline.start();
168
169    while(pipeline.isRunning()) {
170        auto inRgb = qRgb->tryGet<dai::ImgFrame>();
171        auto inTrack = qTrack->tryGet<dai::Tracklets>();
172        auto inDepth = qDepth->tryGet<dai::ImgFrame>();
173
174        bool hasRgb = inRgb != nullptr;
175        bool hasDepth = inDepth != nullptr;
176        bool hasTrack = inTrack != nullptr;
177
178        if(hasRgb && hasTrack) {
179            displayFrame("rgb", inRgb, inTrack, labelMap);
180        }
181        if(hasDepth && hasTrack) {
182            displayFrame("depth", inDepth, inTrack, labelMap);
183        }
184
185        if(cv::waitKey(1) == 'q') {
186            pipeline.stop();
187            break;
188        }
189    }
190
191    return 0;
192}

Need assistance?

Head over to Discussion Forum for technical support or any other questions you might have.