DepthAI v2 has been superseded by DepthAI v3. You are viewing legacy documentation.
DepthAI Tutorials
DepthAI API References

ON THIS PAGE

  • Setup
  • Source code
  • Pipeline

Spatial object tracker on RGB

This example shows how to run MobileNetv2SSD on the RGB input frame and perform spatial object tracking on persons. setConfidenceThreshold() — confidence threshold above which objects are detected

Similar samples:

Setup

Please run the install script to download all required dependencies. Please note that this script must be run from a git context, so you have to clone the depthai-python repository first and then run the script.
Command Line
1git clone https://github.com/luxonis/depthai-python.git
2cd depthai-python/examples
3python3 install_requirements.py
For additional information, please follow the installation guide.

Source code

Python

Python
GitHub
#!/usr/bin/env python3
"""Spatial object tracker on RGB.

Runs MobileNetSSD on the 300x300 RGB preview, fuses the detections with
stereo depth to obtain XYZ coordinates, and tracks persons with the
on-device ObjectTracker node. Press 'q' in the preview window to quit.
"""

from pathlib import Path
import cv2
import depthai as dai
import numpy as np
import time
import argparse

# MobileNetSSD (VOC) class labels; index 15 is "person".
labelMap = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow",
            "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]

nnPathDefault = str((Path(__file__).parent / Path('../models/mobilenet-ssd_openvino_2021.4_5shave.blob')).resolve().absolute())
parser = argparse.ArgumentParser()
parser.add_argument('nnPath', nargs='?', help="Path to mobilenet detection network blob", default=nnPathDefault)
parser.add_argument('-ff', '--full_frame', action="store_true", help="Perform tracking on full RGB frame", default=False)

args = parser.parse_args()

fullFrameTracking = args.full_frame

# Create pipeline
pipeline = dai.Pipeline()

# Define sources and outputs
camRgb = pipeline.create(dai.node.ColorCamera)
spatialDetectionNetwork = pipeline.create(dai.node.MobileNetSpatialDetectionNetwork)
monoLeft = pipeline.create(dai.node.MonoCamera)
monoRight = pipeline.create(dai.node.MonoCamera)
stereo = pipeline.create(dai.node.StereoDepth)
objectTracker = pipeline.create(dai.node.ObjectTracker)

xoutRgb = pipeline.create(dai.node.XLinkOut)
trackerOut = pipeline.create(dai.node.XLinkOut)

xoutRgb.setStreamName("preview")
trackerOut.setStreamName("tracklets")

# Properties
camRgb.setPreviewSize(300, 300)  # preview must match the NN input size
camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
camRgb.setInterleaved(False)
camRgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.BGR)

monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoLeft.setCamera("left")
monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoRight.setCamera("right")

# setting node configs
stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY)
# Align depth map to the perspective of RGB camera, on which inference is done
stereo.setDepthAlign(dai.CameraBoardSocket.CAM_A)
stereo.setOutputSize(monoLeft.getResolutionWidth(), monoLeft.getResolutionHeight())

spatialDetectionNetwork.setBlobPath(args.nnPath)
spatialDetectionNetwork.setConfidenceThreshold(0.5)
spatialDetectionNetwork.input.setBlocking(False)
spatialDetectionNetwork.setBoundingBoxScaleFactor(0.5)
# Ignore depth readings outside 0.1 m .. 5 m when averaging the ROI.
spatialDetectionNetwork.setDepthLowerThreshold(100)
spatialDetectionNetwork.setDepthUpperThreshold(5000)

objectTracker.setDetectionLabelsToTrack([15])  # track only person
# possible tracking types: ZERO_TERM_COLOR_HISTOGRAM, ZERO_TERM_IMAGELESS, SHORT_TERM_IMAGELESS, SHORT_TERM_KCF
objectTracker.setTrackerType(dai.TrackerType.ZERO_TERM_COLOR_HISTOGRAM)
# take the smallest ID when new object is tracked, possible options: SMALLEST_ID, UNIQUE_ID
objectTracker.setTrackerIdAssignmentPolicy(dai.TrackerIdAssignmentPolicy.SMALLEST_ID)

# Linking
monoLeft.out.link(stereo.left)
monoRight.out.link(stereo.right)

camRgb.preview.link(spatialDetectionNetwork.input)
objectTracker.passthroughTrackerFrame.link(xoutRgb.input)
objectTracker.out.link(trackerOut.input)

if fullFrameTracking:
    camRgb.setPreviewKeepAspectRatio(False)
    camRgb.video.link(objectTracker.inputTrackerFrame)
    objectTracker.inputTrackerFrame.setBlocking(False)
    # do not block the pipeline if it's too slow on full frame
    objectTracker.inputTrackerFrame.setQueueSize(2)
else:
    spatialDetectionNetwork.passthrough.link(objectTracker.inputTrackerFrame)

spatialDetectionNetwork.passthrough.link(objectTracker.inputDetectionFrame)
spatialDetectionNetwork.out.link(objectTracker.inputDetections)
stereo.depth.link(spatialDetectionNetwork.inputDepth)

# Connect to device and start pipeline
with dai.Device(pipeline) as device:

    preview = device.getOutputQueue("preview", 4, False)
    tracklets = device.getOutputQueue("tracklets", 4, False)

    startTime = time.monotonic()
    counter = 0
    fps = 0
    color = (255, 255, 255)

    while True:
        imgFrame = preview.get()
        track = tracklets.get()

        # Recompute displayed FPS roughly once per second.
        counter += 1
        current_time = time.monotonic()
        if (current_time - startTime) > 1:
            fps = counter / (current_time - startTime)
            counter = 0
            startTime = current_time

        frame = imgFrame.getCvFrame()
        trackletsData = track.tracklets
        for t in trackletsData:
            # Tracklet ROI is normalized (0..1); scale to pixel coordinates.
            roi = t.roi.denormalize(frame.shape[1], frame.shape[0])
            x1 = int(roi.topLeft().x)
            y1 = int(roi.topLeft().y)
            x2 = int(roi.bottomRight().x)
            y2 = int(roi.bottomRight().y)

            # Fall back to the raw label index if it's outside labelMap.
            # (Bare `except:` would also swallow KeyboardInterrupt etc.)
            try:
                label = labelMap[t.label]
            except IndexError:
                label = t.label

            cv2.putText(frame, str(label), (x1 + 10, y1 + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            # Fixed: the id was wrapped in a list, rendering as "ID: [1]".
            cv2.putText(frame, f"ID: {t.id}", (x1 + 10, y1 + 35), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(frame, t.status.name, (x1 + 10, y1 + 50), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            # Fixed: cv2.FONT_HERSHEY_SIMPLEX (a font enum) was passed as the
            # line-thickness argument; use an explicit thickness instead.
            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 1)

            cv2.putText(frame, f"X: {int(t.spatialCoordinates.x)} mm", (x1 + 10, y1 + 65), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(frame, f"Y: {int(t.spatialCoordinates.y)} mm", (x1 + 10, y1 + 80), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(frame, f"Z: {int(t.spatialCoordinates.z)} mm", (x1 + 10, y1 + 95), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)

        cv2.putText(frame, "NN fps: {:.2f}".format(fps), (2, frame.shape[0] - 4), cv2.FONT_HERSHEY_TRIPLEX, 0.4, color)

        cv2.imshow("tracker", frame)

        if cv2.waitKey(1) == ord('q'):
            break

C++

1#include <chrono>
2
3#include "utility.hpp"
4
5// Includes common necessary includes for development using depthai library
6#include "depthai/depthai.hpp"
7
8static const std::vector<std::string> labelMap = {"background", "aeroplane", "bicycle",     "bird",  "boat",        "bottle", "bus",
9                                                  "car",        "cat",       "chair",       "cow",   "diningtable", "dog",    "horse",
10                                                  "motorbike",  "person",    "pottedplant", "sheep", "sofa",        "train",  "tvmonitor"};
11
12static std::atomic<bool> fullFrameTracking{false};
13
14int main(int argc, char** argv) {
15    using namespace std;
16    using namespace std::chrono;
17    std::string nnPath(BLOB_PATH);
18
19    // If path to blob specified, use that
20    if(argc > 1) {
21        nnPath = std::string(argv[1]);
22    }
23
24    // Print which blob we are using
25    printf("Using blob at path: %s\n", nnPath.c_str());
26
27    // Create pipeline
28    dai::Pipeline pipeline;
29
30    // Define sources and outputs
31    auto camRgb = pipeline.create<dai::node::ColorCamera>();
32    auto spatialDetectionNetwork = pipeline.create<dai::node::MobileNetSpatialDetectionNetwork>();
33    auto monoLeft = pipeline.create<dai::node::MonoCamera>();
34    auto monoRight = pipeline.create<dai::node::MonoCamera>();
35    auto stereo = pipeline.create<dai::node::StereoDepth>();
36    auto objectTracker = pipeline.create<dai::node::ObjectTracker>();
37
38    auto xoutRgb = pipeline.create<dai::node::XLinkOut>();
39    auto trackerOut = pipeline.create<dai::node::XLinkOut>();
40
41    xoutRgb->setStreamName("preview");
42    trackerOut->setStreamName("tracklets");
43
44    // Properties
45    camRgb->setPreviewSize(300, 300);
46    camRgb->setResolution(dai::ColorCameraProperties::SensorResolution::THE_1080_P);
47    camRgb->setInterleaved(false);
48    camRgb->setColorOrder(dai::ColorCameraProperties::ColorOrder::BGR);
49
50    monoLeft->setResolution(dai::MonoCameraProperties::SensorResolution::THE_400_P);
51    monoLeft->setCamera("left");
52    monoRight->setResolution(dai::MonoCameraProperties::SensorResolution::THE_400_P);
53    monoRight->setCamera("right");
54
55    // setting node configs
56    stereo->setDefaultProfilePreset(dai::node::StereoDepth::PresetMode::HIGH_DENSITY);
57    // Align depth map to the perspective of RGB camera, on which inference is done
58    stereo->setDepthAlign(dai::CameraBoardSocket::CAM_A);
59    stereo->setOutputSize(monoLeft->getResolutionWidth(), monoLeft->getResolutionHeight());
60
61    spatialDetectionNetwork->setBlobPath(nnPath);
62    spatialDetectionNetwork->setConfidenceThreshold(0.5f);
63    spatialDetectionNetwork->input.setBlocking(false);
64    spatialDetectionNetwork->setBoundingBoxScaleFactor(0.5);
65    spatialDetectionNetwork->setDepthLowerThreshold(100);
66    spatialDetectionNetwork->setDepthUpperThreshold(5000);
67
68    objectTracker->setDetectionLabelsToTrack({15});  // track only person
69    // possible tracking types: ZERO_TERM_COLOR_HISTOGRAM, ZERO_TERM_IMAGELESS, SHORT_TERM_IMAGELESS, SHORT_TERM_KCF
70    objectTracker->setTrackerType(dai::TrackerType::ZERO_TERM_COLOR_HISTOGRAM);
71    // take the smallest ID when new object is tracked, possible options: SMALLEST_ID, UNIQUE_ID
72    objectTracker->setTrackerIdAssignmentPolicy(dai::TrackerIdAssignmentPolicy::SMALLEST_ID);
73
74    // Linking
75    monoLeft->out.link(stereo->left);
76    monoRight->out.link(stereo->right);
77
78    camRgb->preview.link(spatialDetectionNetwork->input);
79    objectTracker->passthroughTrackerFrame.link(xoutRgb->input);
80    objectTracker->out.link(trackerOut->input);
81
82    if(fullFrameTracking) {
83        camRgb->setPreviewKeepAspectRatio(false);
84        camRgb->video.link(objectTracker->inputTrackerFrame);
85        objectTracker->inputTrackerFrame.setBlocking(false);
86        // do not block the pipeline if it's too slow on full frame
87        objectTracker->inputTrackerFrame.setQueueSize(2);
88    } else {
89        spatialDetectionNetwork->passthrough.link(objectTracker->inputTrackerFrame);
90    }
91
92    spatialDetectionNetwork->passthrough.link(objectTracker->inputDetectionFrame);
93    spatialDetectionNetwork->out.link(objectTracker->inputDetections);
94    stereo->depth.link(spatialDetectionNetwork->inputDepth);
95
96    // Connect to device and start pipeline
97    dai::Device device(pipeline);
98
99    auto preview = device.getOutputQueue("preview", 4, false);
100    auto tracklets = device.getOutputQueue("tracklets", 4, false);
101
102    auto startTime = steady_clock::now();
103    int counter = 0;
104    float fps = 0;
105    auto color = cv::Scalar(255, 255, 255);
106
107    while(true) {
108        auto imgFrame = preview->get<dai::ImgFrame>();
109        auto track = tracklets->get<dai::Tracklets>();
110
111        counter++;
112        auto currentTime = steady_clock::now();
113        auto elapsed = duration_cast<duration<float>>(currentTime - startTime);
114        if(elapsed > seconds(1)) {
115            fps = counter / elapsed.count();
116            counter = 0;
117            startTime = currentTime;
118        }
119
120        cv::Mat frame = imgFrame->getCvFrame();
121        auto trackletsData = track->tracklets;
122        for(auto& t : trackletsData) {
123            auto roi = t.roi.denormalize(frame.cols, frame.rows);
124            int x1 = roi.topLeft().x;
125            int y1 = roi.topLeft().y;
126            int x2 = roi.bottomRight().x;
127            int y2 = roi.bottomRight().y;
128
129            uint32_t labelIndex = t.label;
130            std::string labelStr = to_string(labelIndex);
131            if(labelIndex < labelMap.size()) {
132                labelStr = labelMap[labelIndex];
133            }
134            cv::putText(frame, labelStr, cv::Point(x1 + 10, y1 + 20), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255);
135
136            std::stringstream idStr;
137            idStr << "ID: " << t.id;
138            cv::putText(frame, idStr.str(), cv::Point(x1 + 10, y1 + 35), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255);
139            std::stringstream statusStr;
140            statusStr << "Status: " << t.status;
141            cv::putText(frame, statusStr.str(), cv::Point(x1 + 10, y1 + 50), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255);
142
143            std::stringstream depthX;
144            depthX << "X: " << (int)t.spatialCoordinates.x << " mm";
145            cv::putText(frame, depthX.str(), cv::Point(x1 + 10, y1 + 65), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255);
146            std::stringstream depthY;
147            depthY << "Y: " << (int)t.spatialCoordinates.y << " mm";
148            cv::putText(frame, depthY.str(), cv::Point(x1 + 10, y1 + 80), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255);
149            std::stringstream depthZ;
150            depthZ << "Z: " << (int)t.spatialCoordinates.z << " mm";
151            cv::putText(frame, depthZ.str(), cv::Point(x1 + 10, y1 + 95), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255);
152
153            cv::rectangle(frame, cv::Rect(cv::Point(x1, y1), cv::Point(x2, y2)), color, cv::FONT_HERSHEY_SIMPLEX);
154        }
155
156        std::stringstream fpsStr;
157        fpsStr << "NN fps: " << std::fixed << std::setprecision(2) << fps;
158        cv::putText(frame, fpsStr.str(), cv::Point(2, imgFrame->getHeight() - 4), cv::FONT_HERSHEY_TRIPLEX, 0.4, color);
159
160        cv::imshow("tracker", frame);
161
162        int key = cv::waitKey(1);
163        if(key == 'q') {
164            return 0;
165        }
166    }
167    return 0;
168}

Pipeline

Need assistance?

Head over to Discussion Forum for technical support or any other questions you might have.