DepthAI v2 has been superseded by DepthAI v3. You are viewing legacy documentation.
DepthAI Tutorials
DepthAI API References

ON THIS PAGE

  • Demo
  • Setup
  • Source code
  • Pipeline

RGB & MobilenetSSD with spatial data

This example shows how to run MobileNetv2SSD on the RGB input frame, and how to display both the RGB preview, detections, depth map and spatial information (X,Y,Z). It's similar to example RGB & MobilenetSSD except it has spatial data. X,Y,Z coordinates are relative to the center of the depth map. The `setConfidenceThreshold` call sets the confidence threshold above which objects are detected.

Similar samples:

Demo

Setup

Please run the install script to download all required dependencies. Please note that this script must be run from a git context, so you have to clone the depthai-python repository first and then run the script.
Command Line
1git clone https://github.com/luxonis/depthai-python.git
2cd depthai-python/examples
3python3 install_requirements.py
For additional information, please follow the installation guide.

Source code

Python

Python
GitHub
#!/usr/bin/env python3

from pathlib import Path
import sys
import cv2
import depthai as dai
import numpy as np
import time

'''
Spatial detection network demo.
    Performs inference on RGB camera and retrieves spatial location coordinates: x,y,z relative to the center of depth map.
'''

# Default blob location is relative to this script; the first CLI argument overrides it.
nnBlobPath = str((Path(__file__).parent / Path('../models/mobilenet-ssd_openvino_2021.4_6shave.blob')).resolve().absolute())
if len(sys.argv) > 1:
    nnBlobPath = sys.argv[1]

if not Path(nnBlobPath).exists():
    # `sys` is already imported at the top of the file; no re-import needed here.
    raise FileNotFoundError(f'Required file/s not found, please run "{sys.executable} install_requirements.py"')

# MobilenetSSD label texts (21-class PASCAL VOC; list index == network label id)
labelMap = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow",
            "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]

# When True, show the NN passthrough frame so drawn detections stay in sync with the preview.
syncNN = True

# Create pipeline
pipeline = dai.Pipeline()

# Define sources and outputs
camRgb = pipeline.create(dai.node.ColorCamera)
spatialDetectionNetwork = pipeline.create(dai.node.MobileNetSpatialDetectionNetwork)
monoLeft = pipeline.create(dai.node.MonoCamera)
monoRight = pipeline.create(dai.node.MonoCamera)
stereo = pipeline.create(dai.node.StereoDepth)

xoutRgb = pipeline.create(dai.node.XLinkOut)
xoutNN = pipeline.create(dai.node.XLinkOut)
xoutDepth = pipeline.create(dai.node.XLinkOut)

xoutRgb.setStreamName("rgb")
xoutNN.setStreamName("detections")
xoutDepth.setStreamName("depth")

# Properties: 300x300 planar BGR preview matches the MobileNet-SSD input tensor
camRgb.setPreviewSize(300, 300)
camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
camRgb.setInterleaved(False)
camRgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.BGR)

monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoLeft.setCamera("left")
monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoRight.setCamera("right")

# Setting node configs
stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY)
# Align depth map to the perspective of RGB camera, on which inference is done
stereo.setDepthAlign(dai.CameraBoardSocket.CAM_A)
stereo.setSubpixel(True)
stereo.setOutputSize(monoLeft.getResolutionWidth(), monoLeft.getResolutionHeight())

spatialDetectionNetwork.setBlobPath(nnBlobPath)
spatialDetectionNetwork.setConfidenceThreshold(0.5)
spatialDetectionNetwork.input.setBlocking(False)
# Average depth over the central 50% of each bounding box, clamped to [100, 5000] mm
spatialDetectionNetwork.setBoundingBoxScaleFactor(0.5)
spatialDetectionNetwork.setDepthLowerThreshold(100)
spatialDetectionNetwork.setDepthUpperThreshold(5000)

# Linking
monoLeft.out.link(stereo.left)
monoRight.out.link(stereo.right)

camRgb.preview.link(spatialDetectionNetwork.input)
if syncNN:
    spatialDetectionNetwork.passthrough.link(xoutRgb.input)
else:
    camRgb.preview.link(xoutRgb.input)

spatialDetectionNetwork.out.link(xoutNN.input)

stereo.depth.link(spatialDetectionNetwork.inputDepth)
spatialDetectionNetwork.passthroughDepth.link(xoutDepth.input)

# Connect to device and start pipeline
with dai.Device(pipeline) as device:

    # Output queues will be used to get the rgb frames and nn data from the outputs defined above
    previewQueue = device.getOutputQueue(name="rgb", maxSize=4, blocking=False)
    detectionNNQueue = device.getOutputQueue(name="detections", maxSize=4, blocking=False)
    depthQueue = device.getOutputQueue(name="depth", maxSize=4, blocking=False)

    startTime = time.monotonic()
    counter = 0
    fps = 0
    color = (255, 255, 255)

    while True:
        inPreview = previewQueue.get()
        inDet = detectionNNQueue.get()
        depth = depthQueue.get()

        counter += 1
        current_time = time.monotonic()
        if (current_time - startTime) > 1:
            fps = counter / (current_time - startTime)
            counter = 0
            startTime = current_time

        frame = inPreview.getCvFrame()

        depthFrame = depth.getFrame()  # depthFrame values are in millimeters

        # Row-subsample the depth map (cheap) to estimate robust display-range percentiles
        depth_downscaled = depthFrame[::4]
        if np.all(depth_downscaled == 0):
            min_depth = 0  # Set a default minimum depth value when all elements are zero
        else:
            min_depth = np.percentile(depth_downscaled[depth_downscaled != 0], 1)
        max_depth = np.percentile(depth_downscaled, 99)
        depthFrameColor = np.interp(depthFrame, (min_depth, max_depth), (0, 255)).astype(np.uint8)
        depthFrameColor = cv2.applyColorMap(depthFrameColor, cv2.COLORMAP_HOT)

        detections = inDet.detections

        # If the frame is available, draw bounding boxes on it and show the frame
        height = frame.shape[0]
        width = frame.shape[1]
        for detection in detections:
            # ROI on the depth frame that the device used to compute spatial coordinates
            roiData = detection.boundingBoxMapping
            roi = roiData.roi
            roi = roi.denormalize(depthFrameColor.shape[1], depthFrameColor.shape[0])
            topLeft = roi.topLeft()
            bottomRight = roi.bottomRight()
            xmin = int(topLeft.x)
            ymin = int(topLeft.y)
            xmax = int(bottomRight.x)
            ymax = int(bottomRight.y)
            cv2.rectangle(depthFrameColor, (xmin, ymin), (xmax, ymax), color, 1)

            # Denormalize bounding box
            x1 = int(detection.xmin * width)
            x2 = int(detection.xmax * width)
            y1 = int(detection.ymin * height)
            y2 = int(detection.ymax * height)
            # Explicit bounds check (was a bare `except:`): fall back to the raw label id
            if 0 <= detection.label < len(labelMap):
                label = labelMap[detection.label]
            else:
                label = detection.label
            cv2.putText(frame, str(label), (x1 + 10, y1 + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(frame, "{:.2f}".format(detection.confidence*100), (x1 + 10, y1 + 35), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(frame, f"X: {int(detection.spatialCoordinates.x)} mm", (x1 + 10, y1 + 50), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(frame, f"Y: {int(detection.spatialCoordinates.y)} mm", (x1 + 10, y1 + 65), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(frame, f"Z: {int(detection.spatialCoordinates.z)} mm", (x1 + 10, y1 + 80), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)

            # Fix: the original passed cv2.FONT_HERSHEY_SIMPLEX (== 0) as the
            # line-thickness argument; use an explicit positive thickness instead.
            cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)

        cv2.putText(frame, "NN fps: {:.2f}".format(fps), (2, frame.shape[0] - 4), cv2.FONT_HERSHEY_TRIPLEX, 0.4, (255, 255, 255))
        cv2.imshow("depth", depthFrameColor)
        cv2.imshow("preview", frame)

        if cv2.waitKey(1) == ord('q'):
            break

C++

#include <chrono>
#include <iomanip>
#include <iostream>
#include <sstream>

// Includes common necessary includes for development using depthai library
#include "depthai/depthai.hpp"
6
// Class labels for the 21-class PASCAL VOC MobileNet-SSD model; vector index == network label id.
static const std::vector<std::string> labelMap = {
    "background", "aeroplane",   "bicycle", "bird",  "boat",
    "bottle",     "bus",         "car",     "cat",   "chair",
    "cow",        "diningtable", "dog",     "horse", "motorbike",
    "person",     "pottedplant", "sheep",   "sofa",  "train",
    "tvmonitor"};

// When true, the displayed RGB preview is the NN passthrough frame,
// which keeps the drawn detections synchronized with the image.
static std::atomic<bool> syncNN{true};
12
13int main(int argc, char** argv) {
14    using namespace std;
15    using namespace std::chrono;
16    std::string nnPath(BLOB_PATH);
17
18    // If path to blob specified, use that
19    if(argc > 1) {
20        nnPath = std::string(argv[1]);
21    }
22
23    // Print which blob we are using
24    printf("Using blob at path: %s\n", nnPath.c_str());
25
26    // Create pipeline
27    dai::Pipeline pipeline;
28
29    // Define sources and outputs
30    auto camRgb = pipeline.create<dai::node::ColorCamera>();
31    auto spatialDetectionNetwork = pipeline.create<dai::node::MobileNetSpatialDetectionNetwork>();
32    auto monoLeft = pipeline.create<dai::node::MonoCamera>();
33    auto monoRight = pipeline.create<dai::node::MonoCamera>();
34    auto stereo = pipeline.create<dai::node::StereoDepth>();
35
36    auto xoutRgb = pipeline.create<dai::node::XLinkOut>();
37    auto xoutNN = pipeline.create<dai::node::XLinkOut>();
38    auto xoutDepth = pipeline.create<dai::node::XLinkOut>();
39
40    xoutRgb->setStreamName("rgb");
41    xoutNN->setStreamName("detections");
42    xoutDepth->setStreamName("depth");
43
44    // Properties
45    camRgb->setPreviewSize(300, 300);
46    camRgb->setResolution(dai::ColorCameraProperties::SensorResolution::THE_1080_P);
47    camRgb->setInterleaved(false);
48    camRgb->setColorOrder(dai::ColorCameraProperties::ColorOrder::BGR);
49
50    monoLeft->setResolution(dai::MonoCameraProperties::SensorResolution::THE_400_P);
51    monoLeft->setCamera("left");
52    monoRight->setResolution(dai::MonoCameraProperties::SensorResolution::THE_400_P);
53    monoRight->setCamera("right");
54
55    // Setting node configs
56    stereo->setDefaultProfilePreset(dai::node::StereoDepth::PresetMode::HIGH_DENSITY);
57    // Align depth map to the perspective of RGB camera, on which inference is done
58    stereo->setDepthAlign(dai::CameraBoardSocket::CAM_A);
59    stereo->setOutputSize(monoLeft->getResolutionWidth(), monoLeft->getResolutionHeight());
60
61    spatialDetectionNetwork->setBlobPath(nnPath);
62    spatialDetectionNetwork->setConfidenceThreshold(0.5f);
63    spatialDetectionNetwork->input.setBlocking(false);
64    spatialDetectionNetwork->setBoundingBoxScaleFactor(0.5);
65    spatialDetectionNetwork->setDepthLowerThreshold(100);
66    spatialDetectionNetwork->setDepthUpperThreshold(5000);
67
68    // Linking
69    monoLeft->out.link(stereo->left);
70    monoRight->out.link(stereo->right);
71
72    camRgb->preview.link(spatialDetectionNetwork->input);
73    if(syncNN) {
74        spatialDetectionNetwork->passthrough.link(xoutRgb->input);
75    } else {
76        camRgb->preview.link(xoutRgb->input);
77    }
78
79    spatialDetectionNetwork->out.link(xoutNN->input);
80
81    stereo->depth.link(spatialDetectionNetwork->inputDepth);
82    spatialDetectionNetwork->passthroughDepth.link(xoutDepth->input);
83
84    // Connect to device and start pipeline
85    dai::Device device(pipeline);
86
87    auto previewQueue = device.getOutputQueue("rgb", 4, false);
88    auto detectionNNQueue = device.getOutputQueue("detections", 4, false);
89    auto depthQueue = device.getOutputQueue("depth", 4, false);
90
91    auto startTime = steady_clock::now();
92    int counter = 0;
93    float fps = 0;
94    auto color = cv::Scalar(255, 255, 255);
95
96    while(true) {
97        auto inPreview = previewQueue->get<dai::ImgFrame>();
98        auto inDet = detectionNNQueue->get<dai::SpatialImgDetections>();
99        auto depth = depthQueue->get<dai::ImgFrame>();
100
101        counter++;
102        auto currentTime = steady_clock::now();
103        auto elapsed = duration_cast<duration<float>>(currentTime - startTime);
104        if(elapsed > seconds(1)) {
105            fps = counter / elapsed.count();
106            counter = 0;
107            startTime = currentTime;
108        }
109
110        cv::Mat frame = inPreview->getCvFrame();
111        cv::Mat depthFrame = depth->getFrame();  // depthFrame values are in millimeters
112
113        cv::Mat depthFrameColor;
114        cv::normalize(depthFrame, depthFrameColor, 255, 0, cv::NORM_INF, CV_8UC1);
115        cv::equalizeHist(depthFrameColor, depthFrameColor);
116        cv::applyColorMap(depthFrameColor, depthFrameColor, cv::COLORMAP_HOT);
117
118        auto detections = inDet->detections;
119
120        for(const auto& detection : detections) {
121            auto roiData = detection.boundingBoxMapping;
122            auto roi = roiData.roi;
123            roi = roi.denormalize(depthFrameColor.cols, depthFrameColor.rows);
124            auto topLeft = roi.topLeft();
125            auto bottomRight = roi.bottomRight();
126            auto xmin = (int)topLeft.x;
127            auto ymin = (int)topLeft.y;
128            auto xmax = (int)bottomRight.x;
129            auto ymax = (int)bottomRight.y;
130            cv::rectangle(depthFrameColor, cv::Rect(cv::Point(xmin, ymin), cv::Point(xmax, ymax)), color, cv::FONT_HERSHEY_SIMPLEX);
131
132            int x1 = detection.xmin * frame.cols;
133            int y1 = detection.ymin * frame.rows;
134            int x2 = detection.xmax * frame.cols;
135            int y2 = detection.ymax * frame.rows;
136
137            uint32_t labelIndex = detection.label;
138            std::string labelStr = to_string(labelIndex);
139            if(labelIndex < labelMap.size()) {
140                labelStr = labelMap[labelIndex];
141            }
142            cv::putText(frame, labelStr, cv::Point(x1 + 10, y1 + 20), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255);
143            std::stringstream confStr;
144            confStr << std::fixed << std::setprecision(2) << detection.confidence * 100;
145            cv::putText(frame, confStr.str(), cv::Point(x1 + 10, y1 + 35), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255);
146
147            std::stringstream depthX;
148            depthX << "X: " << (int)detection.spatialCoordinates.x << " mm";
149            cv::putText(frame, depthX.str(), cv::Point(x1 + 10, y1 + 50), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255);
150            std::stringstream depthY;
151            depthY << "Y: " << (int)detection.spatialCoordinates.y << " mm";
152            cv::putText(frame, depthY.str(), cv::Point(x1 + 10, y1 + 65), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255);
153            std::stringstream depthZ;
154            depthZ << "Z: " << (int)detection.spatialCoordinates.z << " mm";
155            cv::putText(frame, depthZ.str(), cv::Point(x1 + 10, y1 + 80), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255);
156
157            cv::rectangle(frame, cv::Rect(cv::Point(x1, y1), cv::Point(x2, y2)), color, cv::FONT_HERSHEY_SIMPLEX);
158        }
159
160        std::stringstream fpsStr;
161        fpsStr << "NN fps: " << std::fixed << std::setprecision(2) << fps;
162        cv::putText(frame, fpsStr.str(), cv::Point(2, inPreview->getHeight() - 4), cv::FONT_HERSHEY_TRIPLEX, 0.4, color);
163
164        cv::imshow("depth", depthFrameColor);
165        cv::imshow("preview", frame);
166
167        int key = cv::waitKey(1);
168        if(key == 'q' || key == 'Q') {
169            return 0;
170        }
171    }
172    return 0;
173}

Pipeline

Need assistance?

Head over to Discussion Forum for technical support or any other questions you might have.