DepthAI v2 has been superseded by DepthAI v3. You are viewing legacy documentation.
Mono & MobilenetSSD with spatial data

This example shows how to run MobileNetv2SSD on the rectified right input frame, and how to display the preview, the detections, the depth map and the spatial information (X, Y, Z) at the same time. It is similar to the RGB & MobilenetSSD example, except that it also provides spatial data. The X, Y, Z coordinates are in millimeters, relative to the center of the depth map.

setConfidenceThreshold - confidence threshold above which objects are detected
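The spatial-specific configuration boils down to a few calls on the MobileNetSpatialDetectionNetwork node. The sketch below collects them in one place, using the same values as the full example further down (a minimal excerpt of the pipeline setup, not a complete script):

import depthai as dai

pipeline = dai.Pipeline()
spatialDetectionNetwork = pipeline.create(dai.node.MobileNetSpatialDetectionNetwork)
spatialDetectionNetwork.setConfidenceThreshold(0.5)     # drop detections below 50% confidence
spatialDetectionNetwork.setBoundingBoxScaleFactor(0.5)  # average depth over a scaled-down center region of each bounding box
spatialDetectionNetwork.setDepthLowerThreshold(100)     # ignore depth pixels closer than 100 mm
spatialDetectionNetwork.setDepthUpperThreshold(5000)    # ignore depth pixels farther than 5000 mm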

Setup

This example requires the DepthAI v2 API, see installation instructions.
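With a working Python environment, the library itself can typically be installed with `python3 -m pip install depthai`; the linked installation instructions cover platform-specific prerequisites (such as udev rules on Linux).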

Source code

Python

#!/usr/bin/env python3

from pathlib import Path
import sys
import cv2
import depthai as dai
import numpy as np
import time

'''
Mobilenet SSD device side decoding demo
  The "mobilenet-ssd" model is a Single-Shot multibox Detection (SSD) network intended
  to perform object detection. This model is implemented using the Caffe* framework.
  For details about this model, check out the repository <https://github.com/chuanqi305/MobileNet-SSD>.
'''

# Use the bundled blob by default; the first CLI argument overrides it
nnPath = str((Path(__file__).parent / Path('../models/mobilenet-ssd_openvino_2021.4_6shave.blob')).resolve().absolute())
if len(sys.argv) > 1:
    nnPath = sys.argv[1]

if not Path(nnPath).exists():
    raise FileNotFoundError(f'Required file/s not found, please run "{sys.executable} install_requirements.py"')

# MobilenetSSD label texts
labelMap = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow",
            "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]

syncNN = True

# Create pipeline
pipeline = dai.Pipeline()

# Define sources and outputs
monoLeft = pipeline.create(dai.node.MonoCamera)
monoRight = pipeline.create(dai.node.MonoCamera)
stereo = pipeline.create(dai.node.StereoDepth)
spatialDetectionNetwork = pipeline.create(dai.node.MobileNetSpatialDetectionNetwork)
imageManip = pipeline.create(dai.node.ImageManip)

xoutManip = pipeline.create(dai.node.XLinkOut)
nnOut = pipeline.create(dai.node.XLinkOut)
xoutDepth = pipeline.create(dai.node.XLinkOut)

xoutManip.setStreamName("right")
nnOut.setStreamName("detections")
xoutDepth.setStreamName("depth")

# Properties
imageManip.initialConfig.setResize(300, 300)
# The NN model expects BGR input. By default, the ImageManip output type is the same as its input (grayscale in this case)
imageManip.initialConfig.setFrameType(dai.ImgFrame.Type.BGR888p)

monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoLeft.setCamera("left")
monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoRight.setCamera("right")

# StereoDepth
stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY)
stereo.setSubpixel(True)

# Define a neural network that will make predictions based on the source frames
spatialDetectionNetwork.setConfidenceThreshold(0.5)
spatialDetectionNetwork.setBlobPath(nnPath)
spatialDetectionNetwork.input.setBlocking(False)
spatialDetectionNetwork.setBoundingBoxScaleFactor(0.5)
spatialDetectionNetwork.setDepthLowerThreshold(100)
spatialDetectionNetwork.setDepthUpperThreshold(5000)

# Linking
monoLeft.out.link(stereo.left)
monoRight.out.link(stereo.right)

imageManip.out.link(spatialDetectionNetwork.input)
if syncNN:
    spatialDetectionNetwork.passthrough.link(xoutManip.input)
else:
    imageManip.out.link(xoutManip.input)

spatialDetectionNetwork.out.link(nnOut.input)

stereo.rectifiedRight.link(imageManip.inputImage)
stereo.depth.link(spatialDetectionNetwork.inputDepth)
spatialDetectionNetwork.passthroughDepth.link(xoutDepth.input)

# Connect to device and start pipeline
with dai.Device(pipeline) as device:

    # Output queues will be used to get the rectified right frames, detections and depth frames from the outputs defined above
    previewQueue = device.getOutputQueue(name="right", maxSize=4, blocking=False)
    detectionNNQueue = device.getOutputQueue(name="detections", maxSize=4, blocking=False)
    depthQueue = device.getOutputQueue(name="depth", maxSize=4, blocking=False)

    rectifiedRight = None
    detections = []

    startTime = time.monotonic()
    counter = 0
    fps = 0
    color = (255, 255, 255)

    while True:
        inRectified = previewQueue.get()
        inDet = detectionNNQueue.get()
        inDepth = depthQueue.get()

        counter += 1
        currentTime = time.monotonic()
        if (currentTime - startTime) > 1:
            fps = counter / (currentTime - startTime)
            counter = 0
            startTime = currentTime

        rectifiedRight = inRectified.getCvFrame()

        depthFrame = inDepth.getFrame() # depthFrame values are in millimeters

        # Subsample the depth frame and normalize it to the 1st-99th percentile
        # of the non-zero values, so depth outliers don't skew the colorization
        depth_downscaled = depthFrame[::4]
        if np.all(depth_downscaled == 0):
            min_depth = 0  # Set a default minimum depth value when all elements are zero
        else:
            min_depth = np.percentile(depth_downscaled[depth_downscaled != 0], 1)
        max_depth = np.percentile(depth_downscaled, 99)
        depthFrameColor = np.interp(depthFrame, (min_depth, max_depth), (0, 255)).astype(np.uint8)
        depthFrameColor = cv2.applyColorMap(depthFrameColor, cv2.COLORMAP_HOT)

        detections = inDet.detections

        # Draw the detections on the rectified right frame and the depth ROIs on the depth map
        height = rectifiedRight.shape[0]
        width = rectifiedRight.shape[1]
        for detection in detections:
            # ROI on the depth map that was averaged to compute the spatial coordinates
            roiData = detection.boundingBoxMapping
            roi = roiData.roi
            roi = roi.denormalize(depthFrameColor.shape[1], depthFrameColor.shape[0])
            topLeft = roi.topLeft()
            bottomRight = roi.bottomRight()
            xmin = int(topLeft.x)
            ymin = int(topLeft.y)
            xmax = int(bottomRight.x)
            ymax = int(bottomRight.y)
            cv2.rectangle(depthFrameColor, (xmin, ymin), (xmax, ymax), color, 1)

            # Denormalize bounding box
            x1 = int(detection.xmin * width)
            x2 = int(detection.xmax * width)
            y1 = int(detection.ymin * height)
            y2 = int(detection.ymax * height)

            try:
                label = labelMap[detection.label]
            except IndexError:
                label = detection.label

            cv2.putText(rectifiedRight, str(label), (x1 + 10, y1 + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(rectifiedRight, "{:.2f}".format(detection.confidence*100), (x1 + 10, y1 + 35), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(rectifiedRight, f"X: {int(detection.spatialCoordinates.x)} mm", (x1 + 10, y1 + 50), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(rectifiedRight, f"Y: {int(detection.spatialCoordinates.y)} mm", (x1 + 10, y1 + 65), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(rectifiedRight, f"Z: {int(detection.spatialCoordinates.z)} mm", (x1 + 10, y1 + 80), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)

            cv2.rectangle(rectifiedRight, (x1, y1), (x2, y2), color, 1)

        cv2.putText(rectifiedRight, "NN fps: {:.2f}".format(fps), (2, rectifiedRight.shape[0] - 4), cv2.FONT_HERSHEY_TRIPLEX, 0.4, color)
        cv2.imshow("depth", depthFrameColor)
        cv2.imshow("rectified right", rectifiedRight)

        if cv2.waitKey(1) == ord('q'):
            break
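A note on the syncNN flag used above: when it is True, the frame shown on the host is taken from the detection network's passthrough output, so every displayed frame is exactly the one the detections were computed on. When it is False, the latest ImageManip output is shown instead, which reduces display latency but can make the bounding boxes lag behind the image.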

C++

#include <atomic>
#include <chrono>
#include <iomanip>
#include <iostream>
#include <sstream>

// Includes common necessary includes for development using depthai library
#include "depthai/depthai.hpp"

static const std::vector<std::string> labelMap = {"background", "aeroplane", "bicycle",     "bird",  "boat",        "bottle", "bus",
                                                  "car",        "cat",       "chair",       "cow",   "diningtable", "dog",    "horse",
                                                  "motorbike",  "person",    "pottedplant", "sheep", "sofa",        "train",  "tvmonitor"};

static std::atomic<bool> syncNN{true};

int main(int argc, char** argv) {
    using namespace std;
    using namespace std::chrono;
    std::string nnPath(BLOB_PATH);

    // If path to blob specified, use that
    if(argc > 1) {
        nnPath = std::string(argv[1]);
    }

    // Print which blob we are using
    printf("Using blob at path: %s\n", nnPath.c_str());

    // Create pipeline
    dai::Pipeline pipeline;

    // Define sources and outputs
    auto monoLeft = pipeline.create<dai::node::MonoCamera>();
    auto monoRight = pipeline.create<dai::node::MonoCamera>();
    auto stereo = pipeline.create<dai::node::StereoDepth>();
    auto spatialDetectionNetwork = pipeline.create<dai::node::MobileNetSpatialDetectionNetwork>();
    auto imageManip = pipeline.create<dai::node::ImageManip>();

    auto xoutManip = pipeline.create<dai::node::XLinkOut>();
    auto nnOut = pipeline.create<dai::node::XLinkOut>();
    auto xoutDepth = pipeline.create<dai::node::XLinkOut>();

    xoutManip->setStreamName("right");
    nnOut->setStreamName("detections");
    xoutDepth->setStreamName("depth");

    // Properties
    imageManip->initialConfig.setResize(300, 300);
    // The NN model expects BGR input. By default, the ImageManip output type is the same as its input (grayscale in this case)
    imageManip->initialConfig.setFrameType(dai::ImgFrame::Type::BGR888p);

    monoLeft->setResolution(dai::MonoCameraProperties::SensorResolution::THE_400_P);
    monoLeft->setCamera("left");
    monoRight->setResolution(dai::MonoCameraProperties::SensorResolution::THE_400_P);
    monoRight->setCamera("right");

    // StereoDepth
    stereo->setDefaultProfilePreset(dai::node::StereoDepth::PresetMode::HIGH_DENSITY);

    // Define a neural network that will make predictions based on the source frames
    spatialDetectionNetwork->setConfidenceThreshold(0.5f);
    spatialDetectionNetwork->setBlobPath(nnPath);
    spatialDetectionNetwork->input.setBlocking(false);
    spatialDetectionNetwork->setBoundingBoxScaleFactor(0.5);
    spatialDetectionNetwork->setDepthLowerThreshold(100);
    spatialDetectionNetwork->setDepthUpperThreshold(5000);

    // Linking
    monoLeft->out.link(stereo->left);
    monoRight->out.link(stereo->right);

    imageManip->out.link(spatialDetectionNetwork->input);
    if(syncNN) {
        spatialDetectionNetwork->passthrough.link(xoutManip->input);
    } else {
        imageManip->out.link(xoutManip->input);
    }

    spatialDetectionNetwork->out.link(nnOut->input);

    stereo->rectifiedRight.link(imageManip->inputImage);
    stereo->depth.link(spatialDetectionNetwork->inputDepth);
    spatialDetectionNetwork->passthroughDepth.link(xoutDepth->input);

    // Connect to device and start pipeline
    dai::Device device(pipeline);

    // Output queues will be used to get the rectified right frames, detections and depth frames from the outputs defined above
    auto previewQueue = device.getOutputQueue("right", 4, false);
    auto detectionNNQueue = device.getOutputQueue("detections", 4, false);
    auto depthQueue = device.getOutputQueue("depth", 4, false);

    auto startTime = steady_clock::now();
    int counter = 0;
    float fps = 0;
    auto color = cv::Scalar(255, 255, 255);

    while(true) {
        auto inRectified = previewQueue->get<dai::ImgFrame>();
        auto inDet = detectionNNQueue->get<dai::SpatialImgDetections>();
        auto inDepth = depthQueue->get<dai::ImgFrame>();

        counter++;
        auto currentTime = steady_clock::now();
        auto elapsed = duration_cast<duration<float>>(currentTime - startTime);
        if(elapsed > seconds(1)) {
            fps = counter / elapsed.count();
            counter = 0;
            startTime = currentTime;
        }

        cv::Mat rectifiedRight = inRectified->getCvFrame();

        cv::Mat depthFrame = inDepth->getFrame();  // depthFrame values are in millimeters
        cv::Mat depthFrameColor;

        cv::normalize(depthFrame, depthFrameColor, 255, 0, cv::NORM_INF, CV_8UC1);
        cv::equalizeHist(depthFrameColor, depthFrameColor);
        cv::applyColorMap(depthFrameColor, depthFrameColor, cv::COLORMAP_HOT);

        auto detections = inDet->detections;

        for(auto& detection : detections) {
            // ROI on the depth map that was averaged to compute the spatial coordinates
            auto roiData = detection.boundingBoxMapping;
            auto roi = roiData.roi;
            roi = roi.denormalize(depthFrameColor.cols, depthFrameColor.rows);
            auto topLeft = roi.topLeft();
            auto bottomRight = roi.bottomRight();
            auto xmin = (int)topLeft.x;
            auto ymin = (int)topLeft.y;
            auto xmax = (int)bottomRight.x;
            auto ymax = (int)bottomRight.y;
            cv::rectangle(depthFrameColor, cv::Rect(cv::Point(xmin, ymin), cv::Point(xmax, ymax)), color, 1);

            // Denormalize bounding box
            int x1 = detection.xmin * rectifiedRight.cols;
            int y1 = detection.ymin * rectifiedRight.rows;
            int x2 = detection.xmax * rectifiedRight.cols;
            int y2 = detection.ymax * rectifiedRight.rows;

            uint32_t labelIndex = detection.label;
            std::string labelStr = to_string(labelIndex);
            if(labelIndex < labelMap.size()) {
                labelStr = labelMap[labelIndex];
            }
            cv::putText(rectifiedRight, labelStr, cv::Point(x1 + 10, y1 + 20), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255);
            std::stringstream confStr;
            confStr << std::fixed << std::setprecision(2) << detection.confidence * 100;
            cv::putText(rectifiedRight, confStr.str(), cv::Point(x1 + 10, y1 + 35), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255);

            std::stringstream depthX;
            depthX << "X: " << (int)detection.spatialCoordinates.x << " mm";
            cv::putText(rectifiedRight, depthX.str(), cv::Point(x1 + 10, y1 + 50), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255);
            std::stringstream depthY;
            depthY << "Y: " << (int)detection.spatialCoordinates.y << " mm";
            cv::putText(rectifiedRight, depthY.str(), cv::Point(x1 + 10, y1 + 65), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255);
            std::stringstream depthZ;
            depthZ << "Z: " << (int)detection.spatialCoordinates.z << " mm";
            cv::putText(rectifiedRight, depthZ.str(), cv::Point(x1 + 10, y1 + 80), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255);

            cv::rectangle(rectifiedRight, cv::Rect(cv::Point(x1, y1), cv::Point(x2, y2)), color, 1);
        }

        std::stringstream fpsStr;
        fpsStr << "NN fps: " << std::fixed << std::setprecision(2) << fps;
        cv::putText(rectifiedRight, fpsStr.str(), cv::Point(2, rectifiedRight.rows - 4), cv::FONT_HERSHEY_TRIPLEX, 0.4, color);

        cv::imshow("depth", depthFrameColor);
        cv::imshow("rectified right", rectifiedRight);

        int key = cv::waitKey(1);
        if(key == 'q' || key == 'Q') {
            return 0;
        }
    }
    return 0;
}
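In the C++ version, nnPath defaults to the BLOB_PATH compile-time definition, which the depthai-core example build supplies; as in the Python version, a different blob can be selected by passing its path as the first command-line argument.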

Pipeline
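The pipeline built above can be summarized as follows: monoLeft and monoRight feed StereoDepth; the stereo node's rectifiedRight output goes through ImageManip (resized to 300x300 and converted to BGR) into the MobileNetSpatialDetectionNetwork, while its depth output feeds the network's inputDepth; the network's passthrough frames, detections and passthrough depth are then streamed to the host over the "right", "detections" and "depth" XLinkOut nodes.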
