Mono & MobilenetSSD with spatial data

This example shows how to run MobileNetv2SSD on the rectified right input frame and how to display the preview, detections, depth map, and spatial information (X, Y, Z). It is similar to the RGB & MobilenetSSD example, except that it also provides spatial data. The X, Y, Z coordinates are relative to the center of the depth map.
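
As a quick orientation, here is a minimal sketch of reading the spatial output (detectionNNQueue refers to the "detections" output queue created in the full example below): each SpatialImgDetections message carries a list of detections, and every detection exposes a bounding box, a confidence score, and X, Y, Z coordinates in millimeters.

inDet = detectionNNQueue.get()  # dai.SpatialImgDetections from the "detections" queue
for detection in inDet.detections:
    # Spatial coordinates are reported in millimeters,
    # relative to the center of the depth map
    x_mm = detection.spatialCoordinates.x
    y_mm = detection.spatialCoordinates.y
    z_mm = detection.spatialCoordinates.z
    print(detection.label, detection.confidence, x_mm, y_mm, z_mm)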

setConfidenceThreshold - confidence threshold above which objects are detected
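
For reference, a minimal configuration sketch (the same calls appear in the full source below): besides the confidence threshold, the spatial detection network also takes a bounding box scale factor and lower/upper depth thresholds that limit which depth values are used when computing the spatial coordinates.

spatialDetectionNetwork = pipeline.create(dai.node.MobileNetSpatialDetectionNetwork)
spatialDetectionNetwork.setBlobPath(nnPath)
spatialDetectionNetwork.setConfidenceThreshold(0.5)     # keep detections with confidence above 0.5
spatialDetectionNetwork.setBoundingBoxScaleFactor(0.5)  # shrink the ROI used for the spatial calculation
spatialDetectionNetwork.setDepthLowerThreshold(100)     # ignore depth closer than 100 mm
spatialDetectionNetwork.setDepthUpperThreshold(5000)    # ignore depth farther than 5000 mm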


Demo

Setup

Please run the install script to download all required dependencies. Please note that this script must be run from the git context, so you have to clone the depthai-python repository first and then run the script:

git clone https://github.com/luxonis/depthai-python.git
cd depthai-python/examples
python3 install_requirements.py

For additional information, please follow the installation guide.

This example script requires external file(s) to run. If you are using:

  • depthai-python, run python3 examples/install_requirements.py to download required file(s)

  • depthai-core, required file(s) will get downloaded automatically when building the example

Source code

Also available on GitHub

#!/usr/bin/env python3

from pathlib import Path
import sys
import cv2
import depthai as dai
import numpy as np
import time

'''
Mobilenet SSD device side decoding demo
  The "mobilenet-ssd" model is a Single-Shot multibox Detection (SSD) network intended
  to perform object detection. This model is implemented using the Caffe* framework.
  For details about this model, check out the repository <https://github.com/chuanqi305/MobileNet-SSD>.
'''

# Get argument first
nnPath = str((Path(__file__).parent / Path('../models/mobilenet-ssd_openvino_2021.4_6shave.blob')).resolve().absolute())
if len(sys.argv) > 1:
    nnPath = sys.argv[1]

if not Path(nnPath).exists():
    raise FileNotFoundError(f'Required file/s not found, please run "{sys.executable} install_requirements.py"')

# MobilenetSSD label texts
labelMap = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow",
            "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]

syncNN = True

# Create pipeline
pipeline = dai.Pipeline()

# Define sources and outputs
monoLeft = pipeline.create(dai.node.MonoCamera)
monoRight = pipeline.create(dai.node.MonoCamera)
stereo = pipeline.create(dai.node.StereoDepth)
spatialDetectionNetwork = pipeline.create(dai.node.MobileNetSpatialDetectionNetwork)
imageManip = pipeline.create(dai.node.ImageManip)

xoutManip = pipeline.create(dai.node.XLinkOut)
nnOut = pipeline.create(dai.node.XLinkOut)
xoutDepth = pipeline.create(dai.node.XLinkOut)

xoutManip.setStreamName("right")
nnOut.setStreamName("detections")
xoutDepth.setStreamName("depth")

# Properties
imageManip.initialConfig.setResize(300, 300)
# The NN model expects BGR input. By default ImageManip output type would be same as input (gray in this case)
imageManip.initialConfig.setFrameType(dai.ImgFrame.Type.BGR888p)

monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoLeft.setCamera("left")
monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoRight.setCamera("right")

# StereoDepth
stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY)
stereo.setSubpixel(True)

# Define a neural network that will make predictions based on the source frames
spatialDetectionNetwork.setConfidenceThreshold(0.5)
spatialDetectionNetwork.setBlobPath(nnPath)
spatialDetectionNetwork.input.setBlocking(False)
spatialDetectionNetwork.setBoundingBoxScaleFactor(0.5)
spatialDetectionNetwork.setDepthLowerThreshold(100)
spatialDetectionNetwork.setDepthUpperThreshold(5000)

# Linking
monoLeft.out.link(stereo.left)
monoRight.out.link(stereo.right)

imageManip.out.link(spatialDetectionNetwork.input)
if syncNN:
    spatialDetectionNetwork.passthrough.link(xoutManip.input)
else:
    imageManip.out.link(xoutManip.input)

spatialDetectionNetwork.out.link(nnOut.input)

stereo.rectifiedRight.link(imageManip.inputImage)
stereo.depth.link(spatialDetectionNetwork.inputDepth)
spatialDetectionNetwork.passthroughDepth.link(xoutDepth.input)

# Connect to device and start pipeline
with dai.Device(pipeline) as device:

    # Output queues will be used to get the rgb frames and nn data from the outputs defined above
    previewQueue = device.getOutputQueue(name="right", maxSize=4, blocking=False)
    detectionNNQueue = device.getOutputQueue(name="detections", maxSize=4, blocking=False)
    depthQueue = device.getOutputQueue(name="depth", maxSize=4, blocking=False)

    rectifiedRight = None
    detections = []

    startTime = time.monotonic()
    counter = 0
    fps = 0
    color = (255, 255, 255)

    while True:
        inRectified = previewQueue.get()
        inDet = detectionNNQueue.get()
        inDepth = depthQueue.get()

        counter += 1
        currentTime = time.monotonic()
        if (currentTime - startTime) > 1:
            fps = counter / (currentTime - startTime)
            counter = 0
            startTime = currentTime

        rectifiedRight = inRectified.getCvFrame()

        depthFrame = inDepth.getFrame() # depthFrame values are in millimeters

        depth_downscaled = depthFrame[::4]  # subsample rows to speed up the percentile computation
        if np.all(depth_downscaled == 0):
            min_depth = 0  # Set a default minimum depth value when all elements are zero
        else:
            min_depth = np.percentile(depth_downscaled[depth_downscaled != 0], 1)
        max_depth = np.percentile(depth_downscaled, 99)
        depthFrameColor = np.interp(depthFrame, (min_depth, max_depth), (0, 255)).astype(np.uint8)
        depthFrameColor = cv2.applyColorMap(depthFrameColor, cv2.COLORMAP_HOT)

        detections = inDet.detections

        # Draw detections (bounding boxes, labels and spatial coordinates) on the frames
        height = rectifiedRight.shape[0]
        width = rectifiedRight.shape[1]
        for detection in detections:
            roiData = detection.boundingBoxMapping
            roi = roiData.roi
            roi = roi.denormalize(depthFrameColor.shape[1], depthFrameColor.shape[0])
            topLeft = roi.topLeft()
            bottomRight = roi.bottomRight()
            xmin = int(topLeft.x)
            ymin = int(topLeft.y)
            xmax = int(bottomRight.x)
            ymax = int(bottomRight.y)
            cv2.rectangle(depthFrameColor, (xmin, ymin), (xmax, ymax), color, 1)

            # Denormalize bounding box
            x1 = int(detection.xmin * width)
            x2 = int(detection.xmax * width)
            y1 = int(detection.ymin * height)
            y2 = int(detection.ymax * height)

            try:
                label = labelMap[detection.label]
            except IndexError:
                label = detection.label

            cv2.putText(rectifiedRight, str(label), (x1 + 10, y1 + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(rectifiedRight, "{:.2f}".format(detection.confidence*100), (x1 + 10, y1 + 35), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(rectifiedRight, f"X: {int(detection.spatialCoordinates.x)} mm", (x1 + 10, y1 + 50), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(rectifiedRight, f"Y: {int(detection.spatialCoordinates.y)} mm", (x1 + 10, y1 + 65), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(rectifiedRight, f"Z: {int(detection.spatialCoordinates.z)} mm", (x1 + 10, y1 + 80), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)

            cv2.rectangle(rectifiedRight, (x1, y1), (x2, y2), color, 1)

        cv2.putText(rectifiedRight, "NN fps: {:.2f}".format(fps), (2, rectifiedRight.shape[0] - 4), cv2.FONT_HERSHEY_TRIPLEX, 0.4, color)
        cv2.imshow("depth", depthFrameColor)
        cv2.imshow("rectified right", rectifiedRight)

        if cv2.waitKey(1) == ord('q'):
            break

Also available on GitHub

#include <chrono>
#include <iomanip>
#include <iostream>
#include <sstream>

// Includes common necessary includes for development using depthai library
#include "depthai/depthai.hpp"

static const std::vector<std::string> labelMap = {"background", "aeroplane", "bicycle",     "bird",  "boat",        "bottle", "bus",
                                                  "car",        "cat",       "chair",       "cow",   "diningtable", "dog",    "horse",
                                                  "motorbike",  "person",    "pottedplant", "sheep", "sofa",        "train",  "tvmonitor"};

static std::atomic<bool> syncNN{true};

int main(int argc, char** argv) {
    using namespace std;
    using namespace std::chrono;
    std::string nnPath(BLOB_PATH);

    // If path to blob specified, use that
    if(argc > 1) {
        nnPath = std::string(argv[1]);
    }

    // Print which blob we are using
    printf("Using blob at path: %s\n", nnPath.c_str());

    // Create pipeline
    dai::Pipeline pipeline;

    // Define sources and outputs
    auto monoLeft = pipeline.create<dai::node::MonoCamera>();
    auto monoRight = pipeline.create<dai::node::MonoCamera>();
    auto stereo = pipeline.create<dai::node::StereoDepth>();
    auto spatialDetectionNetwork = pipeline.create<dai::node::MobileNetSpatialDetectionNetwork>();
    auto imageManip = pipeline.create<dai::node::ImageManip>();

    auto xoutManip = pipeline.create<dai::node::XLinkOut>();
    auto nnOut = pipeline.create<dai::node::XLinkOut>();
    auto xoutDepth = pipeline.create<dai::node::XLinkOut>();

    xoutManip->setStreamName("right");
    nnOut->setStreamName("detections");
    xoutDepth->setStreamName("depth");

    // Properties
    imageManip->initialConfig.setResize(300, 300);
    // The NN model expects BGR input. By default ImageManip output type would be same as input (gray in this case)
    imageManip->initialConfig.setFrameType(dai::ImgFrame::Type::BGR888p);

    monoLeft->setResolution(dai::MonoCameraProperties::SensorResolution::THE_400_P);
    monoLeft->setCamera("left");
    monoRight->setResolution(dai::MonoCameraProperties::SensorResolution::THE_400_P);
    monoRight->setCamera("right");

    // StereoDepth
    stereo->setDefaultProfilePreset(dai::node::StereoDepth::PresetMode::HIGH_DENSITY);

    // Define a neural network that will make predictions based on the source frames
    spatialDetectionNetwork->setConfidenceThreshold(0.5f);
    spatialDetectionNetwork->setBlobPath(nnPath);
    spatialDetectionNetwork->input.setBlocking(false);
    spatialDetectionNetwork->setBoundingBoxScaleFactor(0.5);
    spatialDetectionNetwork->setDepthLowerThreshold(100);
    spatialDetectionNetwork->setDepthUpperThreshold(5000);

    // Linking
    monoLeft->out.link(stereo->left);
    monoRight->out.link(stereo->right);

    imageManip->out.link(spatialDetectionNetwork->input);
    if(syncNN) {
        spatialDetectionNetwork->passthrough.link(xoutManip->input);
    } else {
        imageManip->out.link(xoutManip->input);
    }

    spatialDetectionNetwork->out.link(nnOut->input);

    stereo->rectifiedRight.link(imageManip->inputImage);
    stereo->depth.link(spatialDetectionNetwork->inputDepth);
    spatialDetectionNetwork->passthroughDepth.link(xoutDepth->input);

    // Connect to device and start pipeline
    dai::Device device(pipeline);

    // Output queues will be used to get the rgb frames and nn data from the outputs defined above
    auto previewQueue = device.getOutputQueue("right", 4, false);
    auto detectionNNQueue = device.getOutputQueue("detections", 4, false);
    auto depthQueue = device.getOutputQueue("depth", 4, false);

    auto startTime = steady_clock::now();
    int counter = 0;
    float fps = 0;
    auto color = cv::Scalar(255, 255, 255);

    while(true) {
        auto inRectified = previewQueue->get<dai::ImgFrame>();
        auto inDet = detectionNNQueue->get<dai::SpatialImgDetections>();
        auto inDepth = depthQueue->get<dai::ImgFrame>();

        counter++;
        auto currentTime = steady_clock::now();
        auto elapsed = duration_cast<duration<float>>(currentTime - startTime);
        if(elapsed > seconds(1)) {
            fps = counter / elapsed.count();
            counter = 0;
            startTime = currentTime;
        }

        cv::Mat rectifiedRight = inRectified->getCvFrame();

        cv::Mat depthFrame = inDepth->getFrame();  // depthFrame values are in millimeters
        cv::Mat depthFrameColor;

        cv::normalize(depthFrame, depthFrameColor, 255, 0, cv::NORM_INF, CV_8UC1);
        cv::equalizeHist(depthFrameColor, depthFrameColor);
        cv::applyColorMap(depthFrameColor, depthFrameColor, cv::COLORMAP_HOT);

        auto detections = inDet->detections;

        for(auto& detection : detections) {
            auto roiData = detection.boundingBoxMapping;
            auto roi = roiData.roi;
            roi = roi.denormalize(depthFrameColor.cols, depthFrameColor.rows);
            auto topLeft = roi.topLeft();
            auto bottomRight = roi.bottomRight();
            auto xmin = (int)topLeft.x;
            auto ymin = (int)topLeft.y;
            auto xmax = (int)bottomRight.x;
            auto ymax = (int)bottomRight.y;
            cv::rectangle(depthFrameColor, cv::Rect(cv::Point(xmin, ymin), cv::Point(xmax, ymax)), color, 1);

            int x1 = detection.xmin * rectifiedRight.cols;
            int y1 = detection.ymin * rectifiedRight.rows;
            int x2 = detection.xmax * rectifiedRight.cols;
            int y2 = detection.ymax * rectifiedRight.rows;

            uint32_t labelIndex = detection.label;
            std::string labelStr = to_string(labelIndex);
            if(labelIndex < labelMap.size()) {
                labelStr = labelMap[labelIndex];
            }
            cv::putText(rectifiedRight, labelStr, cv::Point(x1 + 10, y1 + 20), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255);
            std::stringstream confStr;
            confStr << std::fixed << std::setprecision(2) << detection.confidence * 100;
            cv::putText(rectifiedRight, confStr.str(), cv::Point(x1 + 10, y1 + 35), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255);

            std::stringstream depthX;
            depthX << "X: " << (int)detection.spatialCoordinates.x << " mm";
            cv::putText(rectifiedRight, depthX.str(), cv::Point(x1 + 10, y1 + 50), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255);
            std::stringstream depthY;
            depthY << "Y: " << (int)detection.spatialCoordinates.y << " mm";
            cv::putText(rectifiedRight, depthY.str(), cv::Point(x1 + 10, y1 + 65), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255);
            std::stringstream depthZ;
            depthZ << "Z: " << (int)detection.spatialCoordinates.z << " mm";
            cv::putText(rectifiedRight, depthZ.str(), cv::Point(x1 + 10, y1 + 80), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255);

            cv::rectangle(rectifiedRight, cv::Rect(cv::Point(x1, y1), cv::Point(x2, y2)), color, 1);
        }

        std::stringstream fpsStr;
        fpsStr << std::fixed << std::setprecision(2) << fps;
        cv::putText(rectifiedRight, fpsStr.str(), cv::Point(2, rectifiedRight.rows - 4), cv::FONT_HERSHEY_TRIPLEX, 0.4, color);

        cv::imshow("depth", depthFrameColor);
        cv::imshow("rectified right", rectifiedRight);

        int key = cv::waitKey(1);
        if(key == 'q' || key == 'Q') {
            return 0;
        }
    }
    return 0;
}

Got questions?

Head over to the Discussion Forum for technical support or any other questions you might have.