Mono & MobilenetSSD with spatial data
This example shows how to run MobileNetv2SSD on the rectified right input frame, and how to display the preview, detections, depth map and spatial information (X, Y, Z). It is similar to the RGB & MobilenetSSD example, except that it also provides spatial data. The X, Y, Z coordinates are relative to the center of the depth map. setConfidenceThreshold sets the confidence threshold above which objects are detected.
Similar samples:
- Spatial location calculator
- Spatial object tracker on RGB
- RGB & MobilenetSSD with spatial data
- RGB & TinyYolo with spatial data
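Since the spatial coordinates are reported in millimeters, the straight-line distance from the camera to a detected object can be derived from them directly. A minimal sketch (the helper name is hypothetical; detection stands for one item of the detections list in the source code below):
Python
import math

# Hypothetical helper: Euclidean distance (mm) from the camera to a detection
def distance_mm(detection):
    c = detection.spatialCoordinates
    return math.sqrt(c.x ** 2 + c.y ** 2 + c.z ** 2)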
Demo
Setup
Please run the install script to download all required dependencies. Note that this script must be run from within the cloned repository, so first download the depthai-python repository and then run the script:
Command Line
git clone https://github.com/luxonis/depthai-python.git
cd depthai-python/examples
python3 install_requirements.py
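With the dependencies installed, the example can be launched from the examples folder. The script path below is an assumption; check the repository for the exact filename:
Command Line
python3 SpatialDetection/spatial_mobilenet_mono.py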
Source code
Python
#!/usr/bin/env python3

from pathlib import Path
import sys
import cv2
import depthai as dai
import numpy as np
import time

'''
Mobilenet SSD device-side decoding demo

The "mobilenet-ssd" model is a Single-Shot multibox Detection (SSD) network
intended to perform object detection. This model is implemented using the
Caffe* framework. For details about this model, check out the repository
<https://github.com/chuanqi305/MobileNet-SSD>.
'''

# Get argument first
nnPath = str((Path(__file__).parent / Path('../models/mobilenet-ssd_openvino_2021.4_6shave.blob')).resolve().absolute())
if len(sys.argv) > 1:
    nnPath = sys.argv[1]

if not Path(nnPath).exists():
    raise FileNotFoundError(f'Required file/s not found, please run "{sys.executable} install_requirements.py"')

# MobilenetSSD label texts
labelMap = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow",
            "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]

syncNN = True

# Create pipeline
pipeline = dai.Pipeline()

# Define sources and outputs
monoLeft = pipeline.create(dai.node.MonoCamera)
monoRight = pipeline.create(dai.node.MonoCamera)
stereo = pipeline.create(dai.node.StereoDepth)
spatialDetectionNetwork = pipeline.create(dai.node.MobileNetSpatialDetectionNetwork)
imageManip = pipeline.create(dai.node.ImageManip)

xoutManip = pipeline.create(dai.node.XLinkOut)
nnOut = pipeline.create(dai.node.XLinkOut)
xoutDepth = pipeline.create(dai.node.XLinkOut)

xoutManip.setStreamName("right")
nnOut.setStreamName("detections")
xoutDepth.setStreamName("depth")

# Properties
imageManip.initialConfig.setResize(300, 300)
# The NN model expects BGR input; by default the ImageManip output type would
# be the same as its input (grayscale in this case), so convert it explicitly
imageManip.initialConfig.setFrameType(dai.ImgFrame.Type.BGR888p)

monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoLeft.setCamera("left")
monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoRight.setCamera("right")

# StereoDepth
stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY)
stereo.setSubpixel(True)

# Define a neural network that will make predictions based on the source frames
spatialDetectionNetwork.setConfidenceThreshold(0.5)
spatialDetectionNetwork.setBlobPath(nnPath)
spatialDetectionNetwork.input.setBlocking(False)
spatialDetectionNetwork.setBoundingBoxScaleFactor(0.5)
spatialDetectionNetwork.setDepthLowerThreshold(100)
spatialDetectionNetwork.setDepthUpperThreshold(5000)
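# Note: the spatial (X, Y, Z) values are calculated on-device from the depth
# pixels inside each detection's bounding box. The scale factor above shrinks
# that box (to 50% here) so that background pixels around the object have less
# influence, while the lower/upper thresholds (in mm) discard depth values
# outside the expected range.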
# Linking
monoLeft.out.link(stereo.left)
monoRight.out.link(stereo.right)

imageManip.out.link(spatialDetectionNetwork.input)
if syncNN:
    spatialDetectionNetwork.passthrough.link(xoutManip.input)
else:
    imageManip.out.link(xoutManip.input)

spatialDetectionNetwork.out.link(nnOut.input)

stereo.rectifiedRight.link(imageManip.inputImage)
stereo.depth.link(spatialDetectionNetwork.inputDepth)
spatialDetectionNetwork.passthroughDepth.link(xoutDepth.input)

# Connect to device and start pipeline
with dai.Device(pipeline) as device:

    # Output queues will be used to get the rectified right frames and nn data from the outputs defined above
    previewQueue = device.getOutputQueue(name="right", maxSize=4, blocking=False)
    detectionNNQueue = device.getOutputQueue(name="detections", maxSize=4, blocking=False)
    depthQueue = device.getOutputQueue(name="depth", maxSize=4, blocking=False)

    rectifiedRight = None
    detections = []

    startTime = time.monotonic()
    counter = 0
    fps = 0
    color = (255, 255, 255)

    while True:
        inRectified = previewQueue.get()
        inDet = detectionNNQueue.get()
        inDepth = depthQueue.get()

        counter += 1
        currentTime = time.monotonic()
        if (currentTime - startTime) > 1:
            fps = counter / (currentTime - startTime)
            counter = 0
            startTime = currentTime

        rectifiedRight = inRectified.getCvFrame()

        depthFrame = inDepth.getFrame()  # depthFrame values are in millimeters

        # Downsample (every 4th row) to speed up the percentile computation
        depth_downscaled = depthFrame[::4]
        if np.all(depth_downscaled == 0):
            min_depth = 0  # Set a default minimum depth value when all elements are zero
        else:
            min_depth = np.percentile(depth_downscaled[depth_downscaled != 0], 1)
        max_depth = np.percentile(depth_downscaled, 99)
        depthFrameColor = np.interp(depthFrame, (min_depth, max_depth), (0, 255)).astype(np.uint8)
        depthFrameColor = cv2.applyColorMap(depthFrameColor, cv2.COLORMAP_HOT)

        detections = inDet.detections

        # Draw the detections, spatial coordinates and depth ROIs on the frames
        height = rectifiedRight.shape[0]
        width = rectifiedRight.shape[1]
        for detection in detections:
            # Depth ROI that was used to compute the spatial coordinates
            roiData = detection.boundingBoxMapping
            roi = roiData.roi
            roi = roi.denormalize(depthFrameColor.shape[1], depthFrameColor.shape[0])
            topLeft = roi.topLeft()
            bottomRight = roi.bottomRight()
            xmin = int(topLeft.x)
            ymin = int(topLeft.y)
            xmax = int(bottomRight.x)
            ymax = int(bottomRight.y)
            cv2.rectangle(depthFrameColor, (xmin, ymin), (xmax, ymax), color, 1)

            # Denormalize bounding box
            x1 = int(detection.xmin * width)
            x2 = int(detection.xmax * width)
            y1 = int(detection.ymin * height)
            y2 = int(detection.ymax * height)

            try:
                label = labelMap[detection.label]
            except IndexError:
                label = detection.label

            cv2.putText(rectifiedRight, str(label), (x1 + 10, y1 + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(rectifiedRight, "{:.2f}".format(detection.confidence*100), (x1 + 10, y1 + 35), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(rectifiedRight, f"X: {int(detection.spatialCoordinates.x)} mm", (x1 + 10, y1 + 50), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(rectifiedRight, f"Y: {int(detection.spatialCoordinates.y)} mm", (x1 + 10, y1 + 65), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(rectifiedRight, f"Z: {int(detection.spatialCoordinates.z)} mm", (x1 + 10, y1 + 80), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)

            cv2.rectangle(rectifiedRight, (x1, y1), (x2, y2), color, 1)

        cv2.putText(rectifiedRight, "NN fps: {:.2f}".format(fps), (2, rectifiedRight.shape[0] - 4), cv2.FONT_HERSHEY_TRIPLEX, 0.4, color)
        cv2.imshow("depth", depthFrameColor)
        cv2.imshow("rectified right", rectifiedRight)

        if cv2.waitKey(1) == ord('q'):
            break
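The loop above blocks on each queue with get(). If the host application needs to stay responsive when no new message has arrived yet, the non-blocking tryGet() can be used instead, since it returns None when the queue is empty. A minimal sketch of the same loop body, using the queue names from above:
Python
# Non-blocking variant: tryGet() returns None if no message is available yet
inRectified = previewQueue.tryGet()
inDet = detectionNNQueue.tryGet()
inDepth = depthQueue.tryGet()
if inRectified is None or inDet is None or inDepth is None:
    continue  # nothing new to process this iteration
# ... process the frames and detections as above ...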
Pipeline
Need assistance?
Head over to the Discussion Forum for technical support or any other questions you might have.