RGB & MobilenetSSD with spatial data
This example shows how to run MobileNetv2SSD on the RGB input frame and how to display the RGB preview, detections, depth map and spatial information (X, Y, Z) at the same time. It is similar to the RGB & MobilenetSSD example, except that it also provides spatial data. The X, Y, Z coordinates are relative to the center of the depth map. A minimal host-side sketch of reading these spatial detections is shown after the list of similar samples below.
setConfidenceThreshold - confidence threshold above which objects are detected
Similar samples:
- Spatial location calculator
- Spatial object tracker on RGB
- Mono & MobilenetSSD with spatial data
- RGB & TinyYolo with spatial data
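As a rough sketch (not part of the example itself), this is the minimal host-side pattern for consuming spatial detections. It assumes a `pipeline` built as in the full source below, with an XLinkOut stream named "detections", and reuses `labelMap` from that listing:
Python
import depthai as dai

# Assumes `pipeline` and `labelMap` are defined as in the full example below
with dai.Device(pipeline) as device:
    detQueue = device.getOutputQueue(name="detections", maxSize=4, blocking=False)
    while True:
        inDet = detQueue.get()  # blocks until the next set of detections arrives
        for det in inDet.detections:
            # Spatial coordinates are in millimeters, relative to the center of the depth map
            print(labelMap[det.label], det.confidence,
                  int(det.spatialCoordinates.x),
                  int(det.spatialCoordinates.y),
                  int(det.spatialCoordinates.z))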
Demo
Setup
Please run the install script to download all required dependencies. Please note that this script must be run from a git context, so you have to download the depthai-python repository first and then run the script.
Command Line
git clone https://github.com/luxonis/depthai-python.git
cd depthai-python/examples
python3 install_requirements.py
Source code
Python
#!/usr/bin/env python3

from pathlib import Path
import sys
import cv2
import depthai as dai
import numpy as np
import time

'''
Spatial detection network demo.
Performs inference on the RGB camera and retrieves spatial location coordinates (x, y, z) relative to the center of the depth map.
'''

# Get argument first
nnBlobPath = str((Path(__file__).parent / Path('../models/mobilenet-ssd_openvino_2021.4_6shave.blob')).resolve().absolute())
if len(sys.argv) > 1:
    nnBlobPath = sys.argv[1]

if not Path(nnBlobPath).exists():
    raise FileNotFoundError(f'Required file/s not found, please run "{sys.executable} install_requirements.py"')

# MobilenetSSD label texts
labelMap = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow",
            "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]

syncNN = True

# Create pipeline
pipeline = dai.Pipeline()

# Define sources and outputs
camRgb = pipeline.create(dai.node.ColorCamera)
spatialDetectionNetwork = pipeline.create(dai.node.MobileNetSpatialDetectionNetwork)
monoLeft = pipeline.create(dai.node.MonoCamera)
monoRight = pipeline.create(dai.node.MonoCamera)
stereo = pipeline.create(dai.node.StereoDepth)

xoutRgb = pipeline.create(dai.node.XLinkOut)
xoutNN = pipeline.create(dai.node.XLinkOut)
xoutDepth = pipeline.create(dai.node.XLinkOut)

xoutRgb.setStreamName("rgb")
xoutNN.setStreamName("detections")
xoutDepth.setStreamName("depth")

# Properties
camRgb.setPreviewSize(300, 300)
camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
camRgb.setInterleaved(False)
camRgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.BGR)

monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoLeft.setCamera("left")
monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoRight.setCamera("right")

# Setting node configs
stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY)
# Align depth map to the perspective of RGB camera, on which inference is done
stereo.setDepthAlign(dai.CameraBoardSocket.CAM_A)
stereo.setSubpixel(True)
stereo.setOutputSize(monoLeft.getResolutionWidth(), monoLeft.getResolutionHeight())

spatialDetectionNetwork.setBlobPath(nnBlobPath)
# Only detections with confidence above this threshold are returned
spatialDetectionNetwork.setConfidenceThreshold(0.5)
spatialDetectionNetwork.input.setBlocking(False)
# Scale each bounding box down to its central 50% before sampling depth
spatialDetectionNetwork.setBoundingBoxScaleFactor(0.5)
# Ignore depth values outside the 100 mm - 5000 mm range
spatialDetectionNetwork.setDepthLowerThreshold(100)
spatialDetectionNetwork.setDepthUpperThreshold(5000)

# Linking
monoLeft.out.link(stereo.left)
monoRight.out.link(stereo.right)

camRgb.preview.link(spatialDetectionNetwork.input)
if syncNN:
    spatialDetectionNetwork.passthrough.link(xoutRgb.input)
else:
    camRgb.preview.link(xoutRgb.input)

spatialDetectionNetwork.out.link(xoutNN.input)

stereo.depth.link(spatialDetectionNetwork.inputDepth)
spatialDetectionNetwork.passthroughDepth.link(xoutDepth.input)

# Connect to device and start pipeline
with dai.Device(pipeline) as device:

    # Output queues will be used to get the rgb frames and nn data from the outputs defined above
    previewQueue = device.getOutputQueue(name="rgb", maxSize=4, blocking=False)
    detectionNNQueue = device.getOutputQueue(name="detections", maxSize=4, blocking=False)
    depthQueue = device.getOutputQueue(name="depth", maxSize=4, blocking=False)

    startTime = time.monotonic()
    counter = 0
    fps = 0
    color = (255, 255, 255)

    while True:
        inPreview = previewQueue.get()
        inDet = detectionNNQueue.get()
        depth = depthQueue.get()

        counter += 1
        current_time = time.monotonic()
        if (current_time - startTime) > 1:
            fps = counter / (current_time - startTime)
            counter = 0
            startTime = current_time

        frame = inPreview.getCvFrame()

        depthFrame = depth.getFrame()  # depthFrame values are in millimeters

        # Subsample the depth frame (every 4th row) to speed up the percentile computation
        depth_downscaled = depthFrame[::4]
        if np.all(depth_downscaled == 0):
            min_depth = 0  # Set a default minimum depth value when all elements are zero
        else:
            min_depth = np.percentile(depth_downscaled[depth_downscaled != 0], 1)
        max_depth = np.percentile(depth_downscaled, 99)
        # Normalize the depth map into the 0-255 range and colorize it for display
        depthFrameColor = np.interp(depthFrame, (min_depth, max_depth), (0, 255)).astype(np.uint8)
        depthFrameColor = cv2.applyColorMap(depthFrameColor, cv2.COLORMAP_HOT)

        detections = inDet.detections

        # If the frame is available, draw bounding boxes on it and show the frame
        height = frame.shape[0]
        width = frame.shape[1]
        for detection in detections:
            # Draw the depth ROI used for the spatial calculation on the depth map
            roiData = detection.boundingBoxMapping
            roi = roiData.roi
            roi = roi.denormalize(depthFrameColor.shape[1], depthFrameColor.shape[0])
            topLeft = roi.topLeft()
            bottomRight = roi.bottomRight()
            xmin = int(topLeft.x)
            ymin = int(topLeft.y)
            xmax = int(bottomRight.x)
            ymax = int(bottomRight.y)
            cv2.rectangle(depthFrameColor, (xmin, ymin), (xmax, ymax), color, 1)

            # Denormalize bounding box
            x1 = int(detection.xmin * width)
            x2 = int(detection.xmax * width)
            y1 = int(detection.ymin * height)
            y2 = int(detection.ymax * height)
            try:
                label = labelMap[detection.label]
            except IndexError:
                label = detection.label
            cv2.putText(frame, str(label), (x1 + 10, y1 + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(frame, "{:.2f}".format(detection.confidence*100), (x1 + 10, y1 + 35), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(frame, f"X: {int(detection.spatialCoordinates.x)} mm", (x1 + 10, y1 + 50), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(frame, f"Y: {int(detection.spatialCoordinates.y)} mm", (x1 + 10, y1 + 65), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(frame, f"Z: {int(detection.spatialCoordinates.z)} mm", (x1 + 10, y1 + 80), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)

            cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)

        cv2.putText(frame, "NN fps: {:.2f}".format(fps), (2, frame.shape[0] - 4), cv2.FONT_HERSHEY_TRIPLEX, 0.4, (255, 255, 255))
        cv2.imshow("depth", depthFrameColor)
        cv2.imshow("preview", frame)

        if cv2.waitKey(1) == ord('q'):
            break
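The spatial coordinates are computed on-device from the depth pixels inside a scaled-down copy of each detection's bounding box. If the defaults do not fit your scene, these are the knobs used in the listing above, shown here in isolation (the values are the ones from this example, not requirements):
Python
# Shrink each bounding box to its central 50% before sampling depth,
# which reduces the influence of background pixels at the box edges
spatialDetectionNetwork.setBoundingBoxScaleFactor(0.5)
# Discard depth pixels closer than 100 mm or farther than 5000 mm
spatialDetectionNetwork.setDepthLowerThreshold(100)
spatialDetectionNetwork.setDepthUpperThreshold(5000)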
Pipeline
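The pipeline graph corresponds to the links made in the code above (with syncNN = True):

monoLeft.out -> stereo.left
monoRight.out -> stereo.right
stereo.depth -> spatialDetectionNetwork.inputDepth
camRgb.preview -> spatialDetectionNetwork.input
spatialDetectionNetwork.passthrough -> xoutRgb ("rgb")
spatialDetectionNetwork.out -> xoutNN ("detections")
spatialDetectionNetwork.passthroughDepth -> xoutDepth ("depth")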
Need assistance?
Head over to the Discussion Forum for technical support or any other questions you might have.