RGB & MobilenetSSD with spatial data
Similar samples:
- Spatial location calculator
- Spatial object tracker on RGB
- Mono & MobilenetSSD with spatial data
- RGB & TinyYolo with spatial data
Demo
Setup
Command Line
git clone https://github.com/luxonis/depthai-python.git
cd depthai-python/examples
python3 install_requirements.py
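Once the requirements are installed, the example can be launched from the examples directory (the script path below assumes the current depthai-python repository layout and may differ between releases):

python3 SpatialDetection/spatial_mobilenet.py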
Source code
Python (also available in C++; source on GitHub)
#!/usr/bin/env python3

from pathlib import Path
import sys
import cv2
import depthai as dai
import numpy as np
import time

'''
Spatial detection network demo.
Performs inference on the RGB camera and retrieves spatial location coordinates: x, y, z relative to the center of the depth map.
'''

# Get argument first
nnBlobPath = str((Path(__file__).parent / Path('../models/mobilenet-ssd_openvino_2021.4_6shave.blob')).resolve().absolute())
if len(sys.argv) > 1:
    nnBlobPath = sys.argv[1]

if not Path(nnBlobPath).exists():
    raise FileNotFoundError(f'Required file/s not found, please run "{sys.executable} install_requirements.py"')

# MobilenetSSD label texts
labelMap = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow",
            "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]

syncNN = True

# Create pipeline
pipeline = dai.Pipeline()

# Define sources and outputs
camRgb = pipeline.create(dai.node.ColorCamera)
spatialDetectionNetwork = pipeline.create(dai.node.MobileNetSpatialDetectionNetwork)
monoLeft = pipeline.create(dai.node.MonoCamera)
monoRight = pipeline.create(dai.node.MonoCamera)
stereo = pipeline.create(dai.node.StereoDepth)

xoutRgb = pipeline.create(dai.node.XLinkOut)
xoutNN = pipeline.create(dai.node.XLinkOut)
xoutDepth = pipeline.create(dai.node.XLinkOut)

xoutRgb.setStreamName("rgb")
xoutNN.setStreamName("detections")
xoutDepth.setStreamName("depth")

# Properties
camRgb.setPreviewSize(300, 300)
camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
camRgb.setInterleaved(False)
camRgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.BGR)

monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoLeft.setCamera("left")
monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoRight.setCamera("right")

# Setting node configs
stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY)
# Align depth map to the perspective of RGB camera, on which inference is done
stereo.setDepthAlign(dai.CameraBoardSocket.CAM_A)
stereo.setSubpixel(True)
stereo.setOutputSize(monoLeft.getResolutionWidth(), monoLeft.getResolutionHeight())
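# Aligning depth to the RGB camera (CAM_A) gives the depth map the same
# perspective as the frames inference runs on, so detection bounding boxes can
# be mapped directly onto depth pixels; subpixel mode improves depth precision.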

spatialDetectionNetwork.setBlobPath(nnBlobPath)
spatialDetectionNetwork.setConfidenceThreshold(0.5)
spatialDetectionNetwork.input.setBlocking(False)
spatialDetectionNetwork.setBoundingBoxScaleFactor(0.5)
spatialDetectionNetwork.setDepthLowerThreshold(100)
spatialDetectionNetwork.setDepthUpperThreshold(5000)
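# The bounding box is scaled down (factor 0.5) before depth pixels are sampled,
# so the spatial estimate comes from the object's center rather than its edges;
# depth values outside the 100-5000 mm range are ignored in the calculation.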

# Linking
monoLeft.out.link(stereo.left)
monoRight.out.link(stereo.right)

camRgb.preview.link(spatialDetectionNetwork.input)
if syncNN:
    spatialDetectionNetwork.passthrough.link(xoutRgb.input)
else:
    camRgb.preview.link(xoutRgb.input)
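# Using the passthrough output (rather than camRgb.preview directly) re-emits
# the exact frame inference ran on, keeping the preview in sync with detections.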

spatialDetectionNetwork.out.link(xoutNN.input)

stereo.depth.link(spatialDetectionNetwork.inputDepth)
spatialDetectionNetwork.passthroughDepth.link(xoutDepth.input)

# Connect to device and start pipeline
with dai.Device(pipeline) as device:

    # Output queues will be used to get the rgb frames and nn data from the outputs defined above
    previewQueue = device.getOutputQueue(name="rgb", maxSize=4, blocking=False)
    detectionNNQueue = device.getOutputQueue(name="detections", maxSize=4, blocking=False)
    depthQueue = device.getOutputQueue(name="depth", maxSize=4, blocking=False)
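    # With blocking=False and maxSize=4 the queues keep only the newest
    # messages, so the host display never falls behind the camera.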

    startTime = time.monotonic()
    counter = 0
    fps = 0
    color = (255, 255, 255)

    while True:
        inPreview = previewQueue.get()
        inDet = detectionNNQueue.get()
        depth = depthQueue.get()

        counter += 1
        current_time = time.monotonic()
        if (current_time - startTime) > 1:
            fps = counter / (current_time - startTime)
            counter = 0
            startTime = current_time

        frame = inPreview.getCvFrame()

        depthFrame = depth.getFrame()  # depthFrame values are in millimeters

        depth_downscaled = depthFrame[::4]
        if np.all(depth_downscaled == 0):
            min_depth = 0  # Set a default minimum depth value when all elements are zero
        else:
            min_depth = np.percentile(depth_downscaled[depth_downscaled != 0], 1)
        max_depth = np.percentile(depth_downscaled, 99)
        depthFrameColor = np.interp(depthFrame, (min_depth, max_depth), (0, 255)).astype(np.uint8)
        depthFrameColor = cv2.applyColorMap(depthFrameColor, cv2.COLORMAP_HOT)
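        # Percentile-based min/max (computed on a row-subsampled copy, ignoring
        # zero/invalid pixels) stretches the useful depth range over the full
        # 0-255 colormap while suppressing outliers.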

        detections = inDet.detections

        # If the frame is available, draw bounding boxes on it and show the frame
        height = frame.shape[0]
        width = frame.shape[1]
        for detection in detections:
            roiData = detection.boundingBoxMapping
            roi = roiData.roi
            roi = roi.denormalize(depthFrameColor.shape[1], depthFrameColor.shape[0])
            topLeft = roi.topLeft()
            bottomRight = roi.bottomRight()
            xmin = int(topLeft.x)
            ymin = int(topLeft.y)
            xmax = int(bottomRight.x)
            ymax = int(bottomRight.y)
            cv2.rectangle(depthFrameColor, (xmin, ymin), (xmax, ymax), color, 1)
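            # boundingBoxMapping holds the (scaled) ROI that was actually used
            # for depth sampling, in depth-frame coordinates, which is why it is
            # drawn on depthFrameColor rather than on the RGB preview.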

            # Denormalize bounding box
            x1 = int(detection.xmin * width)
            x2 = int(detection.xmax * width)
            y1 = int(detection.ymin * height)
            y2 = int(detection.ymax * height)
            try:
                label = labelMap[detection.label]
            except IndexError:
                label = detection.label
            cv2.putText(frame, str(label), (x1 + 10, y1 + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(frame, "{:.2f}".format(detection.confidence*100), (x1 + 10, y1 + 35), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(frame, f"X: {int(detection.spatialCoordinates.x)} mm", (x1 + 10, y1 + 50), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(frame, f"Y: {int(detection.spatialCoordinates.y)} mm", (x1 + 10, y1 + 65), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(frame, f"Z: {int(detection.spatialCoordinates.z)} mm", (x1 + 10, y1 + 80), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)

            cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 1)

        cv2.putText(frame, "NN fps: {:.2f}".format(fps), (2, frame.shape[0] - 4), cv2.FONT_HERSHEY_TRIPLEX, 0.4, (255, 255, 255))
        cv2.imshow("depth", depthFrameColor)
        cv2.imshow("preview", frame)

        if cv2.waitKey(1) == ord('q'):
            break
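The spatial coordinates reported for each detection are in millimeters, relative to the center of the depth map. As a minimal host-side sketch (not part of the example; distance_mm is a hypothetical helper name), the straight-line distance to a detection can be derived from them:

import math

def distance_mm(detection) -> float:
    # Euclidean distance from the camera to the detection, in millimeters
    c = detection.spatialCoordinates
    return math.sqrt(c.x ** 2 + c.y ** 2 + c.z ** 2)

For instance, calling print(f"{label}: {distance_mm(detection) / 1000:.2f} m") inside the detection loop would log each object's distance in meters.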
Need assistance?
Head over to the Discussion Forum for technical support or any other questions you might have.