Mono & MobilenetSSD with spatial data
Similar samples:
- Spatial location calculator
- Spatial object tracker on RGB
- RGB & MobilenetSSD with spatial data
- RGB & TinyYolo with spatial data
Demo
Setup
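The example needs the depthai Python package and the mobilenet-ssd blob referenced below; if the blob is not found, the script raises a FileNotFoundError asking you to run install_requirements.py, which fetches the missing model files.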
Source code
Python
#!/usr/bin/env python3

from pathlib import Path
import sys
import cv2
import depthai as dai
import numpy as np
import time

'''
Mobilenet SSD device side decoding demo
The "mobilenet-ssd" model is a Single-Shot multibox Detection (SSD) network intended
to perform object detection. This model is implemented using the Caffe framework.
For details about this model, check out the repository <https://github.com/chuanqi305/MobileNet-SSD>.
'''

# Get argument first
nnPath = str((Path(__file__).parent / Path('../models/mobilenet-ssd_openvino_2021.4_6shave.blob')).resolve().absolute())
if len(sys.argv) > 1:
    nnPath = sys.argv[1]

if not Path(nnPath).exists():
    raise FileNotFoundError(f'Required file/s not found, please run "{sys.executable} install_requirements.py"')

# MobilenetSSD label texts
labelMap = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow",
            "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]

syncNN = True

# Create pipeline
pipeline = dai.Pipeline()

# Define sources and outputs
monoLeft = pipeline.create(dai.node.MonoCamera)
monoRight = pipeline.create(dai.node.MonoCamera)
stereo = pipeline.create(dai.node.StereoDepth)
spatialDetectionNetwork = pipeline.create(dai.node.MobileNetSpatialDetectionNetwork)
imageManip = pipeline.create(dai.node.ImageManip)

xoutManip = pipeline.create(dai.node.XLinkOut)
nnOut = pipeline.create(dai.node.XLinkOut)
xoutDepth = pipeline.create(dai.node.XLinkOut)

xoutManip.setStreamName("right")
nnOut.setStreamName("detections")
xoutDepth.setStreamName("depth")

# Properties
imageManip.initialConfig.setResize(300, 300)
# The NN model expects BGR input. By default, the ImageManip output type would be the same as its input (gray in this case)
imageManip.initialConfig.setFrameType(dai.ImgFrame.Type.BGR888p)

monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoLeft.setCamera("left")
monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoRight.setCamera("right")

# StereoDepth
stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY)
stereo.setSubpixel(True)

# Define a neural network that will make predictions based on the source frames
spatialDetectionNetwork.setConfidenceThreshold(0.5)
spatialDetectionNetwork.setBlobPath(nnPath)
spatialDetectionNetwork.input.setBlocking(False)
# Depth for each detection is sampled from a bounding box scaled to 50% of the
# detection's size; depth values outside 100 mm - 5000 mm are ignored
spatialDetectionNetwork.setBoundingBoxScaleFactor(0.5)
spatialDetectionNetwork.setDepthLowerThreshold(100)
spatialDetectionNetwork.setDepthUpperThreshold(5000)

# Linking
monoLeft.out.link(stereo.left)
monoRight.out.link(stereo.right)

imageManip.out.link(spatialDetectionNetwork.input)
if syncNN:
    spatialDetectionNetwork.passthrough.link(xoutManip.input)
else:
    imageManip.out.link(xoutManip.input)

spatialDetectionNetwork.out.link(nnOut.input)

stereo.rectifiedRight.link(imageManip.inputImage)
stereo.depth.link(spatialDetectionNetwork.inputDepth)
spatialDetectionNetwork.passthroughDepth.link(xoutDepth.input)

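# To recap the links above: both mono cameras feed StereoDepth; its rectified right
# output goes through ImageManip (resized to 300x300 BGR) into the spatial detection
# network, while its depth output feeds the network's inputDepth. The network then
# sends detections ("detections"), the passthrough frame ("right") and the passthrough
# depth ("depth") to the host over XLink.
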
# Connect to device and start pipeline
with dai.Device(pipeline) as device:

    # Output queues will be used to get the rectified right frames, depth frames and nn data from the outputs defined above
    previewQueue = device.getOutputQueue(name="right", maxSize=4, blocking=False)
    detectionNNQueue = device.getOutputQueue(name="detections", maxSize=4, blocking=False)
    depthQueue = device.getOutputQueue(name="depth", maxSize=4, blocking=False)

    rectifiedRight = None
    detections = []

    startTime = time.monotonic()
    counter = 0
    fps = 0
    color = (255, 255, 255)

    while True:
        inRectified = previewQueue.get()
        inDet = detectionNNQueue.get()
        inDepth = depthQueue.get()

        counter += 1
        currentTime = time.monotonic()
        if (currentTime - startTime) > 1:
            fps = counter / (currentTime - startTime)
            counter = 0
            startTime = currentTime

        rectifiedRight = inRectified.getCvFrame()

        depthFrame = inDepth.getFrame()  # depthFrame values are in millimeters

        # Colorize the depth frame for display; percentiles are used so that a few
        # outlier values do not wash out the colormap
        depth_downscaled = depthFrame[::4]
        if np.all(depth_downscaled == 0):
            min_depth = 0  # Set a default minimum depth value when all elements are zero
        else:
            min_depth = np.percentile(depth_downscaled[depth_downscaled != 0], 1)
        max_depth = np.percentile(depth_downscaled, 99)
        depthFrameColor = np.interp(depthFrame, (min_depth, max_depth), (0, 255)).astype(np.uint8)
        depthFrameColor = cv2.applyColorMap(depthFrameColor, cv2.COLORMAP_HOT)

        detections = inDet.detections

        # Draw bounding boxes and spatial data on the rectified right frame
        height = rectifiedRight.shape[0]
        width = rectifiedRight.shape[1]
        for detection in detections:
            roiData = detection.boundingBoxMapping
            roi = roiData.roi
            roi = roi.denormalize(depthFrameColor.shape[1], depthFrameColor.shape[0])
            topLeft = roi.topLeft()
            bottomRight = roi.bottomRight()
            xmin = int(topLeft.x)
            ymin = int(topLeft.y)
            xmax = int(bottomRight.x)
            ymax = int(bottomRight.y)
            cv2.rectangle(depthFrameColor, (xmin, ymin), (xmax, ymax), color, 1)

            # Denormalize bounding box
            x1 = int(detection.xmin * width)
            x2 = int(detection.xmax * width)
            y1 = int(detection.ymin * height)
            y2 = int(detection.ymax * height)

            try:
                label = labelMap[detection.label]
            except IndexError:
                label = detection.label

            cv2.putText(rectifiedRight, str(label), (x1 + 10, y1 + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(rectifiedRight, "{:.2f}".format(detection.confidence*100), (x1 + 10, y1 + 35), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(rectifiedRight, f"X: {int(detection.spatialCoordinates.x)} mm", (x1 + 10, y1 + 50), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(rectifiedRight, f"Y: {int(detection.spatialCoordinates.y)} mm", (x1 + 10, y1 + 65), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(rectifiedRight, f"Z: {int(detection.spatialCoordinates.z)} mm", (x1 + 10, y1 + 80), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)

            cv2.rectangle(rectifiedRight, (x1, y1), (x2, y2), color, 1)

        cv2.putText(rectifiedRight, "NN fps: {:.2f}".format(fps), (2, rectifiedRight.shape[0] - 4), cv2.FONT_HERSHEY_TRIPLEX, 0.4, color)
        cv2.imshow("depth", depthFrameColor)
        cv2.imshow("rectified right", rectifiedRight)

        if cv2.waitKey(1) == ord('q'):
            break
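If you only need the detection labels and spatial coordinates (for example on a headless machine), a minimal sketch along the following lines should work. It reuses the pipeline and labelMap defined above and only reads the detections queue; the frame and depth queues are opened non-blocking so their unread messages are simply dropped.

# Minimal headless variant (a sketch; reuses `pipeline` and `labelMap` from above)
with dai.Device(pipeline) as device:
    detQueue = device.getOutputQueue(name="detections", maxSize=4, blocking=False)
    # Keep the other streams non-blocking so unread frames are dropped instead of stalling the device
    device.getOutputQueue(name="right", maxSize=4, blocking=False)
    device.getOutputQueue(name="depth", maxSize=4, blocking=False)

    while True:
        inDet = detQueue.get()  # blocks until the next NN result arrives
        for det in inDet.detections:
            label = labelMap[det.label] if det.label < len(labelMap) else det.label
            print(f"{label}: X={int(det.spatialCoordinates.x)} mm, "
                  f"Y={int(det.spatialCoordinates.y)} mm, "
                  f"Z={int(det.spatialCoordinates.z)} mm")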
Need assistance?
Head over to the Discussion Forum for technical support or any other questions you might have.