RGB & TinyYolo with spatial data
This example shows how to run a tiny YOLO network on the RGB input frame and how to display the RGB preview, the detections, the depth map, and the spatial information (X, Y, Z) for each detection. It is similar to the RGB & MobilenetSSD with spatial data example, except that it runs a tiny YOLO network. The X, Y, Z coordinates are relative to the center of the depth map.
The YOLO-specific decoder parameters are (a minimal configuration sketch follows the list of similar samples below):
- setNumClasses - number of YOLO classes
- setCoordinateSize - number of values per bounding-box coordinate (4: x, y, w, h)
- setAnchors - YOLO anchors
- setAnchorMasks - anchorMasks26, anchorMasks13 (anchorMasks52 additionally for full YOLOv4)
- setIouThreshold - intersection-over-union threshold used for non-maximum suppression
- setConfidenceThreshold - confidence threshold above which objects are detected
Similar samples:
- Spatial location calculator
- Spatial object tracker on RGB
- RGB & MobilenetSSD with spatial data
- Mono & MobilenetSSD with spatial data
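For reference, here is a minimal sketch of just the YOLO decoder configuration. The values and setter names are taken directly from the full source listing further down; only the surrounding pipeline setup is trimmed:
Python
import depthai as dai

pipeline = dai.Pipeline()
spatialDetectionNetwork = pipeline.create(dai.node.YoloSpatialDetectionNetwork)

# Decoder settings for a 416x416 tiny YOLOv3/v4 blob trained on the 80 COCO classes
spatialDetectionNetwork.setNumClasses(80)
spatialDetectionNetwork.setCoordinateSize(4)  # x, y, w, h per bounding box
spatialDetectionNetwork.setAnchors([10,14, 23,27, 37,58, 81,82, 135,169, 344,319])
spatialDetectionNetwork.setAnchorMasks({ "side26": [1,2,3], "side13": [3,4,5] })
spatialDetectionNetwork.setIouThreshold(0.5)         # NMS overlap threshold
spatialDetectionNetwork.setConfidenceThreshold(0.5)  # keep detections above 50% confidence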
Demo
Setup
Please run the install script to download all required dependencies. Please note that this script must be run from a git context, so you have to clone the depthai-python repository first and then run the script:
Command Line
git clone https://github.com/luxonis/depthai-python.git
cd depthai-python/examples
python3 install_requirements.py
Source code
Python
#!/usr/bin/env python3

from pathlib import Path
import sys
import cv2
import depthai as dai
import numpy as np
import time

'''
Spatial Tiny-yolo example
  Performs inference on RGB camera and retrieves spatial location coordinates: x,y,z relative to the center of depth map.
  Can be used for tiny-yolo-v3 or tiny-yolo-v4 networks
'''

# Get argument first
nnBlobPath = str((Path(__file__).parent / Path('../models/yolo-v4-tiny-tf_openvino_2021.4_6shave.blob')).resolve().absolute())
if 1 < len(sys.argv):
    arg = sys.argv[1]
    if arg == "yolo3":
        nnBlobPath = str((Path(__file__).parent / Path('../models/yolo-v3-tiny-tf_openvino_2021.4_6shave.blob')).resolve().absolute())
    elif arg == "yolo4":
        nnBlobPath = str((Path(__file__).parent / Path('../models/yolo-v4-tiny-tf_openvino_2021.4_6shave.blob')).resolve().absolute())
    else:
        nnBlobPath = arg
else:
    print("Using Tiny YoloV4 model. If you wish to use Tiny YOLOv3, call 'tiny_yolo.py yolo3'")
if not Path(nnBlobPath).exists():
    raise FileNotFoundError(f'Required file/s not found, please run "{sys.executable} install_requirements.py"')

# Tiny yolo v3/4 label texts
labelMap = [
    "person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train",
    "truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
    "bird", "cat", "dog", "horse", "sheep", "cow", "elephant",
    "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie",
    "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
    "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
    "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich",
    "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake",
    "chair", "sofa", "pottedplant", "bed", "diningtable", "toilet", "tvmonitor",
    "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven",
    "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors",
    "teddy bear", "hair drier", "toothbrush"
]

syncNN = True
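# When True, the displayed RGB frames are the exact frames the network ran on
# (taken from the detection network's passthrough output), so the drawn boxes
# always match the frame; when False, the live camera preview is shown instead.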

# Create pipeline
pipeline = dai.Pipeline()

# Define sources and outputs
camRgb = pipeline.create(dai.node.ColorCamera)
spatialDetectionNetwork = pipeline.create(dai.node.YoloSpatialDetectionNetwork)
monoLeft = pipeline.create(dai.node.MonoCamera)
monoRight = pipeline.create(dai.node.MonoCamera)
stereo = pipeline.create(dai.node.StereoDepth)
nnNetworkOut = pipeline.create(dai.node.XLinkOut)

xoutRgb = pipeline.create(dai.node.XLinkOut)
xoutNN = pipeline.create(dai.node.XLinkOut)
xoutDepth = pipeline.create(dai.node.XLinkOut)

xoutRgb.setStreamName("rgb")
xoutNN.setStreamName("detections")
xoutDepth.setStreamName("depth")
nnNetworkOut.setStreamName("nnNetwork")

# Properties
camRgb.setPreviewSize(416, 416)
camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
camRgb.setInterleaved(False)
camRgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.BGR)

monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoLeft.setCamera("left")
monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoRight.setCamera("right")

# Setting node configs
stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY)
# Align depth map to the perspective of RGB camera, on which inference is done
stereo.setDepthAlign(dai.CameraBoardSocket.CAM_A)
stereo.setOutputSize(monoLeft.getResolutionWidth(), monoLeft.getResolutionHeight())
stereo.setSubpixel(True)
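# Subpixel mode interpolates disparity below whole-pixel steps, which improves
# depth precision, especially at longer distances.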

spatialDetectionNetwork.setBlobPath(nnBlobPath)
spatialDetectionNetwork.setConfidenceThreshold(0.5)
spatialDetectionNetwork.input.setBlocking(False)
spatialDetectionNetwork.setBoundingBoxScaleFactor(0.5)
spatialDetectionNetwork.setDepthLowerThreshold(100)
spatialDetectionNetwork.setDepthUpperThreshold(5000)
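# The scale factor shrinks each bounding box (to 50% here) before the depth is
# averaged inside it, reducing the influence of background pixels. The depth
# thresholds are in millimeters: pixels closer than 10 cm or farther than 5 m
# are ignored when computing the spatial coordinates.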

# Yolo specific parameters
spatialDetectionNetwork.setNumClasses(80)
spatialDetectionNetwork.setCoordinateSize(4)
spatialDetectionNetwork.setAnchors([10,14, 23,27, 37,58, 81,82, 135,169, 344,319])
spatialDetectionNetwork.setAnchorMasks({ "side26": [1,2,3], "side13": [3,4,5] })
spatialDetectionNetwork.setIouThreshold(0.5)
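# "side26" and "side13" are the 26x26 and 13x13 output grids of tiny YOLO for
# a 416x416 input; each mask selects which of the anchors above that grid uses.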

# Linking
monoLeft.out.link(stereo.left)
monoRight.out.link(stereo.right)

camRgb.preview.link(spatialDetectionNetwork.input)
if syncNN:
    spatialDetectionNetwork.passthrough.link(xoutRgb.input)
else:
    camRgb.preview.link(xoutRgb.input)

spatialDetectionNetwork.out.link(xoutNN.input)

stereo.depth.link(spatialDetectionNetwork.inputDepth)
spatialDetectionNetwork.passthroughDepth.link(xoutDepth.input)
spatialDetectionNetwork.outNetwork.link(nnNetworkOut.input)

# Connect to device and start pipeline
with dai.Device(pipeline) as device:

    # Output queues will be used to get the rgb frames and nn data from the outputs defined above
    previewQueue = device.getOutputQueue(name="rgb", maxSize=4, blocking=False)
    detectionNNQueue = device.getOutputQueue(name="detections", maxSize=4, blocking=False)
    depthQueue = device.getOutputQueue(name="depth", maxSize=4, blocking=False)
    networkQueue = device.getOutputQueue(name="nnNetwork", maxSize=4, blocking=False)

    startTime = time.monotonic()
    counter = 0
    fps = 0
    color = (255, 255, 255)
    printOutputLayersOnce = True

    while True:
        inPreview = previewQueue.get()
        inDet = detectionNNQueue.get()
        depth = depthQueue.get()
        inNN = networkQueue.get()

        if printOutputLayersOnce:
            toPrint = 'Output layer names:'
            for ten in inNN.getAllLayerNames():
                toPrint = f'{toPrint} {ten},'
            print(toPrint)
            printOutputLayersOnce = False

        frame = inPreview.getCvFrame()
        depthFrame = depth.getFrame()  # depthFrame values are in millimeters

        # Subsample every 4th row to speed up the percentile computation below
        depth_downscaled = depthFrame[::4]
        if np.all(depth_downscaled == 0):
            min_depth = 0  # Set a default minimum depth value when all elements are zero
        else:
            min_depth = np.percentile(depth_downscaled[depth_downscaled != 0], 1)
        max_depth = np.percentile(depth_downscaled, 99)
        # Normalize to the 1st-99th percentile range so outliers don't wash out the colormap
        depthFrameColor = np.interp(depthFrame, (min_depth, max_depth), (0, 255)).astype(np.uint8)
        depthFrameColor = cv2.applyColorMap(depthFrameColor, cv2.COLORMAP_HOT)

        counter += 1
        current_time = time.monotonic()
        if (current_time - startTime) > 1:
            fps = counter / (current_time - startTime)
            counter = 0
            startTime = current_time

        detections = inDet.detections

        # If the frame is available, draw bounding boxes on it and show the frame
        height = frame.shape[0]
        width = frame.shape[1]
        for detection in detections:
            roiData = detection.boundingBoxMapping
            roi = roiData.roi
            roi = roi.denormalize(depthFrameColor.shape[1], depthFrameColor.shape[0])
            topLeft = roi.topLeft()
            bottomRight = roi.bottomRight()
            xmin = int(topLeft.x)
            ymin = int(topLeft.y)
            xmax = int(bottomRight.x)
            ymax = int(bottomRight.y)
            cv2.rectangle(depthFrameColor, (xmin, ymin), (xmax, ymax), color, 1)

            # Denormalize bounding box
            x1 = int(detection.xmin * width)
            x2 = int(detection.xmax * width)
            y1 = int(detection.ymin * height)
            y2 = int(detection.ymax * height)
            try:
                label = labelMap[detection.label]
            except IndexError:
                label = detection.label
            cv2.putText(frame, str(label), (x1 + 10, y1 + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(frame, "{:.2f}".format(detection.confidence*100), (x1 + 10, y1 + 35), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(frame, f"X: {int(detection.spatialCoordinates.x)} mm", (x1 + 10, y1 + 50), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(frame, f"Y: {int(detection.spatialCoordinates.y)} mm", (x1 + 10, y1 + 65), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(frame, f"Z: {int(detection.spatialCoordinates.z)} mm", (x1 + 10, y1 + 80), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)

            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 1)

        cv2.putText(frame, "NN fps: {:.2f}".format(fps), (2, frame.shape[0] - 4), cv2.FONT_HERSHEY_TRIPLEX, 0.4, color)
        cv2.imshow("depth", depthFrameColor)
        cv2.imshow("rgb", frame)

        if cv2.waitKey(1) == ord('q'):
            break
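Running the example opens two windows: rgb, with the detections, labels, confidences, and X/Y/Z coordinates overlaid, and depth, with the colorized depth map and the ROIs used for the spatial calculations. Press q to quit.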
Pipeline
Need assistance?
Head over to the Discussion Forum for technical support or any other questions you might have.