RGB & TinyYolo with spatial data

This example shows how to run YOLO on the RGB input frame and how to display the RGB preview, detections, depth map, and spatial information (X, Y, Z). It is similar to the RGB & MobilenetSSD with spatial data example, except that it runs a Tiny YOLO network. The X, Y, Z coordinates are relative to the center of the depth map.

The YOLO decoding is configured on the device with the following setters:

  • setNumClasses - number of YOLO classes
  • setCoordinateSize - size of a coordinate
  • setAnchors - YOLO anchors
  • setAnchorMasks - anchorMasks26, anchorMasks13 (anchorMasks52 - additionally for full YOLOv4)
  • setIouThreshold - intersection-over-union threshold
  • setConfidenceThreshold - confidence threshold above which objects are detected

A minimal configuration sketch is shown below; the complete pipeline follows in the Source code section.
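The snippet below is a minimal, standalone sketch of these setters applied to a YoloSpatialDetectionNetwork node. The values are the Tiny YOLO defaults used by the full example below; they are not universal and must match whatever model you deploy.

Python
import depthai as dai

pipeline = dai.Pipeline()
yolo = pipeline.create(dai.node.YoloSpatialDetectionNetwork)

# Decoding parameters - these must match the trained model
yolo.setNumClasses(80)        # COCO has 80 classes
yolo.setCoordinateSize(4)     # each box is encoded as x, y, w, h
yolo.setAnchors([10,14, 23,27, 37,58, 81,82, 135,169, 344,319])
yolo.setAnchorMasks({"side26": [1,2,3], "side13": [3,4,5]})
yolo.setIouThreshold(0.5)
yolo.setConfidenceThreshold(0.5)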


Setup

Please run the install script to download all required dependencies. Please note that this script must be run from within the cloned Git repository, so you have to download the depthai-python repository first and then run the script:
Command Line
git clone https://github.com/luxonis/depthai-python.git
cd depthai-python/examples
python3 install_requirements.py
For additional information, please follow the installation guide.
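Before running the example, you can sanity-check the installation with a short snippet (assuming an OAK device is connected):

Python
import depthai as dai

print(dai.__version__)                      # installed depthai version
print(dai.Device.getAllAvailableDevices())  # enumerates connected devices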

Source code

Python
#!/usr/bin/env python3

from pathlib import Path
import sys
import cv2
import depthai as dai
import numpy as np
import time

'''
Spatial Tiny-yolo example
  Performs inference on RGB camera and retrieves spatial location coordinates: x,y,z relative to the center of depth map.
  Can be used for tiny-yolo-v3 or tiny-yolo-v4 networks
'''

# Get argument first
nnBlobPath = str((Path(__file__).parent / Path('../models/yolo-v4-tiny-tf_openvino_2021.4_6shave.blob')).resolve().absolute())
if 1 < len(sys.argv):
    arg = sys.argv[1]
    if arg == "yolo3":
        nnBlobPath = str((Path(__file__).parent / Path('../models/yolo-v3-tiny-tf_openvino_2021.4_6shave.blob')).resolve().absolute())
    elif arg == "yolo4":
        nnBlobPath = str((Path(__file__).parent / Path('../models/yolo-v4-tiny-tf_openvino_2021.4_6shave.blob')).resolve().absolute())
    else:
        nnBlobPath = arg
else:
    print("Using Tiny YoloV4 model. If you wish to use Tiny YOLOv3, call 'tiny_yolo.py yolo3'")

if not Path(nnBlobPath).exists():
    raise FileNotFoundError(f'Required file/s not found, please run "{sys.executable} install_requirements.py"')

# Tiny yolo v3/4 label texts
labelMap = [
    "person",         "bicycle",    "car",           "motorbike",     "aeroplane",   "bus",           "train",
    "truck",          "boat",       "traffic light", "fire hydrant",  "stop sign",   "parking meter", "bench",
    "bird",           "cat",        "dog",           "horse",         "sheep",       "cow",           "elephant",
    "bear",           "zebra",      "giraffe",       "backpack",      "umbrella",    "handbag",       "tie",
    "suitcase",       "frisbee",    "skis",          "snowboard",     "sports ball", "kite",          "baseball bat",
    "baseball glove", "skateboard", "surfboard",     "tennis racket", "bottle",      "wine glass",    "cup",
    "fork",           "knife",      "spoon",         "bowl",          "banana",      "apple",         "sandwich",
    "orange",         "broccoli",   "carrot",        "hot dog",       "pizza",       "donut",         "cake",
    "chair",          "sofa",       "pottedplant",   "bed",           "diningtable", "toilet",        "tvmonitor",
    "laptop",         "mouse",      "remote",        "keyboard",      "cell phone",  "microwave",     "oven",
    "toaster",        "sink",       "refrigerator",  "book",          "clock",       "vase",          "scissors",
    "teddy bear",     "hair drier", "toothbrush"
]

syncNN = True

# Create pipeline
pipeline = dai.Pipeline()

# Define sources and outputs
camRgb = pipeline.create(dai.node.ColorCamera)
spatialDetectionNetwork = pipeline.create(dai.node.YoloSpatialDetectionNetwork)
monoLeft = pipeline.create(dai.node.MonoCamera)
monoRight = pipeline.create(dai.node.MonoCamera)
stereo = pipeline.create(dai.node.StereoDepth)
nnNetworkOut = pipeline.create(dai.node.XLinkOut)

xoutRgb = pipeline.create(dai.node.XLinkOut)
xoutNN = pipeline.create(dai.node.XLinkOut)
xoutDepth = pipeline.create(dai.node.XLinkOut)

xoutRgb.setStreamName("rgb")
xoutNN.setStreamName("detections")
xoutDepth.setStreamName("depth")
nnNetworkOut.setStreamName("nnNetwork")

# Properties
camRgb.setPreviewSize(416, 416)
camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
camRgb.setInterleaved(False)
camRgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.BGR)

monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoLeft.setCamera("left")
monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoRight.setCamera("right")

# Setting node configs
stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY)
# Align depth map to the perspective of RGB camera, on which inference is done
stereo.setDepthAlign(dai.CameraBoardSocket.CAM_A)
stereo.setOutputSize(monoLeft.getResolutionWidth(), monoLeft.getResolutionHeight())
stereo.setSubpixel(True)

spatialDetectionNetwork.setBlobPath(nnBlobPath)
spatialDetectionNetwork.setConfidenceThreshold(0.5)
spatialDetectionNetwork.input.setBlocking(False)
spatialDetectionNetwork.setBoundingBoxScaleFactor(0.5)
spatialDetectionNetwork.setDepthLowerThreshold(100)
spatialDetectionNetwork.setDepthUpperThreshold(5000)

# Yolo specific parameters
spatialDetectionNetwork.setNumClasses(80)
spatialDetectionNetwork.setCoordinateSize(4)
spatialDetectionNetwork.setAnchors([10,14, 23,27, 37,58, 81,82, 135,169, 344,319])
spatialDetectionNetwork.setAnchorMasks({ "side26": [1,2,3], "side13": [3,4,5] })
spatialDetectionNetwork.setIouThreshold(0.5)
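# Note: "side26" / "side13" refer to the 26x26 and 13x13 YOLO output grids of the
# 416x416 input; the anchors above are the standard Tiny YOLO values. If you deploy
# your own model, both must match its training configuration.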

# Linking
monoLeft.out.link(stereo.left)
monoRight.out.link(stereo.right)

camRgb.preview.link(spatialDetectionNetwork.input)
if syncNN:
    spatialDetectionNetwork.passthrough.link(xoutRgb.input)
else:
    camRgb.preview.link(xoutRgb.input)

spatialDetectionNetwork.out.link(xoutNN.input)

stereo.depth.link(spatialDetectionNetwork.inputDepth)
spatialDetectionNetwork.passthroughDepth.link(xoutDepth.input)
spatialDetectionNetwork.outNetwork.link(nnNetworkOut.input)
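# Dataflow at this point: mono left/right -> StereoDepth -> depth into the detection
# network; RGB preview -> detection network; detections, passthrough frames, depth,
# and raw NN output are all streamed to the host over XLink.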

# Connect to device and start pipeline
with dai.Device(pipeline) as device:

    # Output queues will be used to get the rgb frames and nn data from the outputs defined above
    previewQueue = device.getOutputQueue(name="rgb", maxSize=4, blocking=False)
    detectionNNQueue = device.getOutputQueue(name="detections", maxSize=4, blocking=False)
    depthQueue = device.getOutputQueue(name="depth", maxSize=4, blocking=False)
    networkQueue = device.getOutputQueue(name="nnNetwork", maxSize=4, blocking=False)

    startTime = time.monotonic()
    counter = 0
    fps = 0
    color = (255, 255, 255)
    printOutputLayersOnce = True

    while True:
        inPreview = previewQueue.get()
        inDet = detectionNNQueue.get()
        depth = depthQueue.get()
        inNN = networkQueue.get()

        if printOutputLayersOnce:
            toPrint = 'Output layer names:'
            for ten in inNN.getAllLayerNames():
                toPrint = f'{toPrint} {ten},'
            print(toPrint)
            printOutputLayersOnce = False

        frame = inPreview.getCvFrame()
        depthFrame = depth.getFrame() # depthFrame values are in millimeters

        # Subsample the depth frame (every 4th row) to speed up the percentile
        # computation, then normalize to 0-255 using robust percentiles so that
        # invalid (zero) pixels and outliers don't skew the visualization
        depth_downscaled = depthFrame[::4]
        if np.all(depth_downscaled == 0):
            min_depth = 0  # Set a default minimum depth value when all elements are zero
        else:
            min_depth = np.percentile(depth_downscaled[depth_downscaled != 0], 1)
        max_depth = np.percentile(depth_downscaled, 99)
        depthFrameColor = np.interp(depthFrame, (min_depth, max_depth), (0, 255)).astype(np.uint8)
        depthFrameColor = cv2.applyColorMap(depthFrameColor, cv2.COLORMAP_HOT)

        counter += 1
        current_time = time.monotonic()
        if (current_time - startTime) > 1:
            fps = counter / (current_time - startTime)
            counter = 0
            startTime = current_time
        detections = inDet.detections

        # If the frame is available, draw bounding boxes on it and show the frame
        height = frame.shape[0]
        width  = frame.shape[1]
        for detection in detections:
            roiData = detection.boundingBoxMapping
            roi = roiData.roi
            roi = roi.denormalize(depthFrameColor.shape[1], depthFrameColor.shape[0])
            topLeft = roi.topLeft()
            bottomRight = roi.bottomRight()
            xmin = int(topLeft.x)
            ymin = int(topLeft.y)
            xmax = int(bottomRight.x)
            ymax = int(bottomRight.y)
            cv2.rectangle(depthFrameColor, (xmin, ymin), (xmax, ymax), color, 1)

            # Denormalize bounding box
            x1 = int(detection.xmin * width)
            x2 = int(detection.xmax * width)
            y1 = int(detection.ymin * height)
            y2 = int(detection.ymax * height)
            try:
                label = labelMap[detection.label]
            except IndexError:
                label = detection.label  # Fall back to the raw class id
            cv2.putText(frame, str(label), (x1 + 10, y1 + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(frame, "{:.2f}".format(detection.confidence*100), (x1 + 10, y1 + 35), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(frame, f"X: {int(detection.spatialCoordinates.x)} mm", (x1 + 10, y1 + 50), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(frame, f"Y: {int(detection.spatialCoordinates.y)} mm", (x1 + 10, y1 + 65), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(frame, f"Z: {int(detection.spatialCoordinates.z)} mm", (x1 + 10, y1 + 80), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)

            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
        cv2.putText(frame, "NN fps: {:.2f}".format(fps), (2, frame.shape[0] - 4), cv2.FONT_HERSHEY_TRIPLEX, 0.4, color)
        cv2.imshow("depth", depthFrameColor)
        cv2.imshow("rgb", frame)

        if cv2.waitKey(1) == ord('q'):
            break
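To choose the model at runtime, pass an argument as hinted by the usage message in the script (assuming it is saved as tiny_yolo.py):

Command Line
python3 tiny_yolo.py                        # Tiny YOLOv4 (default)
python3 tiny_yolo.py yolo3                  # Tiny YOLOv3
python3 tiny_yolo.py path/to/custom.blob    # any compatible YOLO blob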

Need assistance?

Head over to the Discussion Forum for technical support or any other questions you might have.