RGB & MobilenetSSD with spatial data

This example shows how to run MobileNetv2SSD on the RGB input frame, and how to display both the RGB preview, detections, depth map and spatial information (X,Y,Z). It's similar to example RGB & MobilenetSSD except it has spatial data. X,Y,Z coordinates are relative to the center of depth map.setConfidenceThreshold - confidence threshold above which objects are detected

Similar samples:

Demo

Setup

Please run the install script to download all required dependencies. Please note that this script must be ran from git context, so you have to download the depthai-python repository first and then run the script

Command Line

1git clone https://github.com/luxonis/depthai-python.git
2cd depthai-python/examples
3python3 install_requirements.py

For additional information, please follow the installation guide.

Source code

Python

C++

Python

GitHub

1#!/usr/bin/env python3
2
3from pathlib import Path
4import sys
5import cv2
6import depthai as dai
7import numpy as np
8import time
9
10'''
11Spatial detection network demo.
12    Performs inference on RGB camera and retrieves spatial location coordinates: x,y,z relative to the center of depth map.
13'''
14
15# Get argument first
16nnBlobPath = str((Path(__file__).parent / Path('../models/mobilenet-ssd_openvino_2021.4_6shave.blob')).resolve().absolute())
17if len(sys.argv) > 1:
18    nnBlobPath = sys.argv[1]
19
20if not Path(nnBlobPath).exists():
21    import sys
22    raise FileNotFoundError(f'Required file/s not found, please run "{sys.executable} install_requirements.py"')
23
24# MobilenetSSD label texts
25labelMap = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow",
26            "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
27
28syncNN = True
29
30# Create pipeline
31pipeline = dai.Pipeline()
32
33# Define sources and outputs
34camRgb = pipeline.create(dai.node.ColorCamera)
35spatialDetectionNetwork = pipeline.create(dai.node.MobileNetSpatialDetectionNetwork)
36monoLeft = pipeline.create(dai.node.MonoCamera)
37monoRight = pipeline.create(dai.node.MonoCamera)
38stereo = pipeline.create(dai.node.StereoDepth)
39
40xoutRgb = pipeline.create(dai.node.XLinkOut)
41xoutNN = pipeline.create(dai.node.XLinkOut)
42xoutDepth = pipeline.create(dai.node.XLinkOut)
43
44xoutRgb.setStreamName("rgb")
45xoutNN.setStreamName("detections")
46xoutDepth.setStreamName("depth")
47
48# Properties
49camRgb.setPreviewSize(300, 300)
50camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
51camRgb.setInterleaved(False)
52camRgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.BGR)
53
54monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
55monoLeft.setCamera("left")
56monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
57monoRight.setCamera("right")
58
59# Setting node configs
60stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY)
61# Align depth map to the perspective of RGB camera, on which inference is done
62stereo.setDepthAlign(dai.CameraBoardSocket.CAM_A)
63stereo.setSubpixel(True)
64stereo.setOutputSize(monoLeft.getResolutionWidth(), monoLeft.getResolutionHeight())
65
66spatialDetectionNetwork.setBlobPath(nnBlobPath)
67spatialDetectionNetwork.setConfidenceThreshold(0.5)
68spatialDetectionNetwork.input.setBlocking(False)
69spatialDetectionNetwork.setBoundingBoxScaleFactor(0.5)
70spatialDetectionNetwork.setDepthLowerThreshold(100)
71spatialDetectionNetwork.setDepthUpperThreshold(5000)
72
73# Linking
74monoLeft.out.link(stereo.left)
75monoRight.out.link(stereo.right)
76
77camRgb.preview.link(spatialDetectionNetwork.input)
78if syncNN:
79    spatialDetectionNetwork.passthrough.link(xoutRgb.input)
80else:
81    camRgb.preview.link(xoutRgb.input)
82
83spatialDetectionNetwork.out.link(xoutNN.input)
84
85stereo.depth.link(spatialDetectionNetwork.inputDepth)
86spatialDetectionNetwork.passthroughDepth.link(xoutDepth.input)
87
88# Connect to device and start pipeline
89with dai.Device(pipeline) as device:
90
91    # Output queues will be used to get the rgb frames and nn data from the outputs defined above
92    previewQueue = device.getOutputQueue(name="rgb", maxSize=4, blocking=False)
93    detectionNNQueue = device.getOutputQueue(name="detections", maxSize=4, blocking=False)
94    depthQueue = device.getOutputQueue(name="depth", maxSize=4, blocking=False)
95
96    startTime = time.monotonic()
97    counter = 0
98    fps = 0
99    color = (255, 255, 255)
100
101    while True:
102        inPreview = previewQueue.get()
103        inDet = detectionNNQueue.get()
104        depth = depthQueue.get()
105
106        counter+=1
107        current_time = time.monotonic()
108        if (current_time - startTime) > 1 :
109            fps = counter / (current_time - startTime)
110            counter = 0
111            startTime = current_time
112
113        frame = inPreview.getCvFrame()
114
115        depthFrame = depth.getFrame() # depthFrame values are in millimeters
116
117        depth_downscaled = depthFrame[::4]
118        if np.all(depth_downscaled == 0):
119            min_depth = 0  # Set a default minimum depth value when all elements are zero
120        else:
121            min_depth = np.percentile(depth_downscaled[depth_downscaled != 0], 1)
122        max_depth = np.percentile(depth_downscaled, 99)
123        depthFrameColor = np.interp(depthFrame, (min_depth, max_depth), (0, 255)).astype(np.uint8)
124        depthFrameColor = cv2.applyColorMap(depthFrameColor, cv2.COLORMAP_HOT)
125
126        detections = inDet.detections
127
128        # If the frame is available, draw bounding boxes on it and show the frame
129        height = frame.shape[0]
130        width  = frame.shape[1]
131        for detection in detections:
132            roiData = detection.boundingBoxMapping
133            roi = roiData.roi
134            roi = roi.denormalize(depthFrameColor.shape[1], depthFrameColor.shape[0])
135            topLeft = roi.topLeft()
136            bottomRight = roi.bottomRight()
137            xmin = int(topLeft.x)
138            ymin = int(topLeft.y)
139            xmax = int(bottomRight.x)
140            ymax = int(bottomRight.y)
141            cv2.rectangle(depthFrameColor, (xmin, ymin), (xmax, ymax), color, 1)
142
143            # Denormalize bounding box
144            x1 = int(detection.xmin * width)
145            x2 = int(detection.xmax * width)
146            y1 = int(detection.ymin * height)
147            y2 = int(detection.ymax * height)
148            try:
149                label = labelMap[detection.label]
150            except:
151                label = detection.label
152            cv2.putText(frame, str(label), (x1 + 10, y1 + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
153            cv2.putText(frame, "{:.2f}".format(detection.confidence*100), (x1 + 10, y1 + 35), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
154            cv2.putText(frame, f"X: {int(detection.spatialCoordinates.x)} mm", (x1 + 10, y1 + 50), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
155            cv2.putText(frame, f"Y: {int(detection.spatialCoordinates.y)} mm", (x1 + 10, y1 + 65), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
156            cv2.putText(frame, f"Z: {int(detection.spatialCoordinates.z)} mm", (x1 + 10, y1 + 80), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
157
158            cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), cv2.FONT_HERSHEY_SIMPLEX)
159
160        cv2.putText(frame, "NN fps: {:.2f}".format(fps), (2, frame.shape[0] - 4), cv2.FONT_HERSHEY_TRIPLEX, 0.4, (255,255,255))
161        cv2.imshow("depth", depthFrameColor)
162        cv2.imshow("preview", frame)
163
164        if cv2.waitKey(1) == ord('q'):
165            break

Pipeline

React Flow

Need assistance?

Head over to Discussion Forum for technical support or any other questions you might have.

ON THIS PAGE

RGB & MobilenetSSD with spatial data

Similar samples:

Demo

Setup

Source code

Python

Pipeline

Need assistance?