RGB & MobilenetSSD 结合空间数据

此示例展示了如何在 RGB 输入帧上运行 MobileNetv2SSD，以及如何显示 RGB 预览、检测结果、深度图和空间信息（X、Y、Z）。它与示例 RGB & MobilenetSSD 类似，但增加了空间数据。X、Y、Z 坐标相对于深度图的中心。setConfidenceThreshold - 对象被检测的置信度阈值

类似示例：

演示

设置

请运行安装脚本以下载所有必需的依赖项。请注意，此脚本必须在 git 上下文中运行，因此您必须先下载 depthai-python 存储库，然后运行脚本

Command Line

1git clone https://github.com/luxonis/depthai-python.git
2cd depthai-python/examples
3python3 install_requirements.py

有关更多信息，请遵循安装指南。

源代码

Python

GitHub

1#!/usr/bin/env python3
2
3from pathlib import Path
4import sys
5import cv2
6import depthai as dai
7import numpy as np
8import time
9
10'''
11Spatial detection network demo.
12    Performs inference on RGB camera and retrieves spatial location coordinates: x,y,z relative to the center of depth map.
13'''
14
15# Get argument first
16nnBlobPath = str((Path(__file__).parent / Path('../models/mobilenet-ssd_openvino_2021.4_6shave.blob')).resolve().absolute())
17if len(sys.argv) > 1:
18    nnBlobPath = sys.argv[1]
19
20if not Path(nnBlobPath).exists():
21    import sys
22    raise FileNotFoundError(f'Required file/s not found, please run "{sys.executable} install_requirements.py"')
23
24# MobilenetSSD label texts
25labelMap = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow",
26            "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
27
28syncNN = True
29
30# Create pipeline
31pipeline = dai.Pipeline()
32
33# Define sources and outputs
34camRgb = pipeline.create(dai.node.ColorCamera)
35spatialDetectionNetwork = pipeline.create(dai.node.MobileNetSpatialDetectionNetwork)
36monoLeft = pipeline.create(dai.node.MonoCamera)
37monoRight = pipeline.create(dai.node.MonoCamera)
38stereo = pipeline.create(dai.node.StereoDepth)
39
40xoutRgb = pipeline.create(dai.node.XLinkOut)
41xoutNN = pipeline.create(dai.node.XLinkOut)
42xoutDepth = pipeline.create(dai.node.XLinkOut)
43
44xoutRgb.setStreamName("rgb")
45xoutNN.setStreamName("detections")
46xoutDepth.setStreamName("depth")
47
48# Properties
49camRgb.setPreviewSize(300, 300)
50camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
51camRgb.setInterleaved(False)
52camRgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.BGR)
53
54monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
55monoLeft.setCamera("left")
56monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
57monoRight.setCamera("right")
58
59# Setting node configs
60stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY)
61# Align depth map to the perspective of RGB camera, on which inference is done
62stereo.setDepthAlign(dai.CameraBoardSocket.CAM_A)
63stereo.setSubpixel(True)
64stereo.setOutputSize(monoLeft.getResolutionWidth(), monoLeft.getResolutionHeight())
65
66spatialDetectionNetwork.setBlobPath(nnBlobPath)
67spatialDetectionNetwork.setConfidenceThreshold(0.5)
68spatialDetectionNetwork.input.setBlocking(False)
69spatialDetectionNetwork.setBoundingBoxScaleFactor(0.5)
70spatialDetectionNetwork.setDepthLowerThreshold(100)
71spatialDetectionNetwork.setDepthUpperThreshold(5000)
72
73# Linking
74monoLeft.out.link(stereo.left)
75monoRight.out.link(stereo.right)
76
77camRgb.preview.link(spatialDetectionNetwork.input)
78if syncNN:
79    spatialDetectionNetwork.passthrough.link(xoutRgb.input)
80else:
81    camRgb.preview.link(xoutRgb.input)
82
83spatialDetectionNetwork.out.link(xoutNN.input)
84
85stereo.depth.link(spatialDetectionNetwork.inputDepth)
86spatialDetectionNetwork.passthroughDepth.link(xoutDepth.input)
87
88# Connect to device and start pipeline
89with dai.Device(pipeline) as device:
90
91    # Output queues will be used to get the rgb frames and nn data from the outputs defined above
92    previewQueue = device.getOutputQueue(name="rgb", maxSize=4, blocking=False)
93    detectionNNQueue = device.getOutputQueue(name="detections", maxSize=4, blocking=False)
94    depthQueue = device.getOutputQueue(name="depth", maxSize=4, blocking=False)
95
96    startTime = time.monotonic()
97    counter = 0
98    fps = 0
99    color = (255, 255, 255)
100
101    while True:
102        inPreview = previewQueue.get()
103        inDet = detectionNNQueue.get()
104        depth = depthQueue.get()
105
106        counter+=1
107        current_time = time.monotonic()
108        if (current_time - startTime) > 1 :
109            fps = counter / (current_time - startTime)
110            counter = 0
111            startTime = current_time
112
113        frame = inPreview.getCvFrame()
114
115        depthFrame = depth.getFrame() # depthFrame values are in millimeters
116
117        depth_downscaled = depthFrame[::4]
118        if np.all(depth_downscaled == 0):
119            min_depth = 0  # Set a default minimum depth value when all elements are zero
120        else:
121            min_depth = np.percentile(depth_downscaled[depth_downscaled != 0], 1)
122        max_depth = np.percentile(depth_downscaled, 99)
123        depthFrameColor = np.interp(depthFrame, (min_depth, max_depth), (0, 255)).astype(np.uint8)
124        depthFrameColor = cv2.applyColorMap(depthFrameColor, cv2.COLORMAP_HOT)
125
126        detections = inDet.detections
127
128        # If the frame is available, draw bounding boxes on it and show the frame
129        height = frame.shape[0]
130        width  = frame.shape[1]
131        for detection in detections:
132            roiData = detection.boundingBoxMapping
133            roi = roiData.roi
134            roi = roi.denormalize(depthFrameColor.shape[1], depthFrameColor.shape[0])
135            topLeft = roi.topLeft()
136            bottomRight = roi.bottomRight()
137            xmin = int(topLeft.x)
138            ymin = int(topLeft.y)
139            xmax = int(bottomRight.x)
140            ymax = int(bottomRight.y)
141            cv2.rectangle(depthFrameColor, (xmin, ymin), (xmax, ymax), color, 1)
142
143            # Denormalize bounding box
144            x1 = int(detection.xmin * width)
145            x2 = int(detection.xmax * width)
146            y1 = int(detection.ymin * height)
147            y2 = int(detection.ymax * height)
148            try:
149                label = labelMap[detection.label]
150            except:
151                label = detection.label
152            cv2.putText(frame, str(label), (x1 + 10, y1 + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
153            cv2.putText(frame, "{:.2f}".format(detection.confidence*100), (x1 + 10, y1 + 35), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
154            cv2.putText(frame, f"X: {int(detection.spatialCoordinates.x)} mm", (x1 + 10, y1 + 50), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
155            cv2.putText(frame, f"Y: {int(detection.spatialCoordinates.y)} mm", (x1 + 10, y1 + 65), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
156            cv2.putText(frame, f"Z: {int(detection.spatialCoordinates.z)} mm", (x1 + 10, y1 + 80), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
157
158            cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), cv2.FONT_HERSHEY_SIMPLEX)
159
160        cv2.putText(frame, "NN fps: {:.2f}".format(fps), (2, frame.shape[0] - 4), cv2.FONT_HERSHEY_TRIPLEX, 0.4, (255,255,255))
161        cv2.imshow("depth", depthFrameColor)
162        cv2.imshow("preview", frame)
163
164        if cv2.waitKey(1) == ord('q'):
165            break

C++

GitHub

1#include <chrono>
2#include <iostream>
3
4// Includes common necessary includes for development using depthai library
5#include "depthai/depthai.hpp"
6
7static const std::vector<std::string> labelMap = {"background", "aeroplane", "bicycle",     "bird",  "boat",        "bottle", "bus",
8                                                  "car",        "cat",       "chair",       "cow",   "diningtable", "dog",    "horse",
9                                                  "motorbike",  "person",    "pottedplant", "sheep", "sofa",        "train",  "tvmonitor"};
10
11static std::atomic<bool> syncNN{true};
12
13int main(int argc, char** argv) {
14    using namespace std;
15    using namespace std::chrono;
16    std::string nnPath(BLOB_PATH);
17
18    // If path to blob specified, use that
19    if(argc > 1) {
20        nnPath = std::string(argv[1]);
21    }
22
23    // Print which blob we are using
24    printf("Using blob at path: %s\n", nnPath.c_str());
25
26    // Create pipeline
27    dai::Pipeline pipeline;
28
29    // Define sources and outputs
30    auto camRgb = pipeline.create<dai::node::ColorCamera>();
31    auto spatialDetectionNetwork = pipeline.create<dai::node::MobileNetSpatialDetectionNetwork>();
32    auto monoLeft = pipeline.create<dai::node::MonoCamera>();
33    auto monoRight = pipeline.create<dai::node::MonoCamera>();
34    auto stereo = pipeline.create<dai::node::StereoDepth>();
35
36    auto xoutRgb = pipeline.create<dai::node::XLinkOut>();
37    auto xoutNN = pipeline.create<dai::node::XLinkOut>();
38    auto xoutDepth = pipeline.create<dai::node::XLinkOut>();
39
40    xoutRgb->setStreamName("rgb");
41    xoutNN->setStreamName("detections");
42    xoutDepth->setStreamName("depth");
43
44    // Properties
45    camRgb->setPreviewSize(300, 300);
46    camRgb->setResolution(dai::ColorCameraProperties::SensorResolution::THE_1080_P);
47    camRgb->setInterleaved(false);
48    camRgb->setColorOrder(dai::ColorCameraProperties::ColorOrder::BGR);
49
50    monoLeft->setResolution(dai::MonoCameraProperties::SensorResolution::THE_400_P);
51    monoLeft->setCamera("left");
52    monoRight->setResolution(dai::MonoCameraProperties::SensorResolution::THE_400_P);
53    monoRight->setCamera("right");
54
55    // Setting node configs
56    stereo->setDefaultProfilePreset(dai::node::StereoDepth::PresetMode::HIGH_DENSITY);
57    // Align depth map to the perspective of RGB camera, on which inference is done
58    stereo->setDepthAlign(dai::CameraBoardSocket::CAM_A);
59    stereo->setOutputSize(monoLeft->getResolutionWidth(), monoLeft->getResolutionHeight());
60
61    spatialDetectionNetwork->setBlobPath(nnPath);
62    spatialDetectionNetwork->setConfidenceThreshold(0.5f);
63    spatialDetectionNetwork->input.setBlocking(false);
64    spatialDetectionNetwork->setBoundingBoxScaleFactor(0.5);
65    spatialDetectionNetwork->setDepthLowerThreshold(100);
66    spatialDetectionNetwork->setDepthUpperThreshold(5000);
67
68    // Linking
69    monoLeft->out.link(stereo->left);
70    monoRight->out.link(stereo->right);
71
72    camRgb->preview.link(spatialDetectionNetwork->input);
73    if(syncNN) {
74        spatialDetectionNetwork->passthrough.link(xoutRgb->input);
75    } else {
76        camRgb->preview.link(xoutRgb->input);
77    }
78
79    spatialDetectionNetwork->out.link(xoutNN->input);
80
81    stereo->depth.link(spatialDetectionNetwork->inputDepth);
82    spatialDetectionNetwork->passthroughDepth.link(xoutDepth->input);
83
84    // Connect to device and start pipeline
85    dai::Device device(pipeline);
86
87    auto previewQueue = device.getOutputQueue("rgb", 4, false);
88    auto detectionNNQueue = device.getOutputQueue("detections", 4, false);
89    auto depthQueue = device.getOutputQueue("depth", 4, false);
90
91    auto startTime = steady_clock::now();
92    int counter = 0;
93    float fps = 0;
94    auto color = cv::Scalar(255, 255, 255);
95
96    while(true) {
97        auto inPreview = previewQueue->get<dai::ImgFrame>();
98        auto inDet = detectionNNQueue->get<dai::SpatialImgDetections>();
99        auto depth = depthQueue->get<dai::ImgFrame>();
100
101        counter++;
102        auto currentTime = steady_clock::now();
103        auto elapsed = duration_cast<duration<float>>(currentTime - startTime);
104        if(elapsed > seconds(1)) {
105            fps = counter / elapsed.count();
106            counter = 0;
107            startTime = currentTime;
108        }
109
110        cv::Mat frame = inPreview->getCvFrame();
111        cv::Mat depthFrame = depth->getFrame();  // depthFrame values are in millimeters
112
113        cv::Mat depthFrameColor;
114        cv::normalize(depthFrame, depthFrameColor, 255, 0, cv::NORM_INF, CV_8UC1);
115        cv::equalizeHist(depthFrameColor, depthFrameColor);
116        cv::applyColorMap(depthFrameColor, depthFrameColor, cv::COLORMAP_HOT);
117
118        auto detections = inDet->detections;
119
120        for(const auto& detection : detections) {
121            auto roiData = detection.boundingBoxMapping;
122            auto roi = roiData.roi;
123            roi = roi.denormalize(depthFrameColor.cols, depthFrameColor.rows);
124            auto topLeft = roi.topLeft();
125            auto bottomRight = roi.bottomRight();
126            auto xmin = (int)topLeft.x;
127            auto ymin = (int)topLeft.y;
128            auto xmax = (int)bottomRight.x;
129            auto ymax = (int)bottomRight.y;
130            cv::rectangle(depthFrameColor, cv::Rect(cv::Point(xmin, ymin), cv::Point(xmax, ymax)), color, cv::FONT_HERSHEY_SIMPLEX);
131
132            int x1 = detection.xmin * frame.cols;
133            int y1 = detection.ymin * frame.rows;
134            int x2 = detection.xmax * frame.cols;
135            int y2 = detection.ymax * frame.rows;
136
137            uint32_t labelIndex = detection.label;
138            std::string labelStr = to_string(labelIndex);
139            if(labelIndex < labelMap.size()) {
140                labelStr = labelMap[labelIndex];
141            }
142            cv::putText(frame, labelStr, cv::Point(x1 + 10, y1 + 20), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255);
143            std::stringstream confStr;
144            confStr << std::fixed << std::setprecision(2) << detection.confidence * 100;
145            cv::putText(frame, confStr.str(), cv::Point(x1 + 10, y1 + 35), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255);
146
147            std::stringstream depthX;
148            depthX << "X: " << (int)detection.spatialCoordinates.x << " mm";
149            cv::putText(frame, depthX.str(), cv::Point(x1 + 10, y1 + 50), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255);
150            std::stringstream depthY;
151            depthY << "Y: " << (int)detection.spatialCoordinates.y << " mm";
152            cv::putText(frame, depthY.str(), cv::Point(x1 + 10, y1 + 65), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255);
153            std::stringstream depthZ;
154            depthZ << "Z: " << (int)detection.spatialCoordinates.z << " mm";
155            cv::putText(frame, depthZ.str(), cv::Point(x1 + 10, y1 + 80), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255);
156
157            cv::rectangle(frame, cv::Rect(cv::Point(x1, y1), cv::Point(x2, y2)), color, cv::FONT_HERSHEY_SIMPLEX);
158        }
159
160        std::stringstream fpsStr;
161        fpsStr << "NN fps: " << std::fixed << std::setprecision(2) << fps;
162        cv::putText(frame, fpsStr.str(), cv::Point(2, inPreview->getHeight() - 4), cv::FONT_HERSHEY_TRIPLEX, 0.4, color);
163
164        cv::imshow("depth", depthFrameColor);
165        cv::imshow("preview", frame);
166
167        int key = cv::waitKey(1);
168        if(key == 'q' || key == 'Q') {
169            return 0;
170        }
171    }
172    return 0;
173}

Pipeline

需要帮助？

请前往 Discussion Forum 获取技术支持或提出您可能有的任何其他问题。

本页目录

RGB & MobilenetSSD 结合空间数据View as Markdown

类似示例：

演示

设置

源代码

Python

C++

Pipeline

需要帮助？

RGB & MobilenetSSD 结合空间数据