Mono & MobilenetSSD 结合空间数据

本示例展示了如何在校正后的右侧输入帧上运行 MobileNetv2SSD，以及如何显示预览、检测结果、深度图和空间信息（X、Y、Z）。它与示例 RGB & MobilenetSSD 类似，但增加了空间数据。 X、Y、Z 坐标相对于深度图的中心。setConfidenceThreshold - 对象被检测到的置信度阈值

类似示例：

演示

设置

此示例需要 DepthAI v3 API，请参阅安装说明。

源代码

Python

GitHub

1#!/usr/bin/env python3
2
3from pathlib import Path
4import sys
5import cv2
6import depthai as dai
7import numpy as np
8import time
9
10'''
11Mobilenet SSD device side decoding demo
12  The "mobilenet-ssd" model is a Single-Shot multibox Detection (SSD) network intended
13  to perform object detection. This model is implemented using the Caffe* framework.
14  For details about this model, check out the repository <https://github.com/chuanqi305/MobileNet-SSD>.
15'''
16
17# Get argument first
18nnPath = str((Path(__file__).parent / Path('../models/mobilenet-ssd_openvino_2021.4_6shave.blob')).resolve().absolute())
19if len(sys.argv) > 1:
20    nnPath = sys.argv[1]
21
22if not Path(nnPath).exists():
23    import sys
24    raise FileNotFoundError(f'Required file/s not found, please run "{sys.executable} install_requirements.py"')
25
26# MobilenetSSD label texts
27labelMap = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow",
28            "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
29
30syncNN = True
31
32# Create pipeline
33pipeline = dai.Pipeline()
34
35# Define sources and outputs
36monoLeft = pipeline.create(dai.node.MonoCamera)
37monoRight = pipeline.create(dai.node.MonoCamera)
38stereo = pipeline.create(dai.node.StereoDepth)
39spatialDetectionNetwork = pipeline.create(dai.node.MobileNetSpatialDetectionNetwork)
40imageManip = pipeline.create(dai.node.ImageManip)
41
42xoutManip = pipeline.create(dai.node.XLinkOut)
43nnOut = pipeline.create(dai.node.XLinkOut)
44xoutDepth = pipeline.create(dai.node.XLinkOut)
45
46xoutManip.setStreamName("right")
47nnOut.setStreamName("detections")
48xoutDepth.setStreamName("depth")
49
50# Properties
51imageManip.initialConfig.setResize(300, 300)
52# The NN model expects BGR input. By default ImageManip output type would be same as input (gray in this case)
53imageManip.initialConfig.setFrameType(dai.ImgFrame.Type.BGR888p)
54
55monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
56monoLeft.setCamera("left")
57monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
58monoRight.setCamera("right")
59
60# StereoDepth
61stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY)
62stereo.setSubpixel(True)
63
64# Define a neural network that will make predictions based on the source frames
65spatialDetectionNetwork.setConfidenceThreshold(0.5)
66spatialDetectionNetwork.setBlobPath(nnPath)
67spatialDetectionNetwork.input.setBlocking(False)
68spatialDetectionNetwork.setBoundingBoxScaleFactor(0.5)
69spatialDetectionNetwork.setDepthLowerThreshold(100)
70spatialDetectionNetwork.setDepthUpperThreshold(5000)
71
72# Linking
73monoLeft.out.link(stereo.left)
74monoRight.out.link(stereo.right)
75
76imageManip.out.link(spatialDetectionNetwork.input)
77if syncNN:
78    spatialDetectionNetwork.passthrough.link(xoutManip.input)
79else:
80    imageManip.out.link(xoutManip.input)
81
82spatialDetectionNetwork.out.link(nnOut.input)
83
84stereo.rectifiedRight.link(imageManip.inputImage)
85stereo.depth.link(spatialDetectionNetwork.inputDepth)
86spatialDetectionNetwork.passthroughDepth.link(xoutDepth.input)
87
88# Connect to device and start pipeline
89with dai.Device(pipeline) as device:
90
91    # Output queues will be used to get the rgb frames and nn data from the outputs defined above
92    previewQueue = device.getOutputQueue(name="right", maxSize=4, blocking=False)
93    detectionNNQueue = device.getOutputQueue(name="detections", maxSize=4, blocking=False)
94    depthQueue = device.getOutputQueue(name="depth", maxSize=4, blocking=False)
95
96    rectifiedRight = None
97    detections = []
98
99    startTime = time.monotonic()
100    counter = 0
101    fps = 0
102    color = (255, 255, 255)
103
104    while True:
105        inRectified = previewQueue.get()
106        inDet = detectionNNQueue.get()
107        inDepth = depthQueue.get()
108
109        counter += 1
110        currentTime = time.monotonic()
111        if (currentTime - startTime) > 1:
112            fps = counter / (currentTime - startTime)
113            counter = 0
114            startTime = currentTime
115
116        rectifiedRight = inRectified.getCvFrame()
117
118        depthFrame = inDepth.getFrame() # depthFrame values are in millimeters
119
120        depth_downscaled = depthFrame[::4]
121        if np.all(depth_downscaled == 0):
122            min_depth = 0  # Set a default minimum depth value when all elements are zero
123        else:
124            min_depth = np.percentile(depth_downscaled[depth_downscaled != 0], 1)
125        max_depth = np.percentile(depth_downscaled, 99)
126        depthFrameColor = np.interp(depthFrame, (min_depth, max_depth), (0, 255)).astype(np.uint8)
127        depthFrameColor = cv2.applyColorMap(depthFrameColor, cv2.COLORMAP_HOT)
128
129        detections = inDet.detections
130
131        # If the rectifiedRight is available, draw bounding boxes on it and show the rectifiedRight
132        height = rectifiedRight.shape[0]
133        width = rectifiedRight.shape[1]
134        for detection in detections:
135            roiData = detection.boundingBoxMapping
136            roi = roiData.roi
137            roi = roi.denormalize(depthFrameColor.shape[1], depthFrameColor.shape[0])
138            topLeft = roi.topLeft()
139            bottomRight = roi.bottomRight()
140            xmin = int(topLeft.x)
141            ymin = int(topLeft.y)
142            xmax = int(bottomRight.x)
143            ymax = int(bottomRight.y)
144            cv2.rectangle(depthFrameColor, (xmin, ymin), (xmax, ymax), color, cv2.FONT_HERSHEY_SCRIPT_SIMPLEX)
145
146            # Denormalize bounding box
147            x1 = int(detection.xmin * width)
148            x2 = int(detection.xmax * width)
149            y1 = int(detection.ymin * height)
150            y2 = int(detection.ymax * height)
151
152            try:
153                label = labelMap[detection.label]
154            except:
155                label = detection.label
156
157            cv2.putText(rectifiedRight, str(label), (x1 + 10, y1 + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
158            cv2.putText(rectifiedRight, "{:.2f}".format(detection.confidence*100), (x1 + 10, y1 + 35), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
159            cv2.putText(rectifiedRight, f"X: {int(detection.spatialCoordinates.x)} mm", (x1 + 10, y1 + 50), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
160            cv2.putText(rectifiedRight, f"Y: {int(detection.spatialCoordinates.y)} mm", (x1 + 10, y1 + 65), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
161            cv2.putText(rectifiedRight, f"Z: {int(detection.spatialCoordinates.z)} mm", (x1 + 10, y1 + 80), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
162
163            cv2.rectangle(rectifiedRight, (x1, y1), (x2, y2), color, cv2.FONT_HERSHEY_SIMPLEX)
164
165        cv2.putText(rectifiedRight, "NN fps: {:.2f}".format(fps), (2, rectifiedRight.shape[0] - 4), cv2.FONT_HERSHEY_TRIPLEX, 0.4, color)
166        cv2.imshow("depth", depthFrameColor)
167        cv2.imshow("rectified right", rectifiedRight)
168
169        if cv2.waitKey(1) == ord('q'):
170            break

C++

GitHub

1#include <chrono>
2#include <iostream>
3
4// Includes common necessary includes for development using depthai library
5#include "depthai/depthai.hpp"
6
7static const std::vector<std::string> labelMap = {"background", "aeroplane", "bicycle",     "bird",  "boat",        "bottle", "bus",
8                                                  "car",        "cat",       "chair",       "cow",   "diningtable", "dog",    "horse",
9                                                  "motorbike",  "person",    "pottedplant", "sheep", "sofa",        "train",  "tvmonitor"};
10
11static std::atomic<bool> syncNN{true};
12
13int main(int argc, char** argv) {
14    using namespace std;
15    using namespace std::chrono;
16    std::string nnPath(BLOB_PATH);
17
18    // If path to blob specified, use that
19    if(argc > 1) {
20        nnPath = std::string(argv[1]);
21    }
22
23    // Print which blob we are using
24    printf("Using blob at path: %s\n", nnPath.c_str());
25
26    // Create pipeline
27    dai::Pipeline pipeline;
28
29    // Define sources and outputs
30    auto monoLeft = pipeline.create<dai::node::MonoCamera>();
31    auto monoRight = pipeline.create<dai::node::MonoCamera>();
32    auto stereo = pipeline.create<dai::node::StereoDepth>();
33    auto spatialDetectionNetwork = pipeline.create<dai::node::MobileNetSpatialDetectionNetwork>();
34    auto imageManip = pipeline.create<dai::node::ImageManip>();
35
36    auto xoutManip = pipeline.create<dai::node::XLinkOut>();
37    auto nnOut = pipeline.create<dai::node::XLinkOut>();
38    auto xoutDepth = pipeline.create<dai::node::XLinkOut>();
39
40    xoutManip->setStreamName("right");
41    nnOut->setStreamName("detections");
42    xoutDepth->setStreamName("depth");
43
44    // Properties
45    imageManip->initialConfig.setResize(300, 300);
46    // The NN model expects BGR input. By default ImageManip output type would be same as input (gray in this case)
47    imageManip->initialConfig.setFrameType(dai::ImgFrame::Type::BGR888p);
48
49    monoLeft->setResolution(dai::MonoCameraProperties::SensorResolution::THE_400_P);
50    monoLeft->setCamera("left");
51    monoRight->setResolution(dai::MonoCameraProperties::SensorResolution::THE_400_P);
52    monoRight->setCamera("right");
53
54    // StereoDepth
55    stereo->setDefaultProfilePreset(dai::node::StereoDepth::PresetMode::HIGH_DENSITY);
56
57    // Define a neural network that will make predictions based on the source frames
58    spatialDetectionNetwork->setConfidenceThreshold(0.5f);
59    spatialDetectionNetwork->setBlobPath(nnPath);
60    spatialDetectionNetwork->input.setBlocking(false);
61    spatialDetectionNetwork->setBoundingBoxScaleFactor(0.5);
62    spatialDetectionNetwork->setDepthLowerThreshold(100);
63    spatialDetectionNetwork->setDepthUpperThreshold(5000);
64
65    // Linking
66    monoLeft->out.link(stereo->left);
67    monoRight->out.link(stereo->right);
68
69    imageManip->out.link(spatialDetectionNetwork->input);
70    if(syncNN) {
71        spatialDetectionNetwork->passthrough.link(xoutManip->input);
72    } else {
73        imageManip->out.link(xoutManip->input);
74    }
75
76    spatialDetectionNetwork->out.link(nnOut->input);
77
78    stereo->rectifiedRight.link(imageManip->inputImage);
79    stereo->depth.link(spatialDetectionNetwork->inputDepth);
80    spatialDetectionNetwork->passthroughDepth.link(xoutDepth->input);
81
82    // Connect to device and start pipeline
83    dai::Device device(pipeline);
84
85    // Output queues will be used to get the rgb frames and nn data from the outputs defined above
86    auto previewQueue = device.getOutputQueue("right", 4, false);
87    auto detectionNNQueue = device.getOutputQueue("detections", 4, false);
88    auto depthQueue = device.getOutputQueue("depth", 4, false);
89
90    auto startTime = steady_clock::now();
91    int counter = 0;
92    float fps = 0;
93    auto color = cv::Scalar(255, 255, 255);
94
95    while(true) {
96        auto inRectified = previewQueue->get<dai::ImgFrame>();
97        auto inDet = detectionNNQueue->get<dai::SpatialImgDetections>();
98        auto inDepth = depthQueue->get<dai::ImgFrame>();
99
100        counter++;
101        auto currentTime = steady_clock::now();
102        auto elapsed = duration_cast<duration<float>>(currentTime - startTime);
103        if(elapsed > seconds(1)) {
104            fps = counter / elapsed.count();
105            counter = 0;
106            startTime = currentTime;
107        }
108
109        cv::Mat rectifiedRight = inRectified->getCvFrame();
110
111        cv::Mat depthFrame = inDepth->getFrame();  // depthFrame values are in millimeters
112        cv::Mat depthFrameColor;
113
114        cv::normalize(depthFrame, depthFrameColor, 255, 0, cv::NORM_INF, CV_8UC1);
115        cv::equalizeHist(depthFrameColor, depthFrameColor);
116        cv::applyColorMap(depthFrameColor, depthFrameColor, cv::COLORMAP_HOT);
117
118        auto detections = inDet->detections;
119
120        for(auto& detection : detections) {
121            auto roiData = detection.boundingBoxMapping;
122            auto roi = roiData.roi;
123            roi = roi.denormalize(depthFrameColor.cols, depthFrameColor.rows);
124            auto topLeft = roi.topLeft();
125            auto bottomRight = roi.bottomRight();
126            auto xmin = (int)topLeft.x;
127            auto ymin = (int)topLeft.y;
128            auto xmax = (int)bottomRight.x;
129            auto ymax = (int)bottomRight.y;
130            cv::rectangle(depthFrameColor, cv::Rect(cv::Point(xmin, ymin), cv::Point(xmax, ymax)), color, cv::FONT_HERSHEY_SIMPLEX);
131
132            int x1 = detection.xmin * rectifiedRight.cols;
133            int y1 = detection.ymin * rectifiedRight.rows;
134            int x2 = detection.xmax * rectifiedRight.cols;
135            int y2 = detection.ymax * rectifiedRight.rows;
136
137            uint32_t labelIndex = detection.label;
138            std::string labelStr = to_string(labelIndex);
139            if(labelIndex < labelMap.size()) {
140                labelStr = labelMap[labelIndex];
141            }
142            cv::putText(rectifiedRight, labelStr, cv::Point(x1 + 10, y1 + 20), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255);
143            std::stringstream confStr;
144            confStr << std::fixed << std::setprecision(2) << detection.confidence * 100;
145            cv::putText(rectifiedRight, confStr.str(), cv::Point(x1 + 10, y1 + 35), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255);
146
147            std::stringstream depthX;
148            depthX << "X: " << (int)detection.spatialCoordinates.x << " mm";
149            cv::putText(rectifiedRight, depthX.str(), cv::Point(x1 + 10, y1 + 50), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255);
150            std::stringstream depthY;
151            depthY << "Y: " << (int)detection.spatialCoordinates.y << " mm";
152            cv::putText(rectifiedRight, depthY.str(), cv::Point(x1 + 10, y1 + 65), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255);
153            std::stringstream depthZ;
154            depthZ << "Z: " << (int)detection.spatialCoordinates.z << " mm";
155            cv::putText(rectifiedRight, depthZ.str(), cv::Point(x1 + 10, y1 + 80), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255);
156
157            cv::rectangle(rectifiedRight, cv::Rect(cv::Point(x1, y1), cv::Point(x2, y2)), color, cv::FONT_HERSHEY_SIMPLEX);
158        }
159
160        std::stringstream fpsStr;
161        fpsStr << std::fixed << std::setprecision(2) << fps;
162        cv::putText(rectifiedRight, fpsStr.str(), cv::Point(2, rectifiedRight.rows - 4), cv::FONT_HERSHEY_TRIPLEX, 0.4, color);
163
164        cv::imshow("depth", depthFrameColor);
165        cv::imshow("rectified right", rectifiedRight);
166
167        int key = cv::waitKey(1);
168        if(key == 'q' || key == 'Q') {
169            return 0;
170        }
171    }
172    return 0;
173}

管道

需要帮助？

请前往 Discussion Forum 获取技术支持或提出您可能有的任何其他问题。

本页目录

Mono & MobilenetSSD 结合空间数据View as Markdown

类似示例：

演示

设置

源代码

Python

C++

管道

需要帮助？

Mono & MobilenetSSD 结合空间数据