空间检测网络

Supported on:RVC2 RVC4

该示例创建了一个管道，用于使用 RGB 和立体深度流执行 YOLOv6-Nano 空间对象检测，在彩色深度和 RGB 帧上使用边界框和空间坐标可视化结果，并使用自定义可视化节点。此示例需要 DepthAI v3 API，请参阅安装说明。

管道

源代码

Python

1#!/usr/bin/env python3
2
3import argparse
4from pathlib import Path
5import cv2
6import depthai as dai
7import numpy as np
8
9NEURAL_FPS = 8
10STEREO_DEFAULT_FPS = 20
11
12parser = argparse.ArgumentParser()
13parser.add_argument(
14    "--depthSource", type=str, default="stereo", choices=["stereo", "neural"]
15)
16args = parser.parse_args()
17# For better results on OAK4, use a segmentation model like "luxonis/yolov8-instance-segmentation-large:coco-640x480"
18# for depth estimation over the objects mask instead of the full bounding box.
19modelDescription = dai.NNModelDescription("yolov6-nano")
20size = (640, 400)
21
22if args.depthSource == "stereo":
23    fps = STEREO_DEFAULT_FPS
24else:
25    fps = NEURAL_FPS
26
27class SpatialVisualizer(dai.node.HostNode):
28    def __init__(self):
29        dai.node.HostNode.__init__(self)
30        self.sendProcessingToPipeline(True)
31    def build(self, depth:dai.Node.Output, detections: dai.Node.Output, rgb: dai.Node.Output):
32        self.link_args(depth, detections, rgb) # Must match the inputs to the process method
33
34    def process(self, depthPreview, detections, rgbPreview):
35        depthPreview = depthPreview.getCvFrame()
36        rgbPreview = rgbPreview.getCvFrame()
37        depthFrameColor = self.processDepthFrame(depthPreview)
38        self.displayResults(rgbPreview, depthFrameColor, detections.detections)
39
40    def processDepthFrame(self, depthFrame):
41        depthDownscaled = depthFrame[::4]
42        if np.all(depthDownscaled == 0):
43            minDepth = 0
44        else:
45            minDepth = np.percentile(depthDownscaled[depthDownscaled != 0], 1)
46        maxDepth = np.percentile(depthDownscaled, 99)
47        depthFrameColor = np.interp(depthFrame, (minDepth, maxDepth), (0, 255)).astype(np.uint8)
48        return cv2.applyColorMap(depthFrameColor, cv2.COLORMAP_HOT)
49
50    def displayResults(self, rgbFrame, depthFrameColor, detections):
51        height, width, _ = rgbFrame.shape
52        for detection in detections:
53            self.drawBoundingBoxes(depthFrameColor, detection)
54            self.drawDetections(rgbFrame, detection, width, height)
55
56        cv2.imshow("Depth frame", depthFrameColor)
57        cv2.imshow("Color frame", rgbFrame)
58        if cv2.waitKey(1) == ord('q'):
59            self.stopPipeline()
60
61    def drawBoundingBoxes(self, depthFrameColor, detection):
62        roiData = detection.boundingBoxMapping
63        roi = roiData.roi
64        roi = roi.denormalize(depthFrameColor.shape[1], depthFrameColor.shape[0])
65        topLeft = roi.topLeft()
66        bottomRight = roi.bottomRight()
67        cv2.rectangle(depthFrameColor, (int(topLeft.x), int(topLeft.y)), (int(bottomRight.x), int(bottomRight.y)), (255, 255, 255), 1)
68
69    def drawDetections(self, frame, detection, frameWidth, frameHeight):
70        x1 = int(detection.xmin * frameWidth)
71        x2 = int(detection.xmax * frameWidth)
72        y1 = int(detection.ymin * frameHeight)
73        y2 = int(detection.ymax * frameHeight)
74        label = detection.labelName
75        color = (255, 255, 255)
76        cv2.putText(frame, str(label), (x1 + 10, y1 + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
77        cv2.putText(frame, "{:.2f}".format(detection.confidence * 100), (x1 + 10, y1 + 35), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
78        cv2.putText(frame, f"X: {int(detection.spatialCoordinates.x)} mm", (x1 + 10, y1 + 50), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
79        cv2.putText(frame, f"Y: {int(detection.spatialCoordinates.y)} mm", (x1 + 10, y1 + 65), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
80        cv2.putText(frame, f"Z: {int(detection.spatialCoordinates.z)} mm", (x1 + 10, y1 + 80), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
81        cv2.rectangle(frame, (x1, y1), (x2, y2), color, 1)
82
83# Creates the pipeline and a default device implicitly
84with dai.Pipeline() as p:
85    # Define sources and outputs
86    platform = p.getDefaultDevice().getPlatform()
87
88    camRgb = p.create(dai.node.Camera).build(dai.CameraBoardSocket.CAM_A, sensorFps=fps)
89    monoLeft = p.create(dai.node.Camera).build(dai.CameraBoardSocket.CAM_B, sensorFps=fps)
90    monoRight = p.create(dai.node.Camera).build(dai.CameraBoardSocket.CAM_C, sensorFps=fps)
91    if args.depthSource == "stereo":
92        depthSource = p.create(dai.node.StereoDepth)
93        depthSource.setExtendedDisparity(True)
94        monoLeft.requestOutput(size).link(depthSource.left)
95        monoRight.requestOutput(size).link(depthSource.right)
96    elif args.depthSource == "neural":
97        depthSource = p.create(dai.node.NeuralDepth).build(
98            monoLeft.requestFullResolutionOutput(),
99            monoRight.requestFullResolutionOutput(),
100            dai.DeviceModelZoo.NEURAL_DEPTH_LARGE,
101        )
102    else:
103        raise ValueError(f"Invalid depth source: {args.depthSource}")
104
105    spatialDetectionNetwork = p.create(dai.node.SpatialDetectionNetwork).build(
106        camRgb, depthSource, modelDescription
107    )
108    visualizer = p.create(SpatialVisualizer)
109
110    spatialDetectionNetwork.spatialLocationCalculator.initialConfig.setSegmentationPassthrough(False)
111    spatialDetectionNetwork.input.setBlocking(False)
112    spatialDetectionNetwork.setDepthLowerThreshold(100)
113    spatialDetectionNetwork.setDepthUpperThreshold(5000)
114
115    visualizer.build(
116        spatialDetectionNetwork.passthroughDepth,
117        spatialDetectionNetwork.out,
118        spatialDetectionNetwork.passthrough,
119    )
120
121    print("Starting pipeline with depth source: ", args.depthSource)
122
123    p.run()

C++

GitHub

1#include <argparse/argparse.hpp>
2#include <csignal>
3#include <iostream>
4#include <memory>
5#include <opencv2/opencv.hpp>
6#include <vector>
7
8#include "depthai/depthai.hpp"
9
10constexpr float NEURAL_FPS = 8.0f;
11constexpr float STEREO_DEFAULT_FPS = 20.0f;
12
13std::atomic<bool> quitEvent(false);
14
15void signalHandler(int) {
16    quitEvent = true;
17}
18
19// Custom host node for spatial visualization
20class SpatialVisualizer : public dai::NodeCRTP<dai::node::HostNode, SpatialVisualizer> {
21   public:
22    Input& depthInput = inputs["depth"];
23    Input& detectionsInput = inputs["detections"];
24    Input& rgbInput = inputs["rgb"];
25
26    std::vector<std::string> labelMap;
27
28    std::shared_ptr<SpatialVisualizer> build(Output& depth, Output& detections, Output& rgb) {
29        depth.link(depthInput);
30        detections.link(detectionsInput);
31        rgb.link(rgbInput);
32        sendProcessingToPipeline(true);
33        return std::static_pointer_cast<SpatialVisualizer>(this->shared_from_this());
34    }
35
36    std::shared_ptr<dai::Buffer> processGroup(std::shared_ptr<dai::MessageGroup> in) override {
37        if(quitEvent) {
38            stopPipeline();
39            return nullptr;
40        }
41
42        auto depthFrame = in->get<dai::ImgFrame>("depth");
43        auto detections = in->get<dai::SpatialImgDetections>("detections");
44        auto rgbFrame = in->get<dai::ImgFrame>("rgb");
45
46        cv::Mat depthCv = depthFrame->getCvFrame();
47        cv::Mat rgbCv = rgbFrame->getCvFrame();
48        cv::Mat depthFrameColor = processDepthFrame(depthCv);
49        displayResults(rgbCv, depthFrameColor, detections->detections);
50
51        return nullptr;
52    }
53
54   private:
55    cv::Mat processDepthFrame(const cv::Mat& depthFrame) {
56        // Downscale depth frame
57        cv::Mat depthDownscaled;
58        cv::resize(depthFrame, depthDownscaled, cv::Size(), 0.25, 0.25);
59
60        // Find min and max depth values
61        double minDepth = 0, maxDepth = 0;
62        cv::Mat mask = (depthDownscaled != 0);
63        if(cv::countNonZero(mask) > 0) {
64            cv::minMaxLoc(depthDownscaled, &minDepth, &maxDepth, nullptr, nullptr, mask);
65        }
66
67        // Normalize depth frame
68        cv::Mat depthFrameColor;
69        depthFrame.convertTo(depthFrameColor, CV_8UC1, 255.0 / (maxDepth - minDepth), -minDepth * 255.0 / (maxDepth - minDepth));
70
71        // Apply color map
72        cv::Mat colorized;
73        cv::applyColorMap(depthFrameColor, colorized, cv::COLORMAP_HOT);
74        return colorized;
75    }
76
77    void displayResults(cv::Mat& rgbFrame, cv::Mat& depthFrameColor, const std::vector<dai::SpatialImgDetection>& detections) {
78        int height = rgbFrame.rows;
79        int width = rgbFrame.cols;
80
81        for(const auto& detection : detections) {
82            drawBoundingBoxes(depthFrameColor, detection);
83            drawDetections(rgbFrame, detection, width, height);
84        }
85
86        cv::imshow("depth", depthFrameColor);
87        cv::imshow("rgb", rgbFrame);
88
89        if(cv::waitKey(1) == 'q') {
90            stopPipeline();
91        }
92    }
93
94    void drawBoundingBoxes(cv::Mat& depthFrameColor, const dai::SpatialImgDetection& detection) {
95        auto roi = detection.boundingBoxMapping.roi;
96        roi = roi.denormalize(depthFrameColor.cols, depthFrameColor.rows);
97        auto topLeft = roi.topLeft();
98        auto bottomRight = roi.bottomRight();
99        cv::rectangle(depthFrameColor,
100                      cv::Point(static_cast<int>(topLeft.x), static_cast<int>(topLeft.y)),
101                      cv::Point(static_cast<int>(bottomRight.x), static_cast<int>(bottomRight.y)),
102                      cv::Scalar(255, 255, 255),
103                      1);
104    }
105
106    void drawDetections(cv::Mat& frame, const dai::SpatialImgDetection& detection, int frameWidth, int frameHeight) {
107        int x1 = static_cast<int>(detection.xmin * frameWidth);
108        int x2 = static_cast<int>(detection.xmax * frameWidth);
109        int y1 = static_cast<int>(detection.ymin * frameHeight);
110        int y2 = static_cast<int>(detection.ymax * frameHeight);
111
112        std::string label;
113        try {
114            label = labelMap[detection.label];
115        } catch(...) {
116            label = std::to_string(detection.label);
117        }
118
119        cv::Scalar color(255, 255, 255);
120        cv::putText(frame, label, cv::Point(x1 + 10, y1 + 20), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);
121        cv::putText(frame, std::to_string(detection.confidence * 100), cv::Point(x1 + 10, y1 + 35), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);
122        cv::putText(frame,
123                    "X: " + std::to_string(static_cast<int>(detection.spatialCoordinates.x)) + " mm",
124                    cv::Point(x1 + 10, y1 + 50),
125                    cv::FONT_HERSHEY_TRIPLEX,
126                    0.5,
127                    color);
128        cv::putText(frame,
129                    "Y: " + std::to_string(static_cast<int>(detection.spatialCoordinates.y)) + " mm",
130                    cv::Point(x1 + 10, y1 + 65),
131                    cv::FONT_HERSHEY_TRIPLEX,
132                    0.5,
133                    color);
134        cv::putText(frame,
135                    "Z: " + std::to_string(static_cast<int>(detection.spatialCoordinates.z)) + " mm",
136                    cv::Point(x1 + 10, y1 + 80),
137                    cv::FONT_HERSHEY_TRIPLEX,
138                    0.5,
139                    color);
140        cv::rectangle(frame, cv::Point(x1, y1), cv::Point(x2, y2), color, 1);
141    }
142};
143
144int main(int argc, char** argv) {
145    signal(SIGTERM, signalHandler);
146    signal(SIGINT, signalHandler);
147
148    // Initialize argument parser
149    argparse::ArgumentParser program("spatial_detection", "1.0.0");
150    program.add_description("Spatial detection network example with configurable depth source");
151    program.add_argument("--depthSource").default_value(std::string("stereo")).help("Depth source: stereo, neural, tof");
152
153    // Parse arguments
154    try {
155        program.parse_args(argc, argv);
156    } catch(const std::runtime_error& err) {
157        std::cerr << err.what() << '\n';
158        std::cerr << program;
159        return EXIT_FAILURE;
160    }
161
162    // Get arguments
163    std::string depthSourceArg = program.get<std::string>("--depthSource");
164
165    // Validate depth source argument
166    if(depthSourceArg != "stereo" && depthSourceArg != "neural" && depthSourceArg != "tof") {
167        std::cerr << "Invalid depth source: " << depthSourceArg << '\n';
168        std::cerr << "Valid options are: stereo, neural, tof" << '\n';
169        return EXIT_FAILURE;
170    }
171
172    try {
173        float fps = STEREO_DEFAULT_FPS;
174        if(depthSourceArg == "neural") {
175            fps = NEURAL_FPS;
176        }
177
178        // Create pipeline
179        dai::Pipeline pipeline;
180
181        const std::pair<int, int> size = {640, 400};
182
183        // Define sources and outputs
184        auto camRgb = pipeline.create<dai::node::Camera>();
185        camRgb->build(dai::CameraBoardSocket::CAM_A, std::nullopt, fps);
186
187        auto platform = pipeline.getDefaultDevice()->getPlatform();
188
189        // Create depth source based on argument
190        dai::node::DepthSource depthSource;
191
192        if(depthSourceArg == "stereo") {
193            auto monoLeft = pipeline.create<dai::node::Camera>();
194            auto monoRight = pipeline.create<dai::node::Camera>();
195            auto stereo = pipeline.create<dai::node::StereoDepth>();
196
197            monoLeft->build(dai::CameraBoardSocket::CAM_B, std::nullopt, fps);
198            monoRight->build(dai::CameraBoardSocket::CAM_C, std::nullopt, fps);
199
200            stereo->setExtendedDisparity(true);
201            monoLeft->requestOutput(size, std::nullopt, dai::ImgResizeMode::CROP)->link(stereo->left);
202            monoRight->requestOutput(size, std::nullopt, dai::ImgResizeMode::CROP)->link(stereo->right);
203
204            depthSource = stereo;
205        } else if(depthSourceArg == "neural") {
206            auto monoLeft = pipeline.create<dai::node::Camera>();
207            auto monoRight = pipeline.create<dai::node::Camera>();
208
209            monoLeft->build(dai::CameraBoardSocket::CAM_B, std::nullopt, fps);
210            monoRight->build(dai::CameraBoardSocket::CAM_C, std::nullopt, fps);
211
212            auto neuralDepth = pipeline.create<dai::node::NeuralDepth>();
213            neuralDepth->build(*monoLeft->requestFullResolutionOutput(), *monoRight->requestFullResolutionOutput(), dai::DeviceModelZoo::NEURAL_DEPTH_LARGE);
214
215            depthSource = neuralDepth;
216        } else if(depthSourceArg == "tof") {
217            auto tof = pipeline.create<dai::node::ToF>();
218            depthSource = tof;
219        }
220
221        // Create spatial detection network using the unified build method with DepthSource variant
222        auto spatialDetectionNetwork = pipeline.create<dai::node::SpatialDetectionNetwork>();
223        auto visualizer = pipeline.create<SpatialVisualizer>();
224
225        // Configure spatial detection network
226        spatialDetectionNetwork->input.setBlocking(false);
227        spatialDetectionNetwork->setBoundingBoxScaleFactor(0.5f);
228        spatialDetectionNetwork->setDepthLowerThreshold(100);
229        spatialDetectionNetwork->setDepthUpperThreshold(5000);
230
231        // Set up model and build with DepthSource variant
232        dai::NNModelDescription modelDesc;
233        // For better results on OAK4, use a segmentation model like "luxonis/yolov8-instance-segmentation-large:coco-640x480"
234        // for depth estimation over the objects mask instead of the full bounding box.
235        modelDesc.model = "yolov6-nano";
236        spatialDetectionNetwork->build(camRgb, depthSource, modelDesc);
237
238        // Set label map
239        visualizer->labelMap = spatialDetectionNetwork->getClasses().value();
240        spatialDetectionNetwork->spatialLocationCalculator->initialConfig->setSegmentationPassthrough(false);
241
242        // Linking
243        visualizer->build(spatialDetectionNetwork->passthroughDepth, spatialDetectionNetwork->out, spatialDetectionNetwork->passthrough);
244
245        std::cout << "Pipeline starting with depth source: " << depthSourceArg << '\n';
246
247        // Start pipeline
248        pipeline.run();
249
250    } catch(const std::exception& e) {
251        std::cerr << "Error: " << e.what() << '\n';
252        return EXIT_FAILURE;
253    }
254
255    return EXIT_SUCCESS;
256}

需要帮助？

请前往 Discussion Forum 获取技术支持或提出您可能有的任何其他问题。

本页目录

空间检测网络

管道

源代码

Python

C++

需要帮助？