DepthAI v2 has been superseded by DepthAI v3. You are viewing legacy documentation.

Mono & MobilenetSSD & Depth

This example shows how to run MobileNetv2SSD on the rectified right grayscale frames while disparity depth is computed in parallel, displaying both the colorized depth map and the rectified right stream with the neural network bounding boxes overlaid. It's a combination of Depth Preview and Mono & MobilenetSSD.

Setup

Please run the install script to download all required dependencies. Please note that this script must be run from inside the cloned repository, so you have to download the depthai-python repository first and then run the script:
Command Line
git clone https://github.com/luxonis/depthai-python.git
cd depthai-python/examples
python3 install_requirements.py
For additional information, please follow the installation guide.
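
If you want to sanity-check the setup before running the example, a minimal snippet such as the one below (not part of the example itself) confirms that the depthai package imports correctly and that an OAK device is visible to the host:

Python
import depthai as dai

# Print the installed depthai version
print("depthai version:", dai.__version__)

# List all devices currently visible to the host (USB or PoE)
for info in dai.Device.getAllAvailableDevices():
    print(f"Found device {info.getMxId()}, state: {info.state.name}")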

Source code

Python

#!/usr/bin/env python3

from pathlib import Path
import sys
import cv2
import depthai as dai
import numpy as np

# Get argument first
nnPath = str((Path(__file__).parent / Path('../models/mobilenet-ssd_openvino_2021.4_6shave.blob')).resolve().absolute())
if len(sys.argv) > 1:
    nnPath = sys.argv[1]

if not Path(nnPath).exists():
    raise FileNotFoundError(f'Required file/s not found, please run "{sys.executable} install_requirements.py"')

# MobilenetSSD label texts
labelMap = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow",
            "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]

# Create pipeline
pipeline = dai.Pipeline()

# Define sources and outputs
monoRight = pipeline.create(dai.node.MonoCamera)
monoLeft = pipeline.create(dai.node.MonoCamera)
stereo = pipeline.create(dai.node.StereoDepth)
manip = pipeline.create(dai.node.ImageManip)
nn = pipeline.create(dai.node.MobileNetDetectionNetwork)

nnOut = pipeline.create(dai.node.XLinkOut)
disparityOut = pipeline.create(dai.node.XLinkOut)
xoutRight = pipeline.create(dai.node.XLinkOut)

disparityOut.setStreamName("disparity")
xoutRight.setStreamName("rectifiedRight")
nnOut.setStreamName("nn")

# Properties
monoRight.setCamera("right")
monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoLeft.setCamera("left")
monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)

# Produce the depth map (using disparity output as it's easier to visualize depth this way)
stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY)
stereo.setRectifyEdgeFillColor(0)  # Black, to better see the cutout from rectification (black stripe on the edges)
# Convert the grayscale frame into the nn-acceptable form
manip.initialConfig.setResize(300, 300)
# The NN model expects BGR input. By default ImageManip output type would be same as input (gray in this case)
manip.initialConfig.setFrameType(dai.ImgFrame.Type.BGR888p)

# Define a neural network that will make predictions based on the source frames
nn.setConfidenceThreshold(0.5)
nn.setBlobPath(nnPath)
nn.setNumInferenceThreads(2)
nn.input.setBlocking(False)

# Linking
monoRight.out.link(stereo.right)
monoLeft.out.link(stereo.left)
stereo.rectifiedRight.link(manip.inputImage)
stereo.disparity.link(disparityOut.input)
manip.out.link(nn.input)
manip.out.link(xoutRight.input)
nn.out.link(nnOut.input)

# Connect to device and start pipeline
with dai.Device(pipeline) as device:

    # Output queues will be used to get the grayscale / depth frames and nn data from the outputs defined above
    qRight = device.getOutputQueue("rectifiedRight", maxSize=4, blocking=False)
    qDisparity = device.getOutputQueue("disparity", maxSize=4, blocking=False)
    qDet = device.getOutputQueue("nn", maxSize=4, blocking=False)

    rightFrame = None
    disparityFrame = None
    detections = []

    # nn data, being the bounding box locations, are in <0..1> range - they need to be normalized with frame width/height
    def frameNorm(frame, bbox):
        normVals = np.full(len(bbox), frame.shape[0])
        normVals[::2] = frame.shape[1]
        return (np.clip(np.array(bbox), 0, 1) * normVals).astype(int)

    # Add bounding boxes and text to the frame and show it to the user
    def show(name, frame):
        color = (255, 0, 0)
        for detection in detections:
            bbox = frameNorm(frame, (detection.xmin, detection.ymin, detection.xmax, detection.ymax))
            cv2.putText(frame, labelMap[detection.label], (bbox[0] + 10, bbox[1] + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
            cv2.putText(frame, f"{int(detection.confidence * 100)}%", (bbox[0] + 10, bbox[1] + 40), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
            cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2)
        # Show the frame
        cv2.imshow(name, frame)

    # Scale factor to map the disparity range <0..maxDisparity> to the full <0..255> range for visualization
    disparityMultiplier = 255 / stereo.initialConfig.getMaxDisparity()

    while True:
        # has() is non-blocking - it reports whether a new message is waiting, so the get() calls below never stall the loop
        if qDet.has():
            detections = qDet.get().detections

        if qRight.has():
            rightFrame = qRight.get().getCvFrame()

        if qDisparity.has():
            # Frame is transformed, normalized, and color map will be applied to highlight the depth info
            disparityFrame = qDisparity.get().getFrame()
            disparityFrame = (disparityFrame*disparityMultiplier).astype(np.uint8)
            # Available color maps: https://docs.opencv.org/3.4/d3/d50/group__imgproc__colormap.html
            disparityFrame = cv2.applyColorMap(disparityFrame, cv2.COLORMAP_JET)
            show("disparity", disparityFrame)

        if rightFrame is not None:
            show("rectified right", rightFrame)

        if cv2.waitKey(1) == ord('q'):
            break
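
The script above visualizes disparity rather than metric depth. If you need distance instead, the standard stereo relation is depth = focal_length_px * baseline / disparity. Below is a minimal sketch of that conversion using the device calibration; the calibration calls are standard depthai v2 API, while the 640x400 intrinsics resolution is an assumption matching the 400P mono streams used here:

Python
import numpy as np
import depthai as dai

with dai.Device(pipeline) as device:
    calib = device.readCalibration()
    baselineMm = calib.getBaselineDistance() * 10  # reported in cm, convert to mm
    # Intrinsics of the right mono camera at the streamed resolution (assumed 640x400)
    fxPx = calib.getCameraIntrinsics(dai.CameraBoardSocket.RIGHT, 640, 400)[0][0]

    qDisparity = device.getOutputQueue("disparity", maxSize=4, blocking=False)
    disparity = qDisparity.get().getFrame().astype(np.float32)
    # depth (mm) = fx (px) * baseline (mm) / disparity (px); guard against zero disparity
    with np.errstate(divide='ignore'):
        depthMm = np.where(disparity > 0, fxPx * baselineMm / disparity, 0)

Alternatively, the StereoDepth node also exposes a depth output that already produces depth frames in millimeters, which can be linked to an XLinkOut in place of (or alongside) disparity.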

C++

#include <iomanip>
#include <iostream>
#include <sstream>

// Includes common necessary includes for development using depthai library
#include "depthai/depthai.hpp"

// MobilenetSSD label texts
static const std::vector<std::string> labelMap = {"background", "aeroplane", "bicycle",     "bird",  "boat",        "bottle", "bus",
                                                  "car",        "cat",       "chair",       "cow",   "diningtable", "dog",    "horse",
                                                  "motorbike",  "person",    "pottedplant", "sheep", "sofa",        "train",  "tvmonitor"};

int main(int argc, char** argv) {
    using namespace std;
    // Default blob path provided by Hunter private data download
    // Applicable for easier example usage only
    std::string nnPath(BLOB_PATH);

    // If path to blob specified, use that
    if(argc > 1) {
        nnPath = std::string(argv[1]);
    }

    // Print which blob we are using
    printf("Using blob at path: %s\n", nnPath.c_str());

    // Create pipeline
    dai::Pipeline pipeline;

    // Define sources and outputs
    auto monoRight = pipeline.create<dai::node::MonoCamera>();
    auto monoLeft = pipeline.create<dai::node::MonoCamera>();
    auto stereo = pipeline.create<dai::node::StereoDepth>();
    auto manip = pipeline.create<dai::node::ImageManip>();
    auto nn = pipeline.create<dai::node::MobileNetDetectionNetwork>();

    auto disparityOut = pipeline.create<dai::node::XLinkOut>();
    auto xoutRight = pipeline.create<dai::node::XLinkOut>();
    auto nnOut = pipeline.create<dai::node::XLinkOut>();

    disparityOut->setStreamName("disparity");
    xoutRight->setStreamName("rectifiedRight");
    nnOut->setStreamName("nn");

    // Properties
    monoRight->setCamera("right");
    monoRight->setResolution(dai::MonoCameraProperties::SensorResolution::THE_400_P);
    monoLeft->setCamera("left");
    monoLeft->setResolution(dai::MonoCameraProperties::SensorResolution::THE_400_P);
    // Produce the depth map (using disparity output as it's easier to visualize depth this way)
    stereo->setDefaultProfilePreset(dai::node::StereoDepth::PresetMode::HIGH_DENSITY);
    stereo->setRectifyEdgeFillColor(0);  // Black, to better see the cutout from rectification (black stripe on the edges)
    // Convert the grayscale frame into the nn-acceptable form
    manip->initialConfig.setResize(300, 300);
    // The NN model expects BGR input. By default ImageManip output type would be same as input (gray in this case)
    manip->initialConfig.setFrameType(dai::ImgFrame::Type::BGR888p);

    // Define a neural network that will make predictions based on the source frames
    nn->setConfidenceThreshold(0.5);
    nn->setBlobPath(nnPath);
    nn->setNumInferenceThreads(2);
    nn->input.setBlocking(false);

    // Linking
    monoRight->out.link(stereo->right);
    monoLeft->out.link(stereo->left);
    stereo->rectifiedRight.link(manip->inputImage);
    stereo->disparity.link(disparityOut->input);
    manip->out.link(nn->input);
    manip->out.link(xoutRight->input);
    nn->out.link(nnOut->input);

    // Connect to device and start pipeline
    dai::Device device(pipeline);

    // Output queues will be used to get the grayscale / depth frames and nn data from the outputs defined above
    auto qRight = device.getOutputQueue("rectifiedRight", 4, false);
    auto qDisparity = device.getOutputQueue("disparity", 4, false);
    auto qDet = device.getOutputQueue("nn", 4, false);

    cv::Mat rightFrame;
    cv::Mat disparityFrame;
    std::vector<dai::ImgDetection> detections;

    // Add bounding boxes and text to the frame and show it to the user
    auto show = [](std::string name, cv::Mat frame, std::vector<dai::ImgDetection>& detections) {
        auto color = cv::Scalar(255, 192, 203);
        // nn data, being the bounding box locations, are in <0..1> range - they need to be normalized with frame width/height
        for(auto& detection : detections) {
            int x1 = detection.xmin * frame.cols;
            int y1 = detection.ymin * frame.rows;
            int x2 = detection.xmax * frame.cols;
            int y2 = detection.ymax * frame.rows;

            uint32_t labelIndex = detection.label;
            std::string labelStr = to_string(labelIndex);
            if(labelIndex < labelMap.size()) {
                labelStr = labelMap[labelIndex];
            }
            cv::putText(frame, labelStr, cv::Point(x1 + 10, y1 + 20), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);
            std::stringstream confStr;
            confStr << std::fixed << std::setprecision(2) << detection.confidence * 100;
            cv::putText(frame, confStr.str(), cv::Point(x1 + 10, y1 + 40), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);
            cv::rectangle(frame, cv::Rect(cv::Point(x1, y1), cv::Point(x2, y2)), color, 2);
        }
        // Show the frame
        cv::imshow(name, frame);
    };

    // Scale factor to map the disparity range <0..maxDisparity> to the full <0..255> range for visualization
    float disparityMultiplier = 255 / stereo->initialConfig.getMaxDisparity();

    while(true) {
        // Instead of get (blocking), we use tryGet (non-blocking) which will return the available data or nullptr otherwise
        auto inRight = qRight->tryGet<dai::ImgFrame>();
        auto inDet = qDet->tryGet<dai::ImgDetections>();
        auto inDisparity = qDisparity->tryGet<dai::ImgFrame>();

        if(inRight) {
            rightFrame = inRight->getCvFrame();
        }

        if(inDet) {
            detections = inDet->detections;
        }

        if(inDisparity) {
            // Frame is transformed, normalized, and color map will be applied to highlight the depth info
            disparityFrame = inDisparity->getFrame();
            disparityFrame.convertTo(disparityFrame, CV_8UC1, disparityMultiplier);
            // Available color maps: https://docs.opencv.org/3.4/d3/d50/group__imgproc__colormap.html
            cv::applyColorMap(disparityFrame, disparityFrame, cv::COLORMAP_JET);
            show("disparity", disparityFrame, detections);
        }

        if(!rightFrame.empty()) {
            show("rectified right", rightFrame, detections);
        }

        int key = cv::waitKey(1);
        if(key == 'q' || key == 'Q') return 0;
    }
    return 0;
}
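
To build the C++ version, the usual flow is to compile depthai-core with examples enabled. This is a sketch: the DEPTHAI_BUILD_EXAMPLES option matches depthai-core's CMake options, but the exact example target name may differ in your checkout.

Command Line
git clone --recursive https://github.com/luxonis/depthai-core.git
cd depthai-core
cmake -S . -B build -D DEPTHAI_BUILD_EXAMPLES=ON
cmake --build build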

Pipeline

Two MonoCamera nodes feed the StereoDepth node; its rectifiedRight output is resized and converted by ImageManip and passed to the MobileNetDetectionNetwork, while the disparity, rectified right, and detection streams are sent to the host through XLinkOut nodes.
