DepthAI v2 has been superseded by DepthAI v3. You are viewing legacy documentation.
DepthAI Tutorials
DepthAI API References

ON THIS PAGE

  • Similar samples:
  • Demo
  • Setup
  • Source code
  • Pipeline

Mono & MobilenetSSD

This example shows how to run MobileNetv2SSD on the right grayscale camera and how to display the neural network results on a preview of the right camera stream.

Similar samples:

Demo

Setup

Please run the install script to download all required dependencies. Note that this script must be run from within a Git checkout, so you have to clone the depthai-python repository first and then run the script:
Command Line
git clone https://github.com/luxonis/depthai-python.git
cd depthai-python/examples
python3 install_requirements.py
For additional information, please follow the installation guide.

Source code

Python

Python
GitHub
#!/usr/bin/env python3
"""Run MobileNet-SSD on the right mono camera and draw the detections on its preview.

Usage: pass the path to a MobileNet-SSD blob as the first command-line argument,
or omit it to use the default blob location relative to this script.
Press 'q' in the preview window to quit.
"""

from pathlib import Path
import sys
import cv2
import depthai as dai
import numpy as np

# Default blob location (relative to this script); the first CLI argument overrides it
nnPath = str((Path(__file__).parent / Path('../models/mobilenet-ssd_openvino_2021.4_6shave.blob')).resolve().absolute())
if len(sys.argv) > 1:
    nnPath = sys.argv[1]

# Fail early with a hint on how to fetch the blob (sys is already imported above)
if not Path(nnPath).exists():
    raise FileNotFoundError(f'Required file/s not found, please run "{sys.executable} install_requirements.py"')

# MobilenetSSD label texts
labelMap = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow",
            "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]

# Create pipeline
pipeline = dai.Pipeline()

# Define sources and outputs
monoRight = pipeline.create(dai.node.MonoCamera)
manip = pipeline.create(dai.node.ImageManip)
nn = pipeline.create(dai.node.MobileNetDetectionNetwork)
manipOut = pipeline.create(dai.node.XLinkOut)
nnOut = pipeline.create(dai.node.XLinkOut)

manipOut.setStreamName("right")
nnOut.setStreamName("nn")

# Properties
monoRight.setCamera("right")
monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_720_P)

# Convert the grayscale frame into the nn-acceptable form
manip.initialConfig.setResize(300, 300)
# The NN model expects BGR input. By default ImageManip output type would be same as input (gray in this case)
manip.initialConfig.setFrameType(dai.ImgFrame.Type.BGR888p)

nn.setConfidenceThreshold(0.5)
nn.setBlobPath(nnPath)
nn.setNumInferenceThreads(2)
nn.input.setBlocking(False)

# Linking: camera -> manip -> (nn, host preview); nn -> host
monoRight.out.link(manip.inputImage)
manip.out.link(nn.input)
manip.out.link(manipOut.input)
nn.out.link(nnOut.input)

# Connect to device and start pipeline
with dai.Device(pipeline) as device:

    # Output queues will be used to get the grayscale frames and nn data from the outputs defined above
    qRight = device.getOutputQueue("right", maxSize=4, blocking=False)
    qDet = device.getOutputQueue("nn", maxSize=4, blocking=False)

    # Most recent frame and detections; kept across iterations because tryGet may return None
    frame = None
    detections = []

    def frameNorm(frame, bbox):
        """Convert a normalized (xmin, ymin, xmax, ymax) box into integer pixel coordinates.

        Values are clipped to the <0..1> range, then x entries are scaled by the
        frame width and y entries by the frame height.
        """
        normVals = np.full(len(bbox), frame.shape[0])
        normVals[::2] = frame.shape[1]
        return (np.clip(np.array(bbox), 0, 1) * normVals).astype(int)

    def displayFrame(name, frame):
        """Draw the current detections (label, confidence, box) onto the frame and show it."""
        color = (255, 0, 0)
        for detection in detections:
            bbox = frameNorm(frame, (detection.xmin, detection.ymin, detection.xmax, detection.ymax))
            # Fall back to the numeric label when the model reports an index outside labelMap
            label = labelMap[detection.label] if detection.label < len(labelMap) else str(detection.label)
            cv2.putText(frame, label, (bbox[0] + 10, bbox[1] + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
            cv2.putText(frame, f"{int(detection.confidence * 100)}%", (bbox[0] + 10, bbox[1] + 40), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
            cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2)
        # Show the frame
        cv2.imshow(name, frame)

    while True:
        # Instead of get (blocking), we use tryGet (non-blocking) which will return the available data or None otherwise
        inRight = qRight.tryGet()
        inDet = qDet.tryGet()

        if inRight is not None:
            frame = inRight.getCvFrame()

        if inDet is not None:
            detections = inDet.detections

        if frame is not None:
            displayFrame("right", frame)

        if cv2.waitKey(1) == ord('q'):
            break

C++

1#include <iostream>
2
3#include "utility.hpp"
4
5// Includes common necessary includes for development using depthai library
6#include "depthai/depthai.hpp"
7
8// MobilenetSSD label texts
9static const std::vector<std::string> labelMap = {"background", "aeroplane", "bicycle",     "bird",  "boat",        "bottle", "bus",
10                                                  "car",        "cat",       "chair",       "cow",   "diningtable", "dog",    "horse",
11                                                  "motorbike",  "person",    "pottedplant", "sheep", "sofa",        "train",  "tvmonitor"};
12
13int main(int argc, char** argv) {
14    using namespace std;
15    // Default blob path provided by Hunter private data download
16    // Applicable for easier example usage only
17    std::string nnPath(BLOB_PATH);
18
19    // If path to blob specified, use that
20    if(argc > 1) {
21        nnPath = std::string(argv[1]);
22    }
23
24    // Print which blob we are using
25    printf("Using blob at path: %s\n", nnPath.c_str());
26
27    // Create pipeline
28    dai::Pipeline pipeline;
29
30    // Define sources and outputs
31    auto monoRight = pipeline.create<dai::node::MonoCamera>();
32    auto manip = pipeline.create<dai::node::ImageManip>();
33    auto nn = pipeline.create<dai::node::MobileNetDetectionNetwork>();
34    auto manipOut = pipeline.create<dai::node::XLinkOut>();
35    auto nnOut = pipeline.create<dai::node::XLinkOut>();
36
37    manipOut->setStreamName("right");
38    nnOut->setStreamName("nn");
39
40    // Properties
41    monoRight->setCamera("right");
42    monoRight->setResolution(dai::MonoCameraProperties::SensorResolution::THE_720_P);
43
44    // Convert the grayscale frame into the nn-acceptable form
45    manip->initialConfig.setResize(300, 300);
46    // The NN model expects BGR input. By default ImageManip output type would be same as input (gray in this case)
47    manip->initialConfig.setFrameType(dai::ImgFrame::Type::BGR888p);
48
49    nn->setConfidenceThreshold(0.5);
50    nn->setBlobPath(nnPath);
51    nn->setNumInferenceThreads(2);
52    nn->input.setBlocking(false);
53
54    // Linking
55    monoRight->out.link(manip->inputImage);
56    manip->out.link(nn->input);
57    manip->out.link(manipOut->input);
58    nn->out.link(nnOut->input);
59
60    // Connect to device and start pipeline
61    dai::Device device(pipeline);
62
63    // Output queues will be used to get the grayscale frames and nn data from the outputs defined above
64    auto qRight = device.getOutputQueue("right", 4, false);
65    auto qDet = device.getOutputQueue("nn", 4, false);
66
67    cv::Mat frame;
68    std::vector<dai::ImgDetection> detections;
69
70    // Add bounding boxes and text to the frame and show it to the user
71    auto displayFrame = [](std::string name, cv::Mat frame, std::vector<dai::ImgDetection>& detections) {
72        auto color = cv::Scalar(255, 0, 0);
73        // nn data, being the bounding box locations, are in <0..1> range - they need to be normalized with frame width/height
74        for(auto& detection : detections) {
75            int x1 = detection.xmin * frame.cols;
76            int y1 = detection.ymin * frame.rows;
77            int x2 = detection.xmax * frame.cols;
78            int y2 = detection.ymax * frame.rows;
79
80            uint32_t labelIndex = detection.label;
81            std::string labelStr = to_string(labelIndex);
82            if(labelIndex < labelMap.size()) {
83                labelStr = labelMap[labelIndex];
84            }
85            cv::putText(frame, labelStr, cv::Point(x1 + 10, y1 + 20), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);
86            std::stringstream confStr;
87            confStr << std::fixed << std::setprecision(2) << detection.confidence * 100;
88            cv::putText(frame, confStr.str(), cv::Point(x1 + 10, y1 + 40), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);
89            cv::rectangle(frame, cv::Rect(cv::Point(x1, y1), cv::Point(x2, y2)), color, cv::FONT_HERSHEY_SIMPLEX);
90        }
91        // Show the frame
92        cv::imshow(name, frame);
93    };
94
95    while(true) {
96        // Instead of get (blocking), we use tryGet (non-blocking) which will return the available data or None otherwise
97        auto inRight = qRight->tryGet<dai::ImgFrame>();
98        auto inDet = qDet->tryGet<dai::ImgDetections>();
99
100        if(inRight) {
101            frame = inRight->getCvFrame();
102        }
103
104        if(inDet) {
105            detections = inDet->detections;
106        }
107
108        if(!frame.empty()) {
109            displayFrame("right", frame, detections);
110        }
111
112        int key = cv::waitKey(1);
113        if(key == 'q' || key == 'Q') return 0;
114    }
115    return 0;
116}

Pipeline

Need assistance?

Head over to Discussion Forum for technical support or any other questions you might have.