RGB & MobilenetSSD¶
This example shows how to run MobileNetv2SSD on the RGB input frame, and how to display both the RGB preview and the MobileNetv2SSD detection results on that preview.
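In short, the pipeline links a ColorCamera preview into a MobileNetDetectionNetwork node and streams both the frames and the parsed detections back to the host over XLink. A minimal sketch of that wiring, using the same depthai Python API as the full source below (the blob path is a placeholder):

import depthai as dai

pipeline = dai.Pipeline()

# Camera preview sized to the 300x300 input MobileNet-SSD expects
cam = pipeline.create(dai.node.ColorCamera)
cam.setPreviewSize(300, 300)
cam.setInterleaved(False)

# The detection network parses MobileNet-SSD output into ImgDetections on-device
nn = pipeline.create(dai.node.MobileNetDetectionNetwork)
nn.setBlobPath("mobilenet-ssd.blob")  # placeholder; the full example resolves a real path
cam.preview.link(nn.input)

# Two XLinkOut nodes expose the frames and detections as host-side queues
xoutRgb = pipeline.create(dai.node.XLinkOut)
xoutRgb.setStreamName("rgb")
cam.preview.link(xoutRgb.input)

xoutNn = pipeline.create(dai.node.XLinkOut)
xoutNn.setStreamName("nn")
nn.out.link(xoutNn.input)

The full example below additionally exposes the raw network output (nn.outNetwork) and can optionally display nn.passthrough instead of the live preview, so that drawn boxes stay synced with the exact frame they were computed on.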
Demo¶
Setup¶
Please run the install script to download all required dependencies. Please note that this script must be run from within the cloned repository, so first clone the depthai-python repository and then run the script:
git clone https://github.com/luxonis/depthai-python.git
cd depthai-python/examples
python3 install_requirements.py
For additional information, please follow the installation guide.
This example script requires external file(s) to run. If you are using:

- depthai-python: run python3 examples/install_requirements.py to download the required file(s)
- depthai-core: the required file(s) will be downloaded automatically when the example is built
Source code¶
Python¶
Also available on GitHub
#!/usr/bin/env python3

from pathlib import Path
import cv2
import depthai as dai
import numpy as np
import time
import argparse

nnPathDefault = str((Path(__file__).parent / Path('../models/mobilenet-ssd_openvino_2021.4_6shave.blob')).resolve().absolute())
parser = argparse.ArgumentParser()
parser.add_argument('nnPath', nargs='?', help="Path to mobilenet detection network blob", default=nnPathDefault)
parser.add_argument('-s', '--sync', action="store_true", help="Sync RGB output with NN output", default=False)
args = parser.parse_args()

if not Path(nnPathDefault).exists():
    import sys
    raise FileNotFoundError(f'Required file/s not found, please run "{sys.executable} install_requirements.py"')

# MobilenetSSD label texts
labelMap = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow",
            "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]

# Create pipeline
pipeline = dai.Pipeline()

# Define sources and outputs
camRgb = pipeline.create(dai.node.ColorCamera)
nn = pipeline.create(dai.node.MobileNetDetectionNetwork)
xoutRgb = pipeline.create(dai.node.XLinkOut)
nnOut = pipeline.create(dai.node.XLinkOut)
nnNetworkOut = pipeline.create(dai.node.XLinkOut)

xoutRgb.setStreamName("rgb")
nnOut.setStreamName("nn")
nnNetworkOut.setStreamName("nnNetwork")

# Properties
camRgb.setPreviewSize(300, 300)
camRgb.setInterleaved(False)
camRgb.setFps(40)
# Define a neural network that will make predictions based on the source frames
nn.setConfidenceThreshold(0.5)
nn.setBlobPath(args.nnPath)
nn.setNumInferenceThreads(2)
nn.input.setBlocking(False)

# Linking
if args.sync:
    nn.passthrough.link(xoutRgb.input)
else:
    camRgb.preview.link(xoutRgb.input)

camRgb.preview.link(nn.input)
nn.out.link(nnOut.input)
nn.outNetwork.link(nnNetworkOut.input)

# Connect to device and start pipeline
with dai.Device(pipeline) as device:

    # Output queues will be used to get the rgb frames and nn data from the outputs defined above
    qRgb = device.getOutputQueue(name="rgb", maxSize=4, blocking=False)
    qDet = device.getOutputQueue(name="nn", maxSize=4, blocking=False)
    qNN = device.getOutputQueue(name="nnNetwork", maxSize=4, blocking=False)

    frame = None
    detections = []
    startTime = time.monotonic()
    counter = 0
    color2 = (255, 255, 255)

    # nn data (bounding box locations) are in <0..1> range - they need to be normalized with frame width/height
    def frameNorm(frame, bbox):
        normVals = np.full(len(bbox), frame.shape[0])
        normVals[::2] = frame.shape[1]
        return (np.clip(np.array(bbox), 0, 1) * normVals).astype(int)

    def displayFrame(name, frame):
        color = (255, 0, 0)
        for detection in detections:
            bbox = frameNorm(frame, (detection.xmin, detection.ymin, detection.xmax, detection.ymax))
            cv2.putText(frame, labelMap[detection.label], (bbox[0] + 10, bbox[1] + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
            cv2.putText(frame, f"{int(detection.confidence * 100)}%", (bbox[0] + 10, bbox[1] + 40), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
            cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2)
        # Show the frame
        cv2.imshow(name, frame)

    printOutputLayersOnce = True

    while True:
        if args.sync:
            # Use blocking get() call to catch frame and inference result synced
            inRgb = qRgb.get()
            inDet = qDet.get()
            inNN = qNN.get()
        else:
            # Instead of get (blocking), we use tryGet (non-blocking) which will return the available data or None otherwise
            inRgb = qRgb.tryGet()
            inDet = qDet.tryGet()
            inNN = qNN.tryGet()

        if inRgb is not None:
            frame = inRgb.getCvFrame()
            cv2.putText(frame, "NN fps: {:.2f}".format(counter / (time.monotonic() - startTime)),
                        (2, frame.shape[0] - 4), cv2.FONT_HERSHEY_TRIPLEX, 0.4, color2)

        if inDet is not None:
            detections = inDet.detections
            counter += 1

        if printOutputLayersOnce and inNN is not None:
            toPrint = 'Output layer names:'
            for ten in inNN.getAllLayerNames():
                toPrint = f'{toPrint} {ten},'
            print(toPrint)
            printOutputLayersOnce = False

        # If the frame is available, draw bounding boxes on it and show the frame
        if frame is not None:
            displayFrame("rgb", frame)

        if cv2.waitKey(1) == ord('q'):
            break
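The frameNorm helper above is what maps the network's normalized <0..1> bounding-box coordinates to pixels: even indices of the (xmin, ymin, xmax, ymax) tuple are scaled by the frame width and odd indices by its height. A standalone check of that math, with a made-up frame size and detection:

import numpy as np

def frameNorm(frame, bbox):
    # Even indices (xmin, xmax) scale by width, odd indices (ymin, ymax) by height
    normVals = np.full(len(bbox), frame.shape[0])
    normVals[::2] = frame.shape[1]
    return (np.clip(np.array(bbox), 0, 1) * normVals).astype(int)

frame = np.zeros((480, 640, 3), dtype=np.uint8)  # 640x480 stand-in frame
print(frameNorm(frame, (0.1, 0.2, 0.5, 0.6)))    # -> [ 64  96 320 288]

Running the script with -s/--sync displays the nn.passthrough frames instead of the live preview, so the drawn boxes always correspond to the exact frame they were computed on, at the cost of blocking get() calls.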
C++¶
Also available on GitHub
#include <chrono>
#include <cstdio>
#include <iomanip>
#include <iostream>
#include <sstream>

#include "utility.hpp"

// Includes common necessary includes for development using depthai library
#include "depthai/depthai.hpp"

// MobilenetSSD label texts
static const std::vector<std::string> labelMap = {"background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus",
                                                  "car", "cat", "chair", "cow", "diningtable", "dog", "horse",
                                                  "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"};

static std::atomic<bool> syncNN{true};

int main(int argc, char** argv) {
    using namespace std;
    using namespace std::chrono;

    // Default blob path provided by Hunter private data download
    // Applicable for easier example usage only
    std::string nnPath(BLOB_PATH);

    // If path to blob specified, use that
    if(argc > 1) {
        nnPath = std::string(argv[1]);
    }

    // Print which blob we are using
    printf("Using blob at path: %s\n", nnPath.c_str());

    // Create pipeline
    dai::Pipeline pipeline;

    // Define sources and outputs
    auto camRgb = pipeline.create<dai::node::ColorCamera>();
    auto nn = pipeline.create<dai::node::MobileNetDetectionNetwork>();
    auto xoutRgb = pipeline.create<dai::node::XLinkOut>();
    auto nnOut = pipeline.create<dai::node::XLinkOut>();
    auto nnNetworkOut = pipeline.create<dai::node::XLinkOut>();

    xoutRgb->setStreamName("rgb");
    nnOut->setStreamName("nn");
    nnNetworkOut->setStreamName("nnNetwork");

    // Properties
    camRgb->setPreviewSize(300, 300);  // NN input
    camRgb->setInterleaved(false);
    camRgb->setFps(40);
    // Define a neural network that will make predictions based on the source frames
    nn->setConfidenceThreshold(0.5);
    nn->setBlobPath(nnPath);
    nn->setNumInferenceThreads(2);
    nn->input.setBlocking(false);

    // Linking
    if(syncNN) {
        nn->passthrough.link(xoutRgb->input);
    } else {
        camRgb->preview.link(xoutRgb->input);
    }
    camRgb->preview.link(nn->input);
    nn->out.link(nnOut->input);
    nn->outNetwork.link(nnNetworkOut->input);

    // Connect to device and start pipeline
    dai::Device device(pipeline);

    // Output queues will be used to get the rgb frames and nn data from the outputs defined above
    auto qRgb = device.getOutputQueue("rgb", 4, false);
    auto qDet = device.getOutputQueue("nn", 4, false);
    auto qNN = device.getOutputQueue("nnNetwork", 4, false);

    cv::Mat frame;
    std::vector<dai::ImgDetection> detections;
    auto startTime = steady_clock::now();
    int counter = 0;
    float fps = 0;
    auto color2 = cv::Scalar(255, 255, 255);

    // Add bounding boxes and text to the frame and show it to the user
    auto displayFrame = [](std::string name, cv::Mat frame, std::vector<dai::ImgDetection>& detections) {
        auto color = cv::Scalar(255, 0, 0);
        // nn data, being the bounding box locations, are in <0..1> range - they need to be normalized with frame width/height
        for(auto& detection : detections) {
            int x1 = detection.xmin * frame.cols;
            int y1 = detection.ymin * frame.rows;
            int x2 = detection.xmax * frame.cols;
            int y2 = detection.ymax * frame.rows;

            uint32_t labelIndex = detection.label;
            std::string labelStr = to_string(labelIndex);
            if(labelIndex < labelMap.size()) {
                labelStr = labelMap[labelIndex];
            }
            cv::putText(frame, labelStr, cv::Point(x1 + 10, y1 + 20), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);
            std::stringstream confStr;
            confStr << std::fixed << std::setprecision(2) << detection.confidence * 100;
            cv::putText(frame, confStr.str(), cv::Point(x1 + 10, y1 + 40), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);
            cv::rectangle(frame, cv::Rect(cv::Point(x1, y1), cv::Point(x2, y2)), color, 2);
        }
        // Show the frame
        cv::imshow(name, frame);
    };

    bool printOutputLayersOnce = true;

    while(true) {
        std::shared_ptr<dai::ImgFrame> inRgb;
        std::shared_ptr<dai::ImgDetections> inDet;
        std::shared_ptr<dai::NNData> inNN;

        if(syncNN) {
            inRgb = qRgb->get<dai::ImgFrame>();
            inDet = qDet->get<dai::ImgDetections>();
            inNN = qNN->get<dai::NNData>();
        } else {
            inRgb = qRgb->tryGet<dai::ImgFrame>();
            inDet = qDet->tryGet<dai::ImgDetections>();
            inNN = qNN->tryGet<dai::NNData>();
        }

        counter++;
        auto currentTime = steady_clock::now();
        auto elapsed = duration_cast<duration<float>>(currentTime - startTime);
        if(elapsed > seconds(1)) {
            fps = counter / elapsed.count();
            counter = 0;
            startTime = currentTime;
        }

        if(inRgb) {
            frame = inRgb->getCvFrame();
            std::stringstream fpsStr;
            fpsStr << "NN fps: " << std::fixed << std::setprecision(2) << fps;
            cv::putText(frame, fpsStr.str(), cv::Point(2, inRgb->getHeight() - 4), cv::FONT_HERSHEY_TRIPLEX, 0.4, color2);
        }

        if(inDet) {
            detections = inDet->detections;
        }

        if(printOutputLayersOnce && inNN) {
            std::cout << "Output layer names: ";
            for(const auto& ten : inNN->getAllLayerNames()) {
                std::cout << ten << ", ";
            }
            std::cout << std::endl;
            printOutputLayersOnce = false;
        }

        if(!frame.empty()) {
            displayFrame("video", frame, detections);
        }

        int key = cv::waitKey(1);
        if(key == 'q' || key == 'Q') {
            return 0;
        }
    }
    return 0;
}