DepthAI v2 has been superseded by DepthAI v3. You are viewing legacy documentation.
DepthAI Tutorials
DepthAI API References

ON THIS PAGE

  • Demo
  • Setup
  • Source code
  • Pipeline

RGB & Tiny YOLO

This example shows how to run YOLO on the RGB input frame, and how to display both the RGB preview and the metadata results from the YOLO model on the preview. Decoding is done on the RVC instead of on the host computer. Configurable, network-dependent parameters are required for correct decoding:
  • setNumClasses() - number of YOLO classes
  • setCoordinateSize() - size of coordinate
  • setAnchors() - yolo anchors
  • setAnchorMasks() - anchorMasks26, anchorMasks13 (anchorMasks52 - additionally for full YOLOv4)
  • setIouThreshold() - intersection over union threshold
  • setConfidenceThreshold() - confidence threshold above which objects are detected
By default, Tiny YOLOv4 is used. You can add yolo3 as a command-line argument to use Tiny YOLOv3 instead.

Demo

Setup

Please run the install script to download all required dependencies. Please note that this script must be run from within the cloned repository, so you have to download the depthai-python repository first and then run the script:
Command Line
1git clone https://github.com/luxonis/depthai-python.git
2cd depthai-python/examples
3python3 install_requirements.py
For additional information, please follow the installation guide.

Source code

Python

Python
GitHub
1#!/usr/bin/env python3
2
3"""
4The code is the same as for Tiny Yolo V3 and V4, the only difference is the blob file
5- Tiny YOLOv3: https://github.com/david8862/keras-YOLOv3-model-set
6- Tiny YOLOv4: https://github.com/TNTWEN/OpenVINO-YOLOV4
7"""
8
9from pathlib import Path
10import sys
11import cv2
12import depthai as dai
13import numpy as np
14import time
15
# Select which YOLO blob to run from the optional first CLI argument.
# Default: Tiny YOLOv4. "yolo3"/"yolo4" pick the bundled models; any other
# value is treated as a path to a custom blob file.
nnPath = str((Path(__file__).parent / Path('../models/yolo-v4-tiny-tf_openvino_2021.4_6shave.blob')).resolve().absolute())
if 1 < len(sys.argv):
    arg = sys.argv[1]
    if arg == "yolo3":
        nnPath = str((Path(__file__).parent / Path('../models/yolo-v3-tiny-tf_openvino_2021.4_6shave.blob')).resolve().absolute())
    elif arg == "yolo4":
        nnPath = str((Path(__file__).parent / Path('../models/yolo-v4-tiny-tf_openvino_2021.4_6shave.blob')).resolve().absolute())
    else:
        nnPath = arg
else:
    print("Using Tiny YoloV4 model. If you wish to use Tiny YOLOv3, call 'tiny_yolo.py yolo3'")

# Fail early with a helpful message if the model blob has not been downloaded.
# Note: `sys` is already imported at the top of the file; the redundant
# in-branch `import sys` from the original has been removed.
if not Path(nnPath).exists():
    raise FileNotFoundError(f'Required file/s not found, please run "{sys.executable} install_requirements.py"')
32
# Class labels for the 80-class COCO dataset that Tiny YOLOv3/v4 was trained
# on. `detection.label` reported by the network indexes directly into this list.
labelMap = [
    "person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck",
    "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
    "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra",
    "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
    "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove",
    "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
    "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange",
    "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "sofa",
    "pottedplant", "bed", "diningtable", "toilet", "tvmonitor", "laptop", "mouse",
    "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink",
    "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
    "hair drier", "toothbrush",
]

# When True, the displayed frame is the NN passthrough, so the image and its
# detections are guaranteed to be synchronized.
syncNN = True
50
# Build the device pipeline: ColorCamera -> YoloDetectionNetwork, with XLink
# outputs carrying both the preview frames and the decoded NN results to host.
pipeline = dai.Pipeline()

# Nodes
camRgb = pipeline.create(dai.node.ColorCamera)
detectionNetwork = pipeline.create(dai.node.YoloDetectionNetwork)
xoutRgb = pipeline.create(dai.node.XLinkOut)
nnOut = pipeline.create(dai.node.XLinkOut)

# Host-visible stream names for the output queues opened later.
xoutRgb.setStreamName("rgb")
nnOut.setStreamName("nn")

# Camera configuration — the 416x416 preview matches the YOLO input size.
camRgb.setPreviewSize(416, 416)
camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
camRgb.setInterleaved(False)
camRgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.BGR)
camRgb.setFps(40)

# YOLO decoding parameters — these are network dependent and must match the
# blob being loaded (see the page text above for what each setter controls).
detectionNetwork.setConfidenceThreshold(0.5)
detectionNetwork.setNumClasses(80)
detectionNetwork.setCoordinateSize(4)
detectionNetwork.setAnchors([10, 14, 23, 27, 37, 58, 81, 82, 135, 169, 344, 319])
detectionNetwork.setAnchorMasks({"side26": [1, 2, 3], "side13": [3, 4, 5]})
detectionNetwork.setIouThreshold(0.5)
detectionNetwork.setBlobPath(nnPath)
detectionNetwork.setNumInferenceThreads(2)
detectionNetwork.input.setBlocking(False)

# Wiring: the camera feeds the NN; the preview shown on host comes either from
# the NN passthrough (synchronized with detections) or straight from the camera.
camRgb.preview.link(detectionNetwork.input)
if syncNN:
    detectionNetwork.passthrough.link(xoutRgb.input)
else:
    camRgb.preview.link(xoutRgb.input)

detectionNetwork.out.link(nnOut.input)
89
# Connect to the device, start the pipeline, and run the host display loop.
with dai.Device(pipeline) as device:

    # Output queues for the rgb frames and nn data from the streams defined above
    qRgb = device.getOutputQueue(name="rgb", maxSize=4, blocking=False)
    qDet = device.getOutputQueue(name="nn", maxSize=4, blocking=False)

    frame = None
    detections = []
    startTime = time.monotonic()
    counter = 0  # number of NN results received; drives the FPS overlay
    color2 = (255, 255, 255)

    def frameNorm(frame, bbox):
        # NN bounding boxes are in the normalized <0..1> range; scale them to
        # pixel coordinates (even indices by width, odd indices by height).
        normVals = np.full(len(bbox), frame.shape[0])
        normVals[::2] = frame.shape[1]
        return (np.clip(np.array(bbox), 0, 1) * normVals).astype(int)

    def displayFrame(name, frame, detections):
        # Draw label, confidence and bounding box for every detection, then
        # show the frame. `detections` is now passed explicitly instead of
        # being read from the module global — matching the C++ example and
        # avoiding hidden shared state.
        color = (255, 0, 0)
        for detection in detections:
            bbox = frameNorm(frame, (detection.xmin, detection.ymin, detection.xmax, detection.ymax))
            cv2.putText(frame, labelMap[detection.label], (bbox[0] + 10, bbox[1] + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(frame, f"{int(detection.confidence * 100)}%", (bbox[0] + 10, bbox[1] + 40), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2)
        cv2.imshow(name, frame)

    while True:
        if syncNN:
            # Blocking get: frame and detections arrive as a synchronized pair.
            inRgb = qRgb.get()
            inDet = qDet.get()
        else:
            # Non-blocking: either may be None on any given iteration.
            inRgb = qRgb.tryGet()
            inDet = qDet.tryGet()

        if inRgb is not None:
            frame = inRgb.getCvFrame()
            # Overlay average NN FPS since start.
            cv2.putText(frame, "NN fps: {:.2f}".format(counter / (time.monotonic() - startTime)),
                        (2, frame.shape[0] - 4), cv2.FONT_HERSHEY_TRIPLEX, 0.4, color2)

        if inDet is not None:
            detections = inDet.detections
            counter += 1

        if frame is not None:
            displayFrame("rgb", frame, detections)

        if cv2.waitKey(1) == ord('q'):
            break

C++

1#include <chrono>
2#include <iostream>
3
4// Includes common necessary includes for development using depthai library
5#include "depthai/depthai.hpp"
6
7/*
8The code is the same as for Tiny-yolo-V3, the only difference is the blob file.
9The blob was compiled following this tutorial: https://github.com/TNTWEN/OpenVINO-YOLOV4
10*/
11
// COCO class labels for Tiny YOLOv3/v4 (80 classes).
// detection.label reported by the network indexes directly into this table.
static const std::vector<std::string> labelMap = {
    "person",         "bicycle",    "car",           "motorbike",     "aeroplane",
    "bus",            "train",      "truck",         "boat",          "traffic light",
    "fire hydrant",   "stop sign",  "parking meter", "bench",         "bird",
    "cat",            "dog",        "horse",         "sheep",         "cow",
    "elephant",       "bear",       "zebra",         "giraffe",       "backpack",
    "umbrella",       "handbag",    "tie",           "suitcase",      "frisbee",
    "skis",           "snowboard",  "sports ball",   "kite",          "baseball bat",
    "baseball glove", "skateboard", "surfboard",     "tennis racket", "bottle",
    "wine glass",     "cup",        "fork",          "knife",         "spoon",
    "bowl",           "banana",     "apple",         "sandwich",      "orange",
    "broccoli",       "carrot",     "hot dog",       "pizza",         "donut",
    "cake",           "chair",      "sofa",          "pottedplant",   "bed",
    "diningtable",    "toilet",     "tvmonitor",     "laptop",        "mouse",
    "remote",         "keyboard",   "cell phone",    "microwave",     "oven",
    "toaster",        "sink",       "refrigerator",  "book",          "clock",
    "vase",           "scissors",   "teddy bear",    "hair drier",    "toothbrush"};

// When true, display the NN passthrough frame so boxes stay in sync with it.
static std::atomic<bool> syncNN{true};
24
25int main(int argc, char** argv) {
26    using namespace std;
27    using namespace std::chrono;
28    std::string nnPath(BLOB_PATH);
29
30    // If path to blob specified, use that
31    if(argc > 1) {
32        nnPath = std::string(argv[1]);
33    }
34
35    // Print which blob we are using
36    printf("Using blob at path: %s\n", nnPath.c_str());
37
38    // Create pipeline
39    dai::Pipeline pipeline;
40
41    // Define sources and outputs
42    auto camRgb = pipeline.create<dai::node::ColorCamera>();
43    auto detectionNetwork = pipeline.create<dai::node::YoloDetectionNetwork>();
44    auto xoutRgb = pipeline.create<dai::node::XLinkOut>();
45    auto nnOut = pipeline.create<dai::node::XLinkOut>();
46
47    xoutRgb->setStreamName("rgb");
48    nnOut->setStreamName("detections");
49
50    // Properties
51    camRgb->setPreviewSize(416, 416);
52    camRgb->setResolution(dai::ColorCameraProperties::SensorResolution::THE_1080_P);
53    camRgb->setInterleaved(false);
54    camRgb->setColorOrder(dai::ColorCameraProperties::ColorOrder::BGR);
55    camRgb->setFps(40);
56
57    // Network specific settings
58    detectionNetwork->setConfidenceThreshold(0.5f);
59    detectionNetwork->setNumClasses(80);
60    detectionNetwork->setCoordinateSize(4);
61    detectionNetwork->setAnchors({10, 14, 23, 27, 37, 58, 81, 82, 135, 169, 344, 319});
62    detectionNetwork->setAnchorMasks({{"side26", {1, 2, 3}}, {"side13", {3, 4, 5}}});
63    detectionNetwork->setIouThreshold(0.5f);
64    detectionNetwork->setBlobPath(nnPath);
65    detectionNetwork->setNumInferenceThreads(2);
66    detectionNetwork->input.setBlocking(false);
67
68    // Linking
69    camRgb->preview.link(detectionNetwork->input);
70    if(syncNN) {
71        detectionNetwork->passthrough.link(xoutRgb->input);
72    } else {
73        camRgb->preview.link(xoutRgb->input);
74    }
75
76    detectionNetwork->out.link(nnOut->input);
77
78    // Connect to device and start pipeline
79    dai::Device device(pipeline);
80
81    // Output queues will be used to get the rgb frames and nn data from the outputs defined above
82    auto qRgb = device.getOutputQueue("rgb", 4, false);
83    auto qDet = device.getOutputQueue("detections", 4, false);
84
85    cv::Mat frame;
86    std::vector<dai::ImgDetection> detections;
87    auto startTime = steady_clock::now();
88    int counter = 0;
89    float fps = 0;
90    auto color2 = cv::Scalar(255, 255, 255);
91
92    // Add bounding boxes and text to the frame and show it to the user
93    auto displayFrame = [](std::string name, cv::Mat frame, std::vector<dai::ImgDetection>& detections) {
94        auto color = cv::Scalar(255, 0, 0);
95        // nn data, being the bounding box locations, are in <0..1> range - they need to be normalized with frame width/height
96        for(auto& detection : detections) {
97            int x1 = detection.xmin * frame.cols;
98            int y1 = detection.ymin * frame.rows;
99            int x2 = detection.xmax * frame.cols;
100            int y2 = detection.ymax * frame.rows;
101
102            uint32_t labelIndex = detection.label;
103            std::string labelStr = to_string(labelIndex);
104            if(labelIndex < labelMap.size()) {
105                labelStr = labelMap[labelIndex];
106            }
107            cv::putText(frame, labelStr, cv::Point(x1 + 10, y1 + 20), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255);
108            std::stringstream confStr;
109            confStr << std::fixed << std::setprecision(2) << detection.confidence * 100;
110            cv::putText(frame, confStr.str(), cv::Point(x1 + 10, y1 + 40), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255);
111            cv::rectangle(frame, cv::Rect(cv::Point(x1, y1), cv::Point(x2, y2)), color, cv::FONT_HERSHEY_SIMPLEX);
112        }
113        // Show the frame
114        cv::imshow(name, frame);
115    };
116
117    while(true) {
118        std::shared_ptr<dai::ImgFrame> inRgb;
119        std::shared_ptr<dai::ImgDetections> inDet;
120
121        if(syncNN) {
122            inRgb = qRgb->get<dai::ImgFrame>();
123            inDet = qDet->get<dai::ImgDetections>();
124        } else {
125            inRgb = qRgb->tryGet<dai::ImgFrame>();
126            inDet = qDet->tryGet<dai::ImgDetections>();
127        }
128
129        counter++;
130        auto currentTime = steady_clock::now();
131        auto elapsed = duration_cast<duration<float>>(currentTime - startTime);
132        if(elapsed > seconds(1)) {
133            fps = counter / elapsed.count();
134            counter = 0;
135            startTime = currentTime;
136        }
137
138        if(inRgb) {
139            frame = inRgb->getCvFrame();
140            std::stringstream fpsStr;
141            fpsStr << "NN fps: " << std::fixed << std::setprecision(2) << fps;
142            cv::putText(frame, fpsStr.str(), cv::Point(2, inRgb->getHeight() - 4), cv::FONT_HERSHEY_TRIPLEX, 0.4, color2);
143        }
144
145        if(inDet) {
146            detections = inDet->detections;
147        }
148
149        if(!frame.empty()) {
150            displayFrame("rgb", frame, detections);
151        }
152
153        int key = cv::waitKey(1);
154        if(key == 'q' || key == 'Q') {
155            return 0;
156        }
157    }
158    return 0;
159}

Pipeline

Need assistance?

Head over to Discussion Forum for technical support or any other questions you might have.