Detection network

Supported on: RVC2, RVC4
This example shows how to run YOLOv6 on-camera on the RGB stream. It downloads the pre-trained model weights from the Model Zoo, specifically the YOLOv6 Nano model along with its configuration files.

Because it uses the DetectionNetwork node, NN output parsing (decoding) happens on the camera itself; both YOLO and SSD NN output formats are supported.

The example displays bounding boxes around detected objects on the video stream. Note that the video stream uses the passthrough output, so its resolution matches the NN input resolution (512x288). The NN results could also be displayed at a higher resolution; for more information see Resolution Techniques for NNs, and the sketch below for the general idea.
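As a rough illustration of the high-resolution approach, the minimal sketch below requests a second, larger stream from the same Camera node and scales the normalized detection coordinates onto it. The requestOutput size and frame type are assumptions for illustration, and the two queues are not hardware-synchronized here; see Resolution Techniques for NNs for the recommended setup.

#!/usr/bin/env python3
# Hedged sketch: draw NN results on a larger stream instead of the passthrough.
# Assumes dai.node.Camera.requestOutput() accepts a (width, height) tuple and an
# ImgFrame type; verify against the v3 API reference before relying on it.
import cv2
import depthai as dai

with dai.Pipeline() as pipeline:
    cameraNode = pipeline.create(dai.node.Camera).build()
    detectionNetwork = pipeline.create(dai.node.DetectionNetwork).build(
        cameraNode, dai.NNModelDescription("yolov6-nano"))

    # A separate high-resolution output alongside the NN's 512x288 input
    hiRes = cameraNode.requestOutput((1280, 720), dai.ImgFrame.Type.BGR888i)
    qHiRes = hiRes.createOutputQueue()
    qDet = detectionNetwork.out.createOutputQueue()

    pipeline.start()
    while pipeline.isRunning():
        frame = qHiRes.get().getCvFrame()
        # Detections are normalized to <0..1>, so they scale to any resolution.
        # Note: frame and detections may be slightly out of sync in this sketch.
        h, w = frame.shape[:2]
        for det in qDet.get().detections:
            p1 = (int(det.xmin * w), int(det.ymin * h))
            p2 = (int(det.xmax * w), int(det.ymax * h))
            cv2.rectangle(frame, p1, p2, (255, 0, 0), 2)
        cv2.imshow("hi-res", frame)
        if cv2.waitKey(1) == ord("q"):
            break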

Demo

This example requires the DepthAI v3 API; see the installation instructions.
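A quick way to confirm the installed package is a v3 release (assuming, as with most Python packages, that depthai exposes __version__):

import depthai as dai
print(dai.__version__)  # expect a 3.x version for this example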

Pipeline

Source code

Python

#!/usr/bin/env python3

import cv2
import depthai as dai
import numpy as np
import time

# Create pipeline
with dai.Pipeline() as pipeline:
    cameraNode = pipeline.create(dai.node.Camera).build()
    detectionNetwork = pipeline.create(dai.node.DetectionNetwork).build(cameraNode, dai.NNModelDescription("yolov6-nano"))
    labelMap = detectionNetwork.getClasses()

    qRgb = detectionNetwork.passthrough.createOutputQueue()
    qDet = detectionNetwork.out.createOutputQueue()

    pipeline.start()

    frame = None
    detections = []
    startTime = time.monotonic()
    counter = 0
    color2 = (255, 255, 255)

    # NN bounding box coordinates are in the <0..1> range - they need to be scaled by the frame width/height
    def frameNorm(frame, bbox):
        normVals = np.full(len(bbox), frame.shape[0])
        normVals[::2] = frame.shape[1]
        return (np.clip(np.array(bbox), 0, 1) * normVals).astype(int)

    def displayFrame(name, frame):
        color = (255, 0, 0)
        for detection in detections:
            bbox = frameNorm(
                frame,
                (detection.xmin, detection.ymin, detection.xmax, detection.ymax),
            )
            # Draw the class label
            cv2.putText(
                frame,
                labelMap[detection.label],
                (bbox[0] + 10, bbox[1] + 20),
                cv2.FONT_HERSHEY_TRIPLEX,
                0.5,
                255,
            )
            # Draw the confidence percentage
            cv2.putText(
                frame,
                f"{int(detection.confidence * 100)}%",
                (bbox[0] + 10, bbox[1] + 40),
                cv2.FONT_HERSHEY_TRIPLEX,
                0.5,
                255,
            )
            # Draw the bounding box
            cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2)
        # Show the frame
        cv2.imshow(name, frame)

    while pipeline.isRunning():
        inRgb: dai.ImgFrame = qRgb.get()
        inDet: dai.ImgDetections = qDet.get()
        if inRgb is not None:
            frame = inRgb.getCvFrame()
            cv2.putText(
                frame,
                "NN fps: {:.2f}".format(counter / (time.monotonic() - startTime)),
                (2, frame.shape[0] - 4),
                cv2.FONT_HERSHEY_TRIPLEX,
                0.4,
                color2,
            )

        if inDet is not None:
            detections = inDet.detections
            counter += 1

        if frame is not None:
            displayFrame("rgb", frame)
            print("FPS: {:.2f}".format(counter / (time.monotonic() - startTime)))
        if cv2.waitKey(1) == ord("q"):
            pipeline.stop()
            break
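The frameNorm helper builds a scaling array of [width, height, width, height]: np.full fills every element with the frame height, then the slice [::2] overwrites the even indices (the x coordinates) with the width, so the normalized (xmin, ymin, xmax, ymax) tuple maps element-wise to pixel coordinates. A quick worked example at the 512x288 passthrough resolution:

import numpy as np

# Same logic as frameNorm above, on a 288-row, 512-column frame
bbox = (0.1, 0.2, 0.5, 0.8)    # normalized xmin, ymin, xmax, ymax
normVals = np.full(4, 288)     # start with the height everywhere
normVals[::2] = 512            # x coordinates use the width instead
print((np.clip(np.array(bbox), 0, 1) * normVals).astype(int))
# -> [ 51  57 256 230]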

C++

#include <chrono>
#include <iostream>
#include <vector>
#include <opencv2/opencv.hpp>

#include "depthai/depthai.hpp"

// Helper function to scale normalized coordinates to frame pixels
cv::Rect frameNorm(const cv::Mat& frame, const dai::Point2f& topLeft, const dai::Point2f& bottomRight) {
    float width = frame.cols, height = frame.rows;
    return cv::Rect(cv::Point(topLeft.x * width, topLeft.y * height), cv::Point(bottomRight.x * width, bottomRight.y * height));
}

int main() {
    // Create pipeline
    dai::Pipeline pipeline;

    // Create and configure camera node
    auto cameraNode = pipeline.create<dai::node::Camera>();
    cameraNode->build();

    // Create and configure detection network node
    auto detectionNetwork = pipeline.create<dai::node::DetectionNetwork>();

    dai::NNModelDescription modelDescription;
    modelDescription.model = "yolov6-nano";
    detectionNetwork->build(cameraNode, modelDescription);
    auto labelMap = detectionNetwork->getClasses();

    // Create output queues
    auto qRgb = detectionNetwork->passthrough.createOutputQueue();
    auto qDet = detectionNetwork->out.createOutputQueue();

    cv::Mat frame;
    std::vector<dai::ImgDetection> detections;
    auto startTime = std::chrono::steady_clock::now();
    int counter = 0;
    cv::Scalar color(255, 0, 0);
    cv::Scalar textColor(255, 255, 255);

    pipeline.start();
    while(pipeline.isRunning()) {
        auto inRgb = qRgb->get<dai::ImgFrame>();
        auto inDet = qDet->get<dai::ImgDetections>();

        if(inRgb != nullptr) {
            frame = inRgb->getCvFrame();

            // Add FPS text
            auto currentTime = std::chrono::steady_clock::now();
            float fps = counter / std::chrono::duration<float>(currentTime - startTime).count();
            cv::putText(frame, "NN fps: " + std::to_string(fps), cv::Point(2, frame.rows - 4), cv::FONT_HERSHEY_TRIPLEX, 0.4, textColor);
        }

        if(inDet != nullptr) {
            // Keep the latest detections so drawing never dereferences a null message
            detections = inDet->detections;
            counter++;
        }

        if(!frame.empty()) {
            // Display detections
            for(const auto& detection : detections) {
                auto bbox = frameNorm(frame, dai::Point2f(detection.xmin, detection.ymin), dai::Point2f(detection.xmax, detection.ymax));

                // Draw label
                cv::putText(frame, labelMap.value()[detection.label], cv::Point(bbox.x + 10, bbox.y + 20), cv::FONT_HERSHEY_TRIPLEX, 0.5, textColor);

                // Draw confidence
                cv::putText(frame,
                            std::to_string(static_cast<int>(detection.confidence * 100)) + "%",
                            cv::Point(bbox.x + 10, bbox.y + 40),
                            cv::FONT_HERSHEY_TRIPLEX,
                            0.5,
                            textColor);

                // Draw rectangle
                cv::rectangle(frame, bbox, color, 2);
            }

            // Show the frame
            cv::imshow("rgb", frame);

            auto currentTime = std::chrono::steady_clock::now();
            float fps = counter / std::chrono::duration<float>(currentTime - startTime).count();
            std::cout << "FPS: " << fps << std::endl;
        }

        if(cv::waitKey(1) == 'q') {
            break;
        }
    }

    return 0;
}
