DepthAI v2 has been superseded by DepthAI v3. You are viewing legacy documentation.

RGB Encoding & Mono & MobilenetSSD

This example shows how to configure the DepthAI video encoder to encode the RGB camera input as H.265 at Full-HD (1080p) resolution and 30 FPS, transferring the encoded video over XLink to the host and saving it to disk as a video file. At the same time, a MobileNetv2SSD network runs on frames from the right grayscale camera.

Pressing q stops the recording, and the example then prints an ffmpeg command that converts the raw H.265 stream into a playable mp4. Note that ffmpeg needs to be installed and runnable for the conversion to mp4 to succeed.

Be careful: this example saves encoded video to your host storage, so if you leave it running, it can fill up the storage on your host.

It's a combination of RGB Encoding and Mono & MobilenetSSD.
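If you only need the recording half, the encoding path distills to a few lines. The following is a minimal sketch based on the full example further down (the 'h265' stream name and 'video.h265' output file follow that example; stopping with Ctrl+C is an assumption of this sketch, since it has no preview window):

#!/usr/bin/env python3
import depthai as dai

pipeline = dai.Pipeline()

# 1080p RGB camera feeding an H.265 encoder at 30 FPS
camRgb = pipeline.create(dai.node.ColorCamera)
camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)

videoEncoder = pipeline.create(dai.node.VideoEncoder)
videoEncoder.setDefaultProfilePreset(30, dai.VideoEncoderProperties.Profile.H265_MAIN)

xout = pipeline.create(dai.node.XLinkOut)
xout.setStreamName('h265')

camRgb.video.link(videoEncoder.input)
videoEncoder.bitstream.link(xout.input)

with dai.Device(pipeline) as device, open('video.h265', 'wb') as videoFile:
    q = device.getOutputQueue('h265', maxSize=30, blocking=True)
    try:
        while True:
            # Each packet is a chunk of the raw H.265 bitstream; append it to the file
            q.get().getData().tofile(videoFile)
    except KeyboardInterrupt:
        pass  # Ctrl+C stops the recording; convert with ffmpeg afterwards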

Setup

Please run the install script to download all required dependencies. Please note that this script must be run from a git context, so you have to clone the depthai-python repository first and then run the script:
Command Line
git clone https://github.com/luxonis/depthai-python.git
cd depthai-python/examples
python3 install_requirements.py
For additional information, please follow the installation guide.

Source code

Python

#!/usr/bin/env python3

from pathlib import Path
import sys
import cv2
import depthai as dai
import numpy as np

# Get argument first
nnPath = str((Path(__file__).parent / Path('../models/mobilenet-ssd_openvino_2021.4_6shave.blob')).resolve().absolute())
if len(sys.argv) > 1:
    nnPath = sys.argv[1]

if not Path(nnPath).exists():
    raise FileNotFoundError(f'Required file/s not found, please run "{sys.executable} install_requirements.py"')

# MobilenetSSD label texts
labelMap = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow",
            "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]

# Create pipeline
pipeline = dai.Pipeline()

# Define sources and outputs
camRgb = pipeline.create(dai.node.ColorCamera)
monoRight = pipeline.create(dai.node.MonoCamera)
videoEncoder = pipeline.create(dai.node.VideoEncoder)
nn = pipeline.create(dai.node.MobileNetDetectionNetwork)
manip = pipeline.create(dai.node.ImageManip)

videoOut = pipeline.create(dai.node.XLinkOut)
xoutRight = pipeline.create(dai.node.XLinkOut)
manipOut = pipeline.create(dai.node.XLinkOut)
nnOut = pipeline.create(dai.node.XLinkOut)

videoOut.setStreamName('h265')
xoutRight.setStreamName("right")
manipOut.setStreamName("manip")
nnOut.setStreamName("nn")

# Properties
camRgb.setBoardSocket(dai.CameraBoardSocket.CAM_A)
camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
monoRight.setCamera("right")
monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_720_P)
videoEncoder.setDefaultProfilePreset(30, dai.VideoEncoderProperties.Profile.H265_MAIN)

nn.setConfidenceThreshold(0.5)
nn.setBlobPath(nnPath)
nn.setNumInferenceThreads(2)
nn.input.setBlocking(False)

# The NN model expects BGR input. By default the ImageManip output type would be the same as the input (gray in this case)
manip.initialConfig.setFrameType(dai.ImgFrame.Type.BGR888p)
manip.initialConfig.setResize(300, 300)

# Linking
camRgb.video.link(videoEncoder.input)
videoEncoder.bitstream.link(videoOut.input)
monoRight.out.link(manip.inputImage)
manip.out.link(nn.input)
monoRight.out.link(xoutRight.input)
manip.out.link(manipOut.input)
nn.out.link(nnOut.input)

# Connect to device and start pipeline
with dai.Device(pipeline) as device:

    # Queues
    queue_size = 8
    qRight = device.getOutputQueue("right", queue_size)
    qManip = device.getOutputQueue("manip", queue_size)
    qDet = device.getOutputQueue("nn", queue_size)
    qRgbEnc = device.getOutputQueue('h265', maxSize=30, blocking=True)

    frame = None
    frameManip = None
    detections = []
    # The NN sees a center square crop of the mono frame; offsetX shifts its
    # bounding boxes back to full-frame coordinates
    offsetX = (monoRight.getResolutionWidth() - monoRight.getResolutionHeight()) // 2
    color = (255, 0, 0)
    # Square placeholder matching the cropped region, used only to scale bounding boxes
    croppedFrame = np.zeros((monoRight.getResolutionHeight(), monoRight.getResolutionHeight()))

    # Map normalized [0..1] bounding box coordinates to pixel coordinates of the frame
    def frameNorm(frame, bbox):
        normVals = np.full(len(bbox), frame.shape[0])
        normVals[::2] = frame.shape[1]
        return (np.clip(np.array(bbox), 0, 1) * normVals).astype(int)

    videoFile = open('video.h265', 'wb')
    cv2.namedWindow("right", cv2.WINDOW_NORMAL)
    cv2.namedWindow("manip", cv2.WINDOW_NORMAL)

    while True:
        inRight = qRight.tryGet()
        inManip = qManip.tryGet()
        inDet = qDet.tryGet()

        # Drain all available encoded packets and append them to the file
        while qRgbEnc.has():
            qRgbEnc.get().getData().tofile(videoFile)

        if inRight is not None:
            frame = inRight.getCvFrame()

        if inManip is not None:
            frameManip = inManip.getCvFrame()

        if inDet is not None:
            detections = inDet.detections

        if frame is not None:
            for detection in detections:
                bbox = frameNorm(croppedFrame, (detection.xmin, detection.ymin, detection.xmax, detection.ymax))
                bbox[::2] += offsetX
                cv2.putText(frame, labelMap[detection.label], (bbox[0] + 10, bbox[1] + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
                cv2.putText(frame, f"{int(detection.confidence * 100)}%", (bbox[0] + 10, bbox[1] + 40), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
                cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2)
            # Show the frame
            cv2.imshow("right", frame)

        if frameManip is not None:
            for detection in detections:
                bbox = frameNorm(frameManip, (detection.xmin, detection.ymin, detection.xmax, detection.ymax))
                cv2.putText(frameManip, labelMap[detection.label], (bbox[0] + 10, bbox[1] + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
                cv2.putText(frameManip, f"{int(detection.confidence * 100)}%", (bbox[0] + 10, bbox[1] + 40), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
                cv2.rectangle(frameManip, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2)
            # Show the frame
            cv2.imshow("manip", frameManip)

        if cv2.waitKey(1) == ord('q'):
            break

    videoFile.close()
    print("To view the encoded data, convert the stream file (.h265) into a video file (.mp4) using the command below:")
    print("ffmpeg -framerate 30 -i video.h265 -c copy video.mp4")

C++

#include <fstream>
#include <iomanip>
#include <iostream>
#include <sstream>

// Includes common necessary includes for development using depthai library
#include "depthai/depthai.hpp"

// MobilenetSSD label texts
static const std::vector<std::string> labelMap = {"background", "aeroplane", "bicycle",     "bird",  "boat",        "bottle", "bus",
                                                  "car",        "cat",       "chair",       "cow",   "diningtable", "dog",    "horse",
                                                  "motorbike",  "person",    "pottedplant", "sheep", "sofa",        "train",  "tvmonitor"};

int main(int argc, char** argv) {
    using namespace std;
    // Default blob path provided by Hunter private data download
    // Applicable for easier example usage only
    std::string nnPath(BLOB_PATH);

    // If path to blob specified, use that
    if(argc > 1) {
        nnPath = std::string(argv[1]);
    }

    // Print which blob we are using
    printf("Using blob at path: %s\n", nnPath.c_str());

    // Create pipeline
    dai::Pipeline pipeline;

    // Define sources and outputs
    auto camRgb = pipeline.create<dai::node::ColorCamera>();
    auto monoRight = pipeline.create<dai::node::MonoCamera>();
    auto videoEncoder = pipeline.create<dai::node::VideoEncoder>();
    auto nn = pipeline.create<dai::node::MobileNetDetectionNetwork>();
    auto manip = pipeline.create<dai::node::ImageManip>();

    auto videoOut = pipeline.create<dai::node::XLinkOut>();
    auto xoutRight = pipeline.create<dai::node::XLinkOut>();
    auto manipOut = pipeline.create<dai::node::XLinkOut>();
    auto nnOut = pipeline.create<dai::node::XLinkOut>();

    videoOut->setStreamName("h265");
    xoutRight->setStreamName("right");
    manipOut->setStreamName("manip");
    nnOut->setStreamName("nn");

    // Properties
    camRgb->setBoardSocket(dai::CameraBoardSocket::CAM_A);
    camRgb->setResolution(dai::ColorCameraProperties::SensorResolution::THE_1080_P);
    monoRight->setCamera("right");
    monoRight->setResolution(dai::MonoCameraProperties::SensorResolution::THE_720_P);
    videoEncoder->setDefaultProfilePreset(30, dai::VideoEncoderProperties::Profile::H265_MAIN);

    nn->setConfidenceThreshold(0.5);
    nn->setBlobPath(nnPath);
    nn->setNumInferenceThreads(2);
    nn->input.setBlocking(false);

    // The NN model expects BGR input. By default the ImageManip output type would be the same as the input (gray in this case)
    manip->initialConfig.setFrameType(dai::ImgFrame::Type::BGR888p);
    manip->initialConfig.setResize(300, 300);

    // Linking
    camRgb->video.link(videoEncoder->input);
    videoEncoder->bitstream.link(videoOut->input);
    monoRight->out.link(manip->inputImage);
    manip->out.link(nn->input);
    monoRight->out.link(xoutRight->input);
    manip->out.link(manipOut->input);
    nn->out.link(nnOut->input);

    // Connect to device and start pipeline
    dai::Device device(pipeline);

    // Queues
    int queueSize = 8;
    auto qRight = device.getOutputQueue("right", queueSize);
    auto qManip = device.getOutputQueue("manip", queueSize);
    auto qDet = device.getOutputQueue("nn", queueSize);
    auto qRgbEnc = device.getOutputQueue("h265", 30, true);

    cv::Mat frame;
    cv::Mat frameManip;
    std::vector<dai::ImgDetection> detections;
    // The NN sees a center square crop of the mono frame; offsetX shifts its
    // bounding boxes back to full-frame coordinates
    int offsetX = (monoRight->getResolutionWidth() - monoRight->getResolutionHeight()) / 2;
    auto color = cv::Scalar(255, 0, 0);

    auto videoFile = std::ofstream("video.h265", std::ios::binary);
    cv::namedWindow("right", cv::WINDOW_NORMAL);
    cv::namedWindow("manip", cv::WINDOW_NORMAL);

    while(true) {
        auto inRight = qRight->tryGet<dai::ImgFrame>();
        auto inManip = qManip->tryGet<dai::ImgFrame>();
        auto inDet = qDet->tryGet<dai::ImgDetections>();

        // Block until an encoded packet arrives, then append it to the file
        auto out1 = qRgbEnc->get<dai::ImgFrame>();
        videoFile.write((char*)out1->getData().data(), out1->getData().size());

        if(inRight) {
            frame = inRight->getCvFrame();
        }

        if(inManip) {
            frameManip = inManip->getCvFrame();
        }

        if(inDet) {
            detections = inDet->detections;
        }

        if(!frame.empty()) {
            for(auto& detection : detections) {
                // Scale normalized coordinates to the square crop, then shift by offsetX
                int x1 = detection.xmin * monoRight->getResolutionHeight() + offsetX;
                int y1 = detection.ymin * monoRight->getResolutionHeight();
                int x2 = detection.xmax * monoRight->getResolutionHeight() + offsetX;
                int y2 = detection.ymax * monoRight->getResolutionHeight();

                uint32_t labelIndex = detection.label;
                std::string labelStr = to_string(labelIndex);
                if(labelIndex < labelMap.size()) {
                    labelStr = labelMap[labelIndex];
                }
                cv::putText(frame, labelStr, cv::Point(x1 + 10, y1 + 20), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);
                std::stringstream confStr;
                confStr << std::fixed << std::setprecision(2) << detection.confidence * 100;
                cv::putText(frame, confStr.str(), cv::Point(x1 + 10, y1 + 40), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);
                cv::rectangle(frame, cv::Rect(cv::Point(x1, y1), cv::Point(x2, y2)), color, 2);
            }
            // Show the frame
            cv::imshow("right", frame);
        }

        if(!frameManip.empty()) {
            for(auto& detection : detections) {
                int x1 = detection.xmin * frameManip.cols;
                int y1 = detection.ymin * frameManip.rows;
                int x2 = detection.xmax * frameManip.cols;
                int y2 = detection.ymax * frameManip.rows;

                uint32_t labelIndex = detection.label;
                std::string labelStr = to_string(labelIndex);
                if(labelIndex < labelMap.size()) {
                    labelStr = labelMap[labelIndex];
                }
                cv::putText(frameManip, labelStr, cv::Point(x1 + 10, y1 + 20), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);
                std::stringstream confStr;
                confStr << std::fixed << std::setprecision(2) << detection.confidence * 100;
                cv::putText(frameManip, confStr.str(), cv::Point(x1 + 10, y1 + 40), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);
                cv::rectangle(frameManip, cv::Rect(cv::Point(x1, y1), cv::Point(x2, y2)), color, 2);
            }
            // Show the frame
            cv::imshow("manip", frameManip);
        }

        int key = cv::waitKey(1);
        if(key == 'q' || key == 'Q') {
            break;
        }
    }
    cout << "To view the encoded data, convert the stream file (.h265) into a video file (.mp4) using the command below:" << endl;
    cout << "ffmpeg -framerate 30 -i video.h265 -c copy video.mp4" << endl;
    return 0;
}

Pipeline
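
The example builds the following node graph (see the Linking section of the code above):

camRgb.video → VideoEncoder → XLinkOut("h265")
monoRight.out → XLinkOut("right")
monoRight.out → ImageManip → XLinkOut("manip")
ImageManip → MobileNetDetectionNetwork → XLinkOut("nn")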

Need assistance?

Head over to the Discussion Forum for technical support or any other questions you might have.