DepthAI v2 has been superseded by DepthAI v3. You are viewing legacy documentation.
DepthAI Tutorials
DepthAI API References

ON THIS PAGE

  • Demo
  • Setup
  • Source code
  • Pipeline

RGB Encoding & Mono with MobilenetSSD & Depth

This example shows how to configure the depthai video encoder in h.265 format to encode the RGB camera input at Full-HD resolution at 30FPS, and transfer the encoded video over XLINK to the host, saving it to disk as a video file. At the same time, a MobileNetv2SSD network is run on the frames from the right grayscale camera, while the application also displays the depth map produced by both of the grayscale cameras. Note that disparity is used in this case, as it colorizes in a more intuitive way. Pressing q in one of the preview windows stops the recording; the example then prints the ffmpeg command that converts the recorded stream into an mp4 to make it playable. Note that ffmpeg will need to be installed and runnable for the conversion to mp4 to succeed. Be careful, this example saves encoded video to your host storage. So if you leave it running, you could fill up your storage on your host. It's a combination of RGB Encoding and Mono & MobilenetSSD & Depth.

Similar samples:

Demo

Setup

Please run the install script to download all required dependencies. Please note that this script must be run from git context, so you have to download the depthai-python repository first and then run the script.
Command Line
git clone https://github.com/luxonis/depthai-python.git
cd depthai-python/examples
python3 install_requirements.py
For additional information, please follow the installation guide.

Source code

Python

Python
GitHub
#!/usr/bin/env python3
"""Record the RGB camera as H.265 while running MobileNet-SSD on the right mono camera.

The encoded bitstream is written to ``video.h265`` in the current working
directory. Three preview windows ("right", "disparity" and "manip") show the
latest frames with the detections drawn on top. Press ``q`` in a preview
window to stop.

The path of the MobileNet-SSD blob can be given as the first command line
argument; otherwise the blob is looked up under ``../models`` relative to
this file.
"""

from pathlib import Path
import sys
import cv2
import depthai as dai
import numpy as np

# Get argument first
nnPath = str((Path(__file__).parent / Path('../models/mobilenet-ssd_openvino_2021.4_6shave.blob')).resolve().absolute())
if len(sys.argv) > 1:
    nnPath = sys.argv[1]

if not Path(nnPath).exists():
    raise FileNotFoundError(f'Required file/s not found, please run "{sys.executable} install_requirements.py"')

# MobilenetSSD label texts
labelMap = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow",
            "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]

# Colour passed to OpenCV for every overlay (text and rectangles)
color = (255, 0, 0)


def frameNorm(frame, bbox):
    """Convert a normalized bounding box into pixel coordinates.

    Args:
        frame: array whose ``shape[0]`` (height) scales the odd entries of
            ``bbox`` and whose ``shape[1]`` (width) scales the even entries.
        bbox: sequence of normalized values, here ``(xmin, ymin, xmax, ymax)``.

    Returns:
        Integer numpy array of the same length as ``bbox``; the inputs are
        clipped to ``[0, 1]`` before scaling.
    """
    normVals = np.full(len(bbox), frame.shape[0])
    normVals[::2] = frame.shape[1]
    return (np.clip(np.array(bbox), 0, 1) * normVals).astype(int)


def labelText(label):
    """Return the label name, or the numeric label as text if it is outside ``labelMap``."""
    if 0 <= label < len(labelMap):
        return labelMap[label]
    return str(label)


def drawDetections(image, detections, normFrame, offsetX=0):
    """Draw label, confidence and bounding box of every detection onto ``image`` in place.

    Args:
        image: frame to draw on.
        detections: iterable of detections with ``xmin``, ``ymin``, ``xmax``,
            ``ymax``, ``label`` and ``confidence`` attributes.
        normFrame: array whose shape is used to scale the normalized box
            coordinates (see ``frameNorm``).
        offsetX: horizontal shift, in pixels, added to both x coordinates.
    """
    for detection in detections:
        bbox = frameNorm(normFrame, (detection.xmin, detection.ymin, detection.xmax, detection.ymax))
        bbox[::2] += offsetX
        cv2.putText(image, labelText(detection.label), (bbox[0] + 10, bbox[1] + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
        cv2.putText(image, f"{int(detection.confidence * 100)}%", (bbox[0] + 10, bbox[1] + 40), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
        cv2.rectangle(image, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2)


# Create pipeline
pipeline = dai.Pipeline()

# Define sources and outputs
camRgb = pipeline.create(dai.node.ColorCamera)
videoEncoder = pipeline.create(dai.node.VideoEncoder)
monoRight = pipeline.create(dai.node.MonoCamera)
monoLeft = pipeline.create(dai.node.MonoCamera)
depth = pipeline.create(dai.node.StereoDepth)
manip = pipeline.create(dai.node.ImageManip)
nn = pipeline.create(dai.node.MobileNetDetectionNetwork)

videoOut = pipeline.create(dai.node.XLinkOut)
xoutRight = pipeline.create(dai.node.XLinkOut)
disparityOut = pipeline.create(dai.node.XLinkOut)
manipOut = pipeline.create(dai.node.XLinkOut)
nnOut = pipeline.create(dai.node.XLinkOut)

videoOut.setStreamName('h265')
xoutRight.setStreamName('right')
disparityOut.setStreamName('disparity')
manipOut.setStreamName('manip')
nnOut.setStreamName('nn')

# Properties
camRgb.setBoardSocket(dai.CameraBoardSocket.CAM_A)
camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
monoRight.setCamera("right")
monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoLeft.setCamera("left")
monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
videoEncoder.setDefaultProfilePreset(30, dai.VideoEncoderProperties.Profile.H265_MAIN)

depth.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY)
depth.setRectifyEdgeFillColor(0) # Black, to better see the cutout

nn.setConfidenceThreshold(0.5)
nn.setBlobPath(nnPath)
nn.setNumInferenceThreads(2)
nn.input.setBlocking(False)

# The NN model expects BGR input. By default ImageManip output type would be same as input (gray in this case)
manip.initialConfig.setFrameType(dai.ImgFrame.Type.BGR888p)
manip.initialConfig.setResize(300, 300)

# Linking
camRgb.video.link(videoEncoder.input)
videoEncoder.bitstream.link(videoOut.input)
monoRight.out.link(xoutRight.input)
monoRight.out.link(depth.right)
monoLeft.out.link(depth.left)
depth.disparity.link(disparityOut.input)
depth.rectifiedRight.link(manip.inputImage)
manip.out.link(nn.input)
manip.out.link(manipOut.input)
nn.out.link(nnOut.input)

# Disparity range is used for normalization
disparityMultiplier = 255 / depth.initialConfig.getMaxDisparity()

# Connect to device and start pipeline; the output file is closed automatically
# when the block is left, including when an exception is raised
with dai.Device(pipeline) as device, open('video.h265', 'wb') as videoFile:

    queueSize = 8
    qRight = device.getOutputQueue("right", queueSize)
    qDisparity = device.getOutputQueue("disparity", queueSize)
    qManip = device.getOutputQueue("manip", queueSize)
    qDet = device.getOutputQueue("nn", queueSize)
    qRgbEnc = device.getOutputQueue('h265', maxSize=30, blocking=True)

    frame = None
    frameManip = None
    frameDisparity = None
    detections = []

    # On the mono and disparity frames the boxes are scaled to a
    # height x height square shifted right by offsetX, i.e. centered
    # horizontally in the full-width frame.
    offsetX = (monoRight.getResolutionWidth() - monoRight.getResolutionHeight()) // 2
    # Only the shape of this array is used (as the scaling reference for frameNorm)
    croppedFrame = np.zeros((monoRight.getResolutionHeight(), monoRight.getResolutionHeight()))

    cv2.namedWindow("right", cv2.WINDOW_NORMAL)
    cv2.namedWindow("manip", cv2.WINDOW_NORMAL)

    while True:
        inRight = qRight.tryGet()
        inManip = qManip.tryGet()
        inDet = qDet.tryGet()
        inDisparity = qDisparity.tryGet()

        # Write out every encoded packet that is currently queued
        while qRgbEnc.has():
            qRgbEnc.get().getData().tofile(videoFile)

        if inRight is not None:
            frame = inRight.getCvFrame()

        if inManip is not None:
            frameManip = inManip.getCvFrame()

        if inDisparity is not None:
            # Apply color map for better visualization
            frameDisparity = inDisparity.getCvFrame()
            frameDisparity = (frameDisparity*disparityMultiplier).astype(np.uint8)
            frameDisparity = cv2.applyColorMap(frameDisparity, cv2.COLORMAP_JET)

        if inDet is not None:
            detections = inDet.detections

        if frame is not None:
            drawDetections(frame, detections, croppedFrame, offsetX)
            # Show the right cam frame
            cv2.imshow("right", frame)

        if frameDisparity is not None:
            drawDetections(frameDisparity, detections, croppedFrame, offsetX)
            # Show the disparity frame
            cv2.imshow("disparity", frameDisparity)

        if frameManip is not None:
            # The manip frame is scaled by its own shape, with no offset
            drawDetections(frameManip, detections, frameManip)
            # Show the manip frame
            cv2.imshow("manip", frameManip)

        if cv2.waitKey(1) == ord('q'):
            break

print("To view the encoded data, convert the stream file (.h265) into a video file (.mp4) using a command below:")
print("ffmpeg -framerate 30 -i video.h265 -c copy video.mp4")

C++

1#include <cstdio>
2#include <iostream>
3
4// Includes common necessary includes for development using depthai library
5#include "depthai/depthai.hpp"
6
7// MobilenetSSD label texts
8static const std::vector<std::string> labelMap = {"background", "aeroplane", "bicycle",     "bird",  "boat",        "bottle", "bus",
9                                                  "car",        "cat",       "chair",       "cow",   "diningtable", "dog",    "horse",
10                                                  "motorbike",  "person",    "pottedplant", "sheep", "sofa",        "train",  "tvmonitor"};
11
12int main(int argc, char** argv) {
13    using namespace std;
14    // Default blob path provided by Hunter private data download
15    // Applicable for easier example usage only
16    std::string nnPath(BLOB_PATH);
17
18    // If path to blob specified, use that
19    if(argc > 1) {
20        nnPath = std::string(argv[1]);
21    }
22
23    // Print which blob we are using
24    printf("Using blob at path: %s\n", nnPath.c_str());
25
26    // Create pipeline
27    dai::Pipeline pipeline;
28
29    // Define sources and outputs
30    auto camRgb = pipeline.create<dai::node::ColorCamera>();
31    auto videoEncoder = pipeline.create<dai::node::VideoEncoder>();
32    auto monoRight = pipeline.create<dai::node::MonoCamera>();
33    auto monoLeft = pipeline.create<dai::node::MonoCamera>();
34    auto depth = pipeline.create<dai::node::StereoDepth>();
35    auto manip = pipeline.create<dai::node::ImageManip>();
36    auto nn = pipeline.create<dai::node::MobileNetDetectionNetwork>();
37
38    auto videoOut = pipeline.create<dai::node::XLinkOut>();
39    auto xoutRight = pipeline.create<dai::node::XLinkOut>();
40    auto disparityOut = pipeline.create<dai::node::XLinkOut>();
41    auto manipOut = pipeline.create<dai::node::XLinkOut>();
42    auto nnOut = pipeline.create<dai::node::XLinkOut>();
43
44    videoOut->setStreamName("h265");
45    xoutRight->setStreamName("right");
46    disparityOut->setStreamName("disparity");
47    manipOut->setStreamName("manip");
48    nnOut->setStreamName("nn");
49
50    // Properties
51    camRgb->setBoardSocket(dai::CameraBoardSocket::CAM_A);
52    camRgb->setResolution(dai::ColorCameraProperties::SensorResolution::THE_1080_P);
53    monoRight->setCamera("right");
54    monoRight->setResolution(dai::MonoCameraProperties::SensorResolution::THE_400_P);
55    monoLeft->setCamera("left");
56    monoLeft->setResolution(dai::MonoCameraProperties::SensorResolution::THE_400_P);
57    videoEncoder->setDefaultProfilePreset(30, dai::VideoEncoderProperties::Profile::H265_MAIN);
58
59    depth->setDefaultProfilePreset(dai::node::StereoDepth::PresetMode::HIGH_DENSITY);
60    depth->setRectifyEdgeFillColor(0);  // Black, to better see the cutout
61
62    nn->setConfidenceThreshold(0.5);
63    nn->setBlobPath(nnPath);
64    nn->setNumInferenceThreads(2);
65    nn->input.setBlocking(false);
66
67    // The NN model expects BGR input-> By default ImageManip output type would be same as input (gray in this case)
68    manip->initialConfig.setFrameType(dai::ImgFrame::Type::BGR888p);
69    manip->initialConfig.setResize(300, 300);
70
71    // Linking
72    camRgb->video.link(videoEncoder->input);
73    videoEncoder->bitstream.link(videoOut->input);
74    monoRight->out.link(xoutRight->input);
75    monoRight->out.link(depth->right);
76    monoLeft->out.link(depth->left);
77    depth->disparity.link(disparityOut->input);
78    depth->rectifiedRight.link(manip->inputImage);
79    manip->out.link(nn->input);
80    manip->out.link(manipOut->input);
81    nn->out.link(nnOut->input);
82
83    // Disparity range is used for normalization
84    float disparityMultiplier = 255 / depth->initialConfig.getMaxDisparity();
85
86    // Connect to device and start pipeline
87    dai::Device device(pipeline);
88
89    // Queues
90    int queueSize = 8;
91    auto qRight = device.getOutputQueue("right", queueSize);
92    auto qDisparity = device.getOutputQueue("disparity", queueSize);
93    auto qManip = device.getOutputQueue("manip", queueSize);
94    auto qDet = device.getOutputQueue("nn", queueSize);
95    auto qRgbEnc = device.getOutputQueue("h265", 30, true);
96
97    cv::Mat frame;
98    cv::Mat frameManip;
99    cv::Mat frameDisparity;
100    std::vector<dai::ImgDetection> detections;
101    int offsetX = (monoRight->getResolutionWidth() - monoRight->getResolutionHeight()) / 2;
102    auto color = cv::Scalar(255, 0, 0);
103
104    auto videoFile = std::ofstream("video.h265", std::ios::binary);
105    cv::namedWindow("right", cv::WINDOW_NORMAL);
106    cv::namedWindow("manip", cv::WINDOW_NORMAL);
107
108    while(true) {
109        auto inRight = qRight->tryGet<dai::ImgFrame>();
110        auto inManip = qManip->tryGet<dai::ImgFrame>();
111        auto inDet = qDet->tryGet<dai::ImgDetections>();
112        auto inDisparity = qDisparity->tryGet<dai::ImgFrame>();
113
114        auto out1 = qRgbEnc->get<dai::ImgFrame>();
115        videoFile.write((char*)out1->getData().data(), out1->getData().size());
116
117        if(inRight) {
118            frame = inRight->getCvFrame();
119        }
120
121        if(inManip) {
122            frameManip = inManip->getCvFrame();
123        }
124
125        if(inDisparity) {
126            frameDisparity = inDisparity->getCvFrame();
127            frameDisparity.convertTo(frameDisparity, CV_8UC1, disparityMultiplier);
128            cv::applyColorMap(frameDisparity, frameDisparity, cv::COLORMAP_JET);
129        }
130
131        if(inDet) {
132            detections = inDet->detections;
133        }
134
135        if(!frame.empty()) {
136            for(auto& detection : detections) {
137                int x1 = detection.xmin * monoRight->getResolutionHeight() + offsetX;
138                int y1 = detection.ymin * monoRight->getResolutionHeight();
139                int x2 = detection.xmax * monoRight->getResolutionHeight() + offsetX;
140                int y2 = detection.ymax * monoRight->getResolutionHeight();
141
142                uint32_t labelIndex = detection.label;
143                std::string labelStr = to_string(labelIndex);
144                if(labelIndex < labelMap.size()) {
145                    labelStr = labelMap[labelIndex];
146                }
147                cv::putText(frame, labelStr, cv::Point(x1 + 10, y1 + 20), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);
148                std::stringstream confStr;
149                confStr << std::fixed << std::setprecision(2) << detection.confidence * 100;
150                cv::putText(frame, confStr.str(), cv::Point(x1 + 10, y1 + 40), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);
151                cv::rectangle(frame, cv::Rect(cv::Point(x1, y1), cv::Point(x2, y2)), color, cv::FONT_HERSHEY_SIMPLEX);
152            }
153            // Show the right cam frame
154            cv::imshow("right", frame);
155        }
156
157        if(!frameDisparity.empty()) {
158            for(auto& detection : detections) {
159                int x1 = detection.xmin * monoRight->getResolutionHeight() + offsetX;
160                int y1 = detection.ymin * monoRight->getResolutionHeight();
161                int x2 = detection.xmax * monoRight->getResolutionHeight() + offsetX;
162                int y2 = detection.ymax * monoRight->getResolutionHeight();
163
164                uint32_t labelIndex = detection.label;
165                std::string labelStr = to_string(labelIndex);
166                if(labelIndex < labelMap.size()) {
167                    labelStr = labelMap[labelIndex];
168                }
169                cv::putText(frameDisparity, labelStr, cv::Point(x1 + 10, y1 + 20), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);
170                std::stringstream confStr;
171                confStr << std::fixed << std::setprecision(2) << detection.confidence * 100;
172                cv::putText(frameDisparity, confStr.str(), cv::Point(x1 + 10, y1 + 40), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);
173                cv::rectangle(frameDisparity, cv::Rect(cv::Point(x1, y1), cv::Point(x2, y2)), color, cv::FONT_HERSHEY_SIMPLEX);
174            }
175            // Show the disparity frame
176            cv::imshow("disparity", frameDisparity);
177        }
178
179        if(!frameManip.empty()) {
180            for(auto& detection : detections) {
181                int x1 = detection.xmin * frameManip.cols;
182                int y1 = detection.ymin * frameManip.rows;
183                int x2 = detection.xmax * frameManip.cols;
184                int y2 = detection.ymax * frameManip.rows;
185
186                uint32_t labelIndex = detection.label;
187                std::string labelStr = to_string(labelIndex);
188                if(labelIndex < labelMap.size()) {
189                    labelStr = labelMap[labelIndex];
190                }
191                cv::putText(frameManip, labelStr, cv::Point(x1 + 10, y1 + 20), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);
192                std::stringstream confStr;
193                confStr << std::fixed << std::setprecision(2) << detection.confidence * 100;
194                cv::putText(frameManip, confStr.str(), cv::Point(x1 + 10, y1 + 40), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);
195                cv::rectangle(frameManip, cv::Rect(cv::Point(x1, y1), cv::Point(x2, y2)), color, cv::FONT_HERSHEY_SIMPLEX);
196            }
197            // Show the manip frame
198            cv::imshow("manip", frameManip);
199        }
200
201        int key = cv::waitKey(1);
202        if(key == 'q' || key == 'Q') {
203            break;
204        }
205    }
206    cout << "To view the encoded data, convert the stream file (.h265) into a video file (.mp4), using a command below:" << endl;
207    cout << "ffmpeg -framerate 30 -i video.h265 -c copy video.mp4" << endl;
208    return 0;
209}

Pipeline

Need assistance?

Head over to Discussion Forum for technical support or any other questions you might have.