RGB编码 & Mono & MobilenetSSD & 深度

此示例展示了如何配置 depthai 视频编码器（H.265 格式），以 30 FPS 的 Full-HD 分辨率对 RGB 摄像头输入进行编码，通过 XLINK 将编码后的视频传输到主机，并将其保存为视频文件。同时，在右侧灰度摄像头的帧上运行 MobileNetv2SSD 网络，应用程序还会显示由两个灰度摄像头生成的深度图；这里显示的是视差（disparity），因为它着色后更直观。在预览窗口中按下 q 键（或在终端按下 Ctrl+C）即可停止录制；按 q 键退出后，程序会打印一条 ffmpeg 命令，您可以用它将录制的文件转换为 mp4 以便播放。请注意，要成功转换为 mp4，需要先安装 ffmpeg。另请注意，此示例会将编码后的视频保存到您的主机存储中，因此如果让它一直运行，可能会填满主机的存储空间。本示例是“RGB 编码”与“Mono & MobilenetSSD & 深度”两个示例的组合。

类似示例：

演示

设置

请运行安装脚本以下载所有必需的依赖项。请注意，此脚本必须在 git 上下文中运行，因此您必须先下载 depthai-python 存储库，然后运行脚本

Command Line

git clone https://github.com/luxonis/depthai-python.git
cd depthai-python/examples
python3 install_requirements.py

有关更多信息，请遵循安装指南。

源代码

Python

GitHub

#!/usr/bin/env python3

from pathlib import Path
import sys
import cv2
import depthai as dai
import numpy as np

# Path of the MobileNet-SSD blob: defaults to ../models relative to this script,
# and can be overridden by the first command-line argument.
nnPath = str((Path(__file__).parent / Path('../models/mobilenet-ssd_openvino_2021.4_6shave.blob')).resolve().absolute())
if len(sys.argv) > 1:
    nnPath = sys.argv[1]

# Fail early with a hint on how to obtain the model file.
if not Path(nnPath).exists():
    raise FileNotFoundError(f'Required file/s not found, please run "{sys.executable} install_requirements.py"')

# MobilenetSSD label texts, indexed by the class id reported in each detection
labelMap = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow",
            "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]

# Create pipeline
pipeline = dai.Pipeline()

# Define sources and outputs
camRgb = pipeline.create(dai.node.ColorCamera)
videoEncoder = pipeline.create(dai.node.VideoEncoder)
monoRight = pipeline.create(dai.node.MonoCamera)
monoLeft = pipeline.create(dai.node.MonoCamera)
depth = pipeline.create(dai.node.StereoDepth)
manip = pipeline.create(dai.node.ImageManip)
nn = pipeline.create(dai.node.MobileNetDetectionNetwork)

# One XLinkOut node per stream that is sent to the host
videoOut = pipeline.create(dai.node.XLinkOut)
xoutRight = pipeline.create(dai.node.XLinkOut)
disparityOut = pipeline.create(dai.node.XLinkOut)
manipOut = pipeline.create(dai.node.XLinkOut)
nnOut = pipeline.create(dai.node.XLinkOut)

# Stream names are the keys the host uses to fetch the matching output queues
videoOut.setStreamName('h265')
xoutRight.setStreamName('right')
disparityOut.setStreamName('disparity')
manipOut.setStreamName('manip')
nnOut.setStreamName('nn')

# Properties
camRgb.setBoardSocket(dai.CameraBoardSocket.CAM_A)
camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
monoRight.setCamera("right")
monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoLeft.setCamera("left")
monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
videoEncoder.setDefaultProfilePreset(30, dai.VideoEncoderProperties.Profile.H265_MAIN)

depth.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY)
depth.setRectifyEdgeFillColor(0) # Black, to better see the cutout

nn.setConfidenceThreshold(0.5)
nn.setBlobPath(nnPath)
nn.setNumInferenceThreads(2)
nn.input.setBlocking(False)

# The NN model expects BGR input. By default ImageManip output type would be same as input (gray in this case)
manip.initialConfig.setFrameType(dai.ImgFrame.Type.BGR888p)
manip.initialConfig.setResize(300, 300)

# Linking
# Color camera -> H.265 encoder -> host
camRgb.video.link(videoEncoder.input)
videoEncoder.bitstream.link(videoOut.input)
# The right mono camera feeds both the host preview and the stereo node
monoRight.out.link(xoutRight.input)
monoRight.out.link(depth.right)
monoLeft.out.link(depth.left)
depth.disparity.link(disparityOut.input)
# Rectified right frame -> ImageManip -> NN; the NN input is also streamed to the host
depth.rectifiedRight.link(manip.inputImage)
manip.out.link(nn.input)
manip.out.link(manipOut.input)
nn.out.link(nnOut.input)

# Disparity range is used for normalization
disparityMultiplier = 255 / depth.initialConfig.getMaxDisparity()

# Connect to device and start pipeline
with dai.Device(pipeline) as device:

    # Host-side output queues, one per XLinkOut stream name
    queueSize = 8
    qRight = device.getOutputQueue("right", queueSize)
    qDisparity = device.getOutputQueue("disparity", queueSize)
    qManip = device.getOutputQueue("manip", queueSize)
    qDet = device.getOutputQueue("nn", queueSize)
    # The encoded stream gets its own, larger, blocking queue
    qRgbEnc = device.getOutputQueue('h265', maxSize=30, blocking=True)

    # Latest data received from each stream; None until the first message arrives
    frame = None
    frameManip = None
    frameDisparity = None
    detections = []
    # Detections are mapped onto a horizontally centered height x height square of the
    # mono frame; offsetX is that square's left edge.
    # NOTE(review): presumably matches how ImageManip fits the frame into 300x300 - confirm
    offsetX = (monoRight.getResolutionWidth() - monoRight.getResolutionHeight()) // 2
    color = (255, 0, 0)
    # Only the shape of this array is used (as the reference size passed to frameNorm)
    croppedFrame = np.zeros((monoRight.getResolutionHeight(), monoRight.getResolutionHeight()))

100    def frameNorm(frame, bbox):
101        normVals = np.full(len(bbox), frame.shape[0])
102        normVals[::2] = frame.shape[1]
103        return (np.clip(np.array(bbox), 0, 1) * normVals).astype(int)
104
105    videoFile = open('video.h265', 'wb')
106    cv2.namedWindow("right", cv2.WINDOW_NORMAL)
107    cv2.namedWindow("manip", cv2.WINDOW_NORMAL)
108
109    while True:
110        inRight = qRight.tryGet()
111        inManip = qManip.tryGet()
112        inDet = qDet.tryGet()
113        inDisparity = qDisparity.tryGet()
114
115        while qRgbEnc.has():
116            qRgbEnc.get().getData().tofile(videoFile)
117
118        if inRight is not None:
119            frame = inRight.getCvFrame()
120
121        if inManip is not None:
122            frameManip = inManip.getCvFrame()
123
124        if inDisparity is not None:
125            # Apply color map for better visualization
126            frameDisparity = inDisparity.getCvFrame()
127            frameDisparity = (frameDisparity*disparityMultiplier).astype(np.uint8)
128            frameDisparity = cv2.applyColorMap(frameDisparity, cv2.COLORMAP_JET)
129
130        if inDet is not None:
131            detections = inDet.detections
132
133        if frame is not None:
134            for detection in detections:
135                bbox = frameNorm(croppedFrame, (detection.xmin, detection.ymin, detection.xmax, detection.ymax))
136                bbox[::2] += offsetX
137                cv2.putText(frame, labelMap[detection.label], (bbox[0] + 10, bbox[1] + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
138                cv2.putText(frame, f"{int(detection.confidence * 100)}%", (bbox[0] + 10, bbox[1] + 40), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
139                cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2)
140            # Show the right cam frame
141            cv2.imshow("right", frame)
142
143        if frameDisparity is not None:
144            for detection in detections:
145                bbox = frameNorm(croppedFrame, (detection.xmin, detection.ymin, detection.xmax, detection.ymax))
146                bbox[::2] += offsetX
147                cv2.rectangle(frameDisparity, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2)
148                cv2.putText(frameDisparity, labelMap[detection.label], (bbox[0] + 10, bbox[1] + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
149                cv2.putText(frameDisparity, f"{int(detection.confidence * 100)}%", (bbox[0] + 10, bbox[1] + 40), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
150            # Show the disparity frame
151            cv2.imshow("disparity", frameDisparity)
152
153        if frameManip is not None:
154            for detection in detections:
155                bbox = frameNorm(frameManip, (detection.xmin, detection.ymin, detection.xmax, detection.ymax))
156                cv2.rectangle(frameManip, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2)
157                cv2.putText(frameManip, labelMap[detection.label], (bbox[0] + 10, bbox[1] + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
158                cv2.putText(frameManip, f"{int(detection.confidence * 100)}%", (bbox[0] + 10, bbox[1] + 40), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
159            # Show the manip frame
160            cv2.imshow("manip", frameManip)
161
162        if cv2.waitKey(1) == ord('q'):
163            break
164
165    print("To view the encoded data, convert the stream file (.h265) into a video file (.mp4) using a command below:")
166    print("ffmpeg -framerate 30 -i video.h265 -c copy video.mp4")

C++

GitHub

#include <cstdint>
#include <cstdio>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Includes common necessary includes for development using depthai library
#include "depthai/depthai.hpp"

// MobilenetSSD label texts, indexed by the class id carried in each detection
static const std::vector<std::string> labelMap = {"background", "aeroplane", "bicycle",     "bird",  "boat",        "bottle", "bus",
                                                  "car",        "cat",       "chair",       "cow",   "diningtable", "dog",    "horse",
                                                  "motorbike",  "person",    "pottedplant", "sheep", "sofa",        "train",  "tvmonitor"};

12int main(int argc, char** argv) {
13    using namespace std;
14    // Default blob path provided by Hunter private data download
15    // Applicable for easier example usage only
16    std::string nnPath(BLOB_PATH);
17
18    // If path to blob specified, use that
19    if(argc > 1) {
20        nnPath = std::string(argv[1]);
21    }
22
23    // Print which blob we are using
24    printf("Using blob at path: %s\n", nnPath.c_str());
25
26    // Create pipeline
27    dai::Pipeline pipeline;
28
29    // Define sources and outputs
30    auto camRgb = pipeline.create<dai::node::ColorCamera>();
31    auto videoEncoder = pipeline.create<dai::node::VideoEncoder>();
32    auto monoRight = pipeline.create<dai::node::MonoCamera>();
33    auto monoLeft = pipeline.create<dai::node::MonoCamera>();
34    auto depth = pipeline.create<dai::node::StereoDepth>();
35    auto manip = pipeline.create<dai::node::ImageManip>();
36    auto nn = pipeline.create<dai::node::MobileNetDetectionNetwork>();
37
38    auto videoOut = pipeline.create<dai::node::XLinkOut>();
39    auto xoutRight = pipeline.create<dai::node::XLinkOut>();
40    auto disparityOut = pipeline.create<dai::node::XLinkOut>();
41    auto manipOut = pipeline.create<dai::node::XLinkOut>();
42    auto nnOut = pipeline.create<dai::node::XLinkOut>();
43
44    videoOut->setStreamName("h265");
45    xoutRight->setStreamName("right");
46    disparityOut->setStreamName("disparity");
47    manipOut->setStreamName("manip");
48    nnOut->setStreamName("nn");
49
50    // Properties
51    camRgb->setBoardSocket(dai::CameraBoardSocket::CAM_A);
52    camRgb->setResolution(dai::ColorCameraProperties::SensorResolution::THE_1080_P);
53    monoRight->setCamera("right");
54    monoRight->setResolution(dai::MonoCameraProperties::SensorResolution::THE_400_P);
55    monoLeft->setCamera("left");
56    monoLeft->setResolution(dai::MonoCameraProperties::SensorResolution::THE_400_P);
57    videoEncoder->setDefaultProfilePreset(30, dai::VideoEncoderProperties::Profile::H265_MAIN);
58
59    depth->setDefaultProfilePreset(dai::node::StereoDepth::PresetMode::HIGH_DENSITY);
60    depth->setRectifyEdgeFillColor(0);  // Black, to better see the cutout
61
62    nn->setConfidenceThreshold(0.5);
63    nn->setBlobPath(nnPath);
64    nn->setNumInferenceThreads(2);
65    nn->input.setBlocking(false);
66
67    // The NN model expects BGR input-> By default ImageManip output type would be same as input (gray in this case)
68    manip->initialConfig.setFrameType(dai::ImgFrame::Type::BGR888p);
69    manip->initialConfig.setResize(300, 300);
70
71    // Linking
72    camRgb->video.link(videoEncoder->input);
73    videoEncoder->bitstream.link(videoOut->input);
74    monoRight->out.link(xoutRight->input);
75    monoRight->out.link(depth->right);
76    monoLeft->out.link(depth->left);
77    depth->disparity.link(disparityOut->input);
78    depth->rectifiedRight.link(manip->inputImage);
79    manip->out.link(nn->input);
80    manip->out.link(manipOut->input);
81    nn->out.link(nnOut->input);
82
83    // Disparity range is used for normalization
84    float disparityMultiplier = 255 / depth->initialConfig.getMaxDisparity();
85
86    // Connect to device and start pipeline
87    dai::Device device(pipeline);
88
89    // Queues
90    int queueSize = 8;
91    auto qRight = device.getOutputQueue("right", queueSize);
92    auto qDisparity = device.getOutputQueue("disparity", queueSize);
93    auto qManip = device.getOutputQueue("manip", queueSize);
94    auto qDet = device.getOutputQueue("nn", queueSize);
95    auto qRgbEnc = device.getOutputQueue("h265", 30, true);
96
97    cv::Mat frame;
98    cv::Mat frameManip;
99    cv::Mat frameDisparity;
100    std::vector<dai::ImgDetection> detections;
101    int offsetX = (monoRight->getResolutionWidth() - monoRight->getResolutionHeight()) / 2;
102    auto color = cv::Scalar(255, 0, 0);
103
104    auto videoFile = std::ofstream("video.h265", std::ios::binary);
105    cv::namedWindow("right", cv::WINDOW_NORMAL);
106    cv::namedWindow("manip", cv::WINDOW_NORMAL);
107
108    while(true) {
109        auto inRight = qRight->tryGet<dai::ImgFrame>();
110        auto inManip = qManip->tryGet<dai::ImgFrame>();
111        auto inDet = qDet->tryGet<dai::ImgDetections>();
112        auto inDisparity = qDisparity->tryGet<dai::ImgFrame>();
113
114        auto out1 = qRgbEnc->get<dai::ImgFrame>();
115        videoFile.write((char*)out1->getData().data(), out1->getData().size());
116
117        if(inRight) {
118            frame = inRight->getCvFrame();
119        }
120
121        if(inManip) {
122            frameManip = inManip->getCvFrame();
123        }
124
125        if(inDisparity) {
126            frameDisparity = inDisparity->getCvFrame();
127            frameDisparity.convertTo(frameDisparity, CV_8UC1, disparityMultiplier);
128            cv::applyColorMap(frameDisparity, frameDisparity, cv::COLORMAP_JET);
129        }
130
131        if(inDet) {
132            detections = inDet->detections;
133        }
134
135        if(!frame.empty()) {
136            for(auto& detection : detections) {
137                int x1 = detection.xmin * monoRight->getResolutionHeight() + offsetX;
138                int y1 = detection.ymin * monoRight->getResolutionHeight();
139                int x2 = detection.xmax * monoRight->getResolutionHeight() + offsetX;
140                int y2 = detection.ymax * monoRight->getResolutionHeight();
141
142                uint32_t labelIndex = detection.label;
143                std::string labelStr = to_string(labelIndex);
144                if(labelIndex < labelMap.size()) {
145                    labelStr = labelMap[labelIndex];
146                }
147                cv::putText(frame, labelStr, cv::Point(x1 + 10, y1 + 20), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);
148                std::stringstream confStr;
149                confStr << std::fixed << std::setprecision(2) << detection.confidence * 100;
150                cv::putText(frame, confStr.str(), cv::Point(x1 + 10, y1 + 40), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);
151                cv::rectangle(frame, cv::Rect(cv::Point(x1, y1), cv::Point(x2, y2)), color, cv::FONT_HERSHEY_SIMPLEX);
152            }
153            // Show the right cam frame
154            cv::imshow("right", frame);
155        }
156
157        if(!frameDisparity.empty()) {
158            for(auto& detection : detections) {
159                int x1 = detection.xmin * monoRight->getResolutionHeight() + offsetX;
160                int y1 = detection.ymin * monoRight->getResolutionHeight();
161                int x2 = detection.xmax * monoRight->getResolutionHeight() + offsetX;
162                int y2 = detection.ymax * monoRight->getResolutionHeight();
163
164                uint32_t labelIndex = detection.label;
165                std::string labelStr = to_string(labelIndex);
166                if(labelIndex < labelMap.size()) {
167                    labelStr = labelMap[labelIndex];
168                }
169                cv::putText(frameDisparity, labelStr, cv::Point(x1 + 10, y1 + 20), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);
170                std::stringstream confStr;
171                confStr << std::fixed << std::setprecision(2) << detection.confidence * 100;
172                cv::putText(frameDisparity, confStr.str(), cv::Point(x1 + 10, y1 + 40), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);
173                cv::rectangle(frameDisparity, cv::Rect(cv::Point(x1, y1), cv::Point(x2, y2)), color, cv::FONT_HERSHEY_SIMPLEX);
174            }
175            // Show the disparity frame
176            cv::imshow("disparity", frameDisparity);
177        }
178
179        if(!frameManip.empty()) {
180            for(auto& detection : detections) {
181                int x1 = detection.xmin * frameManip.cols;
182                int y1 = detection.ymin * frameManip.rows;
183                int x2 = detection.xmax * frameManip.cols;
184                int y2 = detection.ymax * frameManip.rows;
185
186                uint32_t labelIndex = detection.label;
187                std::string labelStr = to_string(labelIndex);
188                if(labelIndex < labelMap.size()) {
189                    labelStr = labelMap[labelIndex];
190                }
191                cv::putText(frameManip, labelStr, cv::Point(x1 + 10, y1 + 20), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);
192                std::stringstream confStr;
193                confStr << std::fixed << std::setprecision(2) << detection.confidence * 100;
194                cv::putText(frameManip, confStr.str(), cv::Point(x1 + 10, y1 + 40), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);
195                cv::rectangle(frameManip, cv::Rect(cv::Point(x1, y1), cv::Point(x2, y2)), color, cv::FONT_HERSHEY_SIMPLEX);
196            }
197            // Show the manip frame
198            cv::imshow("manip", frameManip);
199        }
200
201        int key = cv::waitKey(1);
202        if(key == 'q' || key == 'Q') {
203            break;
204        }
205    }
206    cout << "To view the encoded data, convert the stream file (.h265) into a video file (.mp4), using a command below:" << endl;
207    cout << "ffmpeg -framerate 30 -i video.h265 -c copy video.mp4" << endl;
208    return 0;
209}

Pipeline

需要帮助？

请前往 Discussion Forum 获取技术支持或提出您可能有的任何其他问题。

本页目录

RGB编码 & Mono & MobilenetSSD & 深度View as Markdown

类似示例：

演示

设置

源代码

Python

C++

Pipeline

需要帮助？

RGB编码 & Mono & MobilenetSSD & 深度