RGB 编码 & Mono & MobilenetSSD

此示例演示了如何配置 depthai 视频编码器，以 h.265 格式、Full-HD 分辨率和 30 FPS 对 RGB 摄像头输入进行编码，并通过 XLink 将编码后的视频传输到主机，保存为视频文件（video.h265）。同时，示例会在右侧灰度摄像头的帧上运行 MobileNetv2SSD 网络。在预览窗口中按 q 键（或在终端中按 Ctrl+C）即可停止录制，之后可使用 ffmpeg 将录制的文件转换为 mp4 以便播放。请注意：转换为 mp4 需要事先安装 ffmpeg；此外，本示例会将编码后的视频持续写入主机存储，如果让它一直运行，可能会占满主机的存储空间。本示例是“RGB 编码”与“Mono & MobilenetSSD”两个示例的组合。

类似示例：

演示

设置

请运行安装脚本以下载所有必需的依赖项。请注意，此脚本必须在 git 仓库中运行，因此您需要先克隆 depthai-python 存储库，然后再运行该脚本。

Command Line

git clone https://github.com/luxonis/depthai-python.git
cd depthai-python/examples
python3 install_requirements.py

有关更多信息，请遵循安装指南。

源代码

Python

GitHub

#!/usr/bin/env python3
"""Record the RGB camera as H.265 while running MobileNet-SSD on the right mono camera.

The color camera stream is encoded on the device and appended to
``video.h265`` on the host. In parallel, frames of the right mono camera are
resized to 300x300 and fed to a MobileNet detection network; the detections are
drawn both on the full mono frame ("right" window) and on the resized network
input ("manip" window).

Usage: ``script.py [path/to/blob]``. Press ``q`` in a preview window or Ctrl+C
in the terminal to stop recording.
"""

from pathlib import Path
import sys
import cv2
import depthai as dai
import numpy as np

# Get argument first: default blob location, optionally overridden by argv[1]
nnPath = str((Path(__file__).parent / Path('../models/mobilenet-ssd_openvino_2021.4_6shave.blob')).resolve().absolute())
if len(sys.argv) > 1:
    nnPath = sys.argv[1]

if not Path(nnPath).exists():
    raise FileNotFoundError(f'Required file/s not found, please run "{sys.executable} install_requirements.py"')

# MobilenetSSD label texts
labelMap = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow",
            "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]

# Create pipeline
pipeline = dai.Pipeline()

# Define sources and outputs
camRgb = pipeline.create(dai.node.ColorCamera)
monoRight = pipeline.create(dai.node.MonoCamera)
videoEncoder = pipeline.create(dai.node.VideoEncoder)
nn = pipeline.create(dai.node.MobileNetDetectionNetwork)
manip = pipeline.create(dai.node.ImageManip)

videoOut = pipeline.create(dai.node.XLinkOut)
xoutRight = pipeline.create(dai.node.XLinkOut)
manipOut = pipeline.create(dai.node.XLinkOut)
nnOut = pipeline.create(dai.node.XLinkOut)

videoOut.setStreamName('h265')
xoutRight.setStreamName("right")
manipOut.setStreamName("manip")
nnOut.setStreamName("nn")

# Properties
camRgb.setBoardSocket(dai.CameraBoardSocket.CAM_A)
camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
monoRight.setCamera("right")
monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_720_P)
videoEncoder.setDefaultProfilePreset(30, dai.VideoEncoderProperties.Profile.H265_MAIN)

nn.setConfidenceThreshold(0.5)
nn.setBlobPath(nnPath)
nn.setNumInferenceThreads(2)
nn.input.setBlocking(False)

# The NN model expects BGR input. By default ImageManip output type would be same as input (gray in this case)
manip.initialConfig.setFrameType(dai.ImgFrame.Type.BGR888p)
manip.initialConfig.setResize(300, 300)

# Linking
camRgb.video.link(videoEncoder.input)
videoEncoder.bitstream.link(videoOut.input)
monoRight.out.link(manip.inputImage)
manip.out.link(nn.input)
monoRight.out.link(xoutRight.input)
manip.out.link(manipOut.input)
nn.out.link(nnOut.input)

# Connect to device and start pipeline
with dai.Device(pipeline) as device:

    # Queues
    queue_size = 8
    qRight = device.getOutputQueue("right", queue_size)
    qManip = device.getOutputQueue("manip", queue_size)
    qDet = device.getOutputQueue("nn", queue_size)
    qRgbEnc = device.getOutputQueue('h265', maxSize=30, blocking=True)

    frame = None
    frameManip = None
    detections = []
    # Horizontal shift applied to boxes drawn on the full mono frame: half the
    # difference between the frame width and its height.
    offsetX = (monoRight.getResolutionWidth() - monoRight.getResolutionHeight()) // 2
    color = (255, 0, 0)
    # Only the shape of this array is used: it is the square (height x height)
    # reference area that normalized boxes are scaled to on the mono frame.
    croppedFrame = np.zeros((monoRight.getResolutionHeight(), monoRight.getResolutionHeight()))

    def frameNorm(frame, bbox):
        """Scale a normalized (xmin, ymin, xmax, ymax) box to pixel coordinates of `frame`."""
        normVals = np.full(len(bbox), frame.shape[0])
        normVals[::2] = frame.shape[1]
        return (np.clip(np.array(bbox), 0, 1) * normVals).astype(int)

    def labelText(label):
        """Return the class name for `label`, or the numeric index if it is outside labelMap."""
        return labelMap[label] if 0 <= label < len(labelMap) else str(label)

    def drawDetections(image, refFrame, currentDetections, shiftX=0):
        """Draw label, confidence and bounding box of each detection onto `image` in place.

        Boxes are scaled to the shape of `refFrame` and then shifted right by
        `shiftX` pixels.
        """
        for detection in currentDetections:
            bbox = frameNorm(refFrame, (detection.xmin, detection.ymin, detection.xmax, detection.ymax))
            bbox[::2] += shiftX
            cv2.putText(image, labelText(detection.label), (bbox[0] + 10, bbox[1] + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
            cv2.putText(image, f"{int(detection.confidence * 100)}%", (bbox[0] + 10, bbox[1] + 40), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
            cv2.rectangle(image, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2)

    cv2.namedWindow("right", cv2.WINDOW_NORMAL)
    cv2.namedWindow("manip", cv2.WINDOW_NORMAL)

    # The context manager guarantees the recording is flushed and closed on
    # every exit path (q key, Ctrl+C, or an unexpected error).
    with open('video.h265', 'wb') as videoFile:
        try:
            while True:
                inRight = qRight.tryGet()
                inManip = qManip.tryGet()
                inDet = qDet.tryGet()

                # Drain every encoded packet that is currently available
                while qRgbEnc.has():
                    qRgbEnc.get().getData().tofile(videoFile)

                if inRight is not None:
                    frame = inRight.getCvFrame()

                if inManip is not None:
                    frameManip = inManip.getCvFrame()

                if inDet is not None:
                    detections = inDet.detections

                if frame is not None:
                    drawDetections(frame, croppedFrame, detections, offsetX)
                    # Show the frame
                    cv2.imshow("right", frame)

                if frameManip is not None:
                    drawDetections(frameManip, frameManip, detections)
                    # Show the frame
                    cv2.imshow("manip", frameManip)

                if cv2.waitKey(1) == ord('q'):
                    break
        except KeyboardInterrupt:
            # Ctrl+C is a normal way to stop recording; fall through to the hint below
            pass

    print("To view the encoded data, convert the stream file (.h265) into a video file (.mp4) using a command below:")
    print("ffmpeg -framerate 30 -i video.h265 -c copy video.mp4")

C++

GitHub

1#include <iostream>
2
3// Includes common necessary includes for development using depthai library
4#include "depthai/depthai.hpp"
5
6// MobilenetSSD label texts
7static const std::vector<std::string> labelMap = {"background", "aeroplane", "bicycle",     "bird",  "boat",        "bottle", "bus",
8                                                  "car",        "cat",       "chair",       "cow",   "diningtable", "dog",    "horse",
9                                                  "motorbike",  "person",    "pottedplant", "sheep", "sofa",        "train",  "tvmonitor"};
10
11int main(int argc, char** argv) {
12    using namespace std;
13    // Default blob path provided by Hunter private data download
14    // Applicable for easier example usage only
15    std::string nnPath(BLOB_PATH);
16
17    // If path to blob specified, use that
18    if(argc > 1) {
19        nnPath = std::string(argv[1]);
20    }
21
22    // Print which blob we are using
23    printf("Using blob at path: %s\n", nnPath.c_str());
24
25    // Create pipeline
26    dai::Pipeline pipeline;
27
28    // Define sources and outputs
29    auto camRgb = pipeline.create<dai::node::ColorCamera>();
30    auto monoRight = pipeline.create<dai::node::MonoCamera>();
31    auto videoEncoder = pipeline.create<dai::node::VideoEncoder>();
32    auto nn = pipeline.create<dai::node::MobileNetDetectionNetwork>();
33    auto manip = pipeline.create<dai::node::ImageManip>();
34
35    auto videoOut = pipeline.create<dai::node::XLinkOut>();
36    auto xoutRight = pipeline.create<dai::node::XLinkOut>();
37    auto manipOut = pipeline.create<dai::node::XLinkOut>();
38    auto nnOut = pipeline.create<dai::node::XLinkOut>();
39
40    videoOut->setStreamName("h265");
41    xoutRight->setStreamName("right");
42    manipOut->setStreamName("manip");
43    nnOut->setStreamName("nn");
44
45    // Properties
46    camRgb->setBoardSocket(dai::CameraBoardSocket::CAM_A);
47    camRgb->setResolution(dai::ColorCameraProperties::SensorResolution::THE_1080_P);
48    monoRight->setCamera("right");
49    monoRight->setResolution(dai::MonoCameraProperties::SensorResolution::THE_720_P);
50    videoEncoder->setDefaultProfilePreset(30, dai::VideoEncoderProperties::Profile::H265_MAIN);
51
52    nn->setConfidenceThreshold(0.5);
53    nn->setBlobPath(nnPath);
54    nn->setNumInferenceThreads(2);
55    nn->input.setBlocking(false);
56
57    // The NN model expects BGR input. By default ImageManip output type would be same as input (gray in this case)
58    manip->initialConfig.setFrameType(dai::ImgFrame::Type::BGR888p);
59    manip->initialConfig.setResize(300, 300);
60
61    // Linking
62    camRgb->video.link(videoEncoder->input);
63    videoEncoder->bitstream.link(videoOut->input);
64    monoRight->out.link(manip->inputImage);
65    manip->out.link(nn->input);
66    monoRight->out.link(xoutRight->input);
67    manip->out.link(manipOut->input);
68    nn->out.link(nnOut->input);
69
70    // Connect to device and start pipeline
71    dai::Device device(pipeline);
72
73    // Queues
74    int queueSize = 8;
75    auto qRight = device.getOutputQueue("right", queueSize);
76    auto qManip = device.getOutputQueue("manip", queueSize);
77    auto qDet = device.getOutputQueue("nn", queueSize);
78    auto qRgbEnc = device.getOutputQueue("h265", 30, true);
79
80    cv::Mat frame;
81    cv::Mat frameManip;
82    std::vector<dai::ImgDetection> detections;
83    int offsetX = (monoRight->getResolutionWidth() - monoRight->getResolutionHeight()) / 2;
84    auto color = cv::Scalar(255, 0, 0);
85
86    auto videoFile = std::ofstream("video.h265", std::ios::binary);
87    cv::namedWindow("right", cv::WINDOW_NORMAL);
88    cv::namedWindow("manip", cv::WINDOW_NORMAL);
89
90    while(true) {
91        auto inRight = qRight->tryGet<dai::ImgFrame>();
92        auto inManip = qManip->tryGet<dai::ImgFrame>();
93        auto inDet = qDet->tryGet<dai::ImgDetections>();
94
95        auto out1 = qRgbEnc->get<dai::ImgFrame>();
96        videoFile.write((char*)out1->getData().data(), out1->getData().size());
97
98        if(inRight) {
99            frame = inRight->getCvFrame();
100        }
101
102        if(inManip) {
103            frameManip = inManip->getCvFrame();
104        }
105
106        if(inDet) {
107            detections = inDet->detections;
108        }
109
110        if(!frame.empty()) {
111            for(auto& detection : detections) {
112                int x1 = detection.xmin * monoRight->getResolutionHeight() + offsetX;
113                int y1 = detection.ymin * monoRight->getResolutionHeight();
114                int x2 = detection.xmax * monoRight->getResolutionHeight() + offsetX;
115                int y2 = detection.ymax * monoRight->getResolutionHeight();
116
117                uint32_t labelIndex = detection.label;
118                std::string labelStr = to_string(labelIndex);
119                if(labelIndex < labelMap.size()) {
120                    labelStr = labelMap[labelIndex];
121                }
122                cv::putText(frame, labelStr, cv::Point(x1 + 10, y1 + 20), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);
123                std::stringstream confStr;
124                confStr << std::fixed << std::setprecision(2) << detection.confidence * 100;
125                cv::putText(frame, confStr.str(), cv::Point(x1 + 10, y1 + 40), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);
126                cv::rectangle(frame, cv::Rect(cv::Point(x1, y1), cv::Point(x2, y2)), color, cv::FONT_HERSHEY_SIMPLEX);
127            }
128            // Show the frame
129            cv::imshow("right", frame);
130        }
131
132        if(!frameManip.empty()) {
133            for(auto& detection : detections) {
134                int x1 = detection.xmin * frameManip.cols;
135                int y1 = detection.ymin * frameManip.rows;
136                int x2 = detection.xmax * frameManip.cols;
137                int y2 = detection.ymax * frameManip.rows;
138
139                uint32_t labelIndex = detection.label;
140                std::string labelStr = to_string(labelIndex);
141                if(labelIndex < labelMap.size()) {
142                    labelStr = labelMap[labelIndex];
143                }
144                cv::putText(frameManip, labelStr, cv::Point(x1 + 10, y1 + 20), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);
145                std::stringstream confStr;
146                confStr << std::fixed << std::setprecision(2) << detection.confidence * 100;
147                cv::putText(frameManip, confStr.str(), cv::Point(x1 + 10, y1 + 40), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);
148                cv::rectangle(frameManip, cv::Rect(cv::Point(x1, y1), cv::Point(x2, y2)), color, cv::FONT_HERSHEY_SIMPLEX);
149            }
150            // Show the frame
151            cv::imshow("manip", frameManip);
152        }
153
154        int key = cv::waitKey(1);
155        if(key == 'q' || key == 'Q') {
156            break;
157        }
158    }
159    cout << "To view the encoded data, convert the stream file (.h265) into a video file (.mp4), using a command below:" << endl;
160    cout << "ffmpeg -framerate 30 -i video.h265 -c copy video.mp4" << endl;
161    return 0;
162}

Pipeline

需要帮助？

请前往 Discussion Forum 获取技术支持或提出您可能有的任何其他问题。

本页目录

RGB 编码 & Mono & MobilenetSSD

View as Markdown

类似示例：

演示

设置

源代码

Python

C++

Pipeline

需要帮助？

RGB 编码 & Mono & MobilenetSSD