视频 & MobilenetSSD

此示例演示了如何对从指定文件（而非 RGB 摄像头）读取的 RGB 输入帧运行 MobileNetv2SSD，以及如何在显示这些 RGB 帧的同时，将 MobileNetv2SSD 输出的元数据结果叠加绘制在帧上。此处仅将 DepthAI 用作处理单元。

类似示例：

演示

设置

请运行安装脚本以下载所有必需的依赖项。请注意，此脚本必须在 git 仓库环境中运行，因此您必须先下载（克隆）depthai-python 存储库，然后再运行该脚本。

Command Line

git clone https://github.com/luxonis/depthai-python.git
cd depthai-python/examples
python3 install_requirements.py

有关更多信息，请遵循安装指南。

源代码

Python

GitHub

#!/usr/bin/env python3
"""Run MobileNet-SSD on frames read from a video file instead of a camera.

Each frame decoded on the host is resized to 300x300, sent to the device
through an XLinkIn stream, and the detections returned by the device are
drawn on top of the original frame.

Usage: <script> [<blob path> <video path>]
Both paths must be given together; otherwise the defaults under ../models
are used. Press 'q' in the preview window to quit.
"""

from pathlib import Path
import sys
import cv2
import depthai as dai
import numpy as np
from time import monotonic

# Default model and video locations, resolved relative to this script
parentDir = Path(__file__).parent
nnPath = str((parentDir / Path('../models/mobilenet-ssd_openvino_2021.4_8shave.blob')).resolve().absolute())
videoPath = str((parentDir / Path('../models/construction_vest.mp4')).resolve().absolute())
# The defaults are only overridden when BOTH paths are supplied
if len(sys.argv) > 2:
    nnPath = sys.argv[1]
    videoPath = sys.argv[2]

if not Path(nnPath).exists() or not Path(videoPath).exists():
    raise FileNotFoundError(f'Required file/s not found, please run "{sys.executable} install_requirements.py"')

# MobilenetSSD label texts, indexed by the label id of each detection
labelMap = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow",
            "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]

# Create pipeline
pipeline = dai.Pipeline()

# Define sources and outputs
nn = pipeline.create(dai.node.MobileNetDetectionNetwork)

xinFrame = pipeline.create(dai.node.XLinkIn)
nnOut = pipeline.create(dai.node.XLinkOut)

xinFrame.setStreamName("inFrame")
nnOut.setStreamName("nn")

# Properties
nn.setConfidenceThreshold(0.5)
nn.setBlobPath(nnPath)
nn.setNumInferenceThreads(2)
nn.input.setBlocking(False)

# Linking: host frames -> detection network -> host
xinFrame.out.link(nn.input)
nn.out.link(nnOut.input)

# Connect to device and start pipeline
with dai.Device(pipeline) as device:

    # Input queue will be used to send video frames to the device.
    qIn = device.getInputQueue(name="inFrame")
    # Output queue will be used to get nn data from the video frames.
    qDet = device.getOutputQueue(name="nn", maxSize=4, blocking=False)

    # Most recent detections; reused for drawing until a newer result arrives
    detections = []

    def frameNorm(frame, bbox):
        """Convert a bounding box from the <0..1> range to pixel coordinates of `frame`.

        Values are clipped to <0..1> first. Even positions of `bbox` are
        scaled by the frame width, odd positions by the frame height.
        """
        normVals = np.full(len(bbox), frame.shape[0])
        normVals[::2] = frame.shape[1]
        return (np.clip(np.array(bbox), 0, 1) * normVals).astype(int)

    def to_planar(arr: np.ndarray, shape: tuple) -> np.ndarray:
        """Resize `arr` to `shape` and flatten it with the channel axis moved first."""
        return cv2.resize(arr, shape).transpose(2, 0, 1).flatten()

    def displayFrame(name, frame, detections):
        """Draw label, confidence and box of every detection on `frame`, then show it."""
        for detection in detections:
            bbox = frameNorm(frame, (detection.xmin, detection.ymin, detection.xmax, detection.ymax))
            # Fall back to the numeric id when the blob reports a class outside labelMap
            if detection.label < len(labelMap):
                label = labelMap[detection.label]
            else:
                label = str(detection.label)
            cv2.putText(frame, label, (bbox[0] + 10, bbox[1] + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(frame, f"{int(detection.confidence * 100)}%", (bbox[0] + 10, bbox[1] + 40), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255, 0, 0), 2)
        # Show the frame
        cv2.imshow(name, frame)

    cap = cv2.VideoCapture(videoPath)
    try:
        while cap.isOpened():
            read_correctly, frame = cap.read()
            if not read_correctly:
                break

            # Wrap the resized frame in a message the device-side network can consume
            img = dai.ImgFrame()
            img.setData(to_planar(frame, (300, 300)))
            img.setTimestamp(monotonic())
            img.setWidth(300)
            img.setHeight(300)
            qIn.send(img)

            # Non-blocking read: keep the previous detections if no new result is ready
            inDet = qDet.tryGet()
            if inDet is not None:
                detections = inDet.detections

            displayFrame("rgb", frame, detections)

            if cv2.waitKey(1) == ord('q'):
                break
    finally:
        # Release the video file handle even if the loop exits with an error
        cap.release()

C++

GitHub

1#include <chrono>
2#include <iostream>
3
4#include "utility.hpp"
5
6// Includes common necessary includes for development using depthai library
7#include "depthai/depthai.hpp"
8
9// MobilenetSSD label texts
10static const std::vector<std::string> labelMap = {"background", "aeroplane", "bicycle",     "bird",  "boat",        "bottle", "bus",
11                                                  "car",        "cat",       "chair",       "cow",   "diningtable", "dog",    "horse",
12                                                  "motorbike",  "person",    "pottedplant", "sheep", "sofa",        "train",  "tvmonitor"};
13
14int main(int argc, char** argv) {
15    using namespace std;
16    using namespace std::chrono;
17    // Default blob path provided by Hunter private data download
18    // Applicable for easier example usage only
19    std::string nnPath(BLOB_PATH);
20    std::string videoPath(VIDEO_PATH);
21
22    // If path to blob specified, use that
23    if(argc > 2) {
24        nnPath = std::string(argv[1]);
25        videoPath = std::string(argv[2]);
26    }
27
28    // Print which blob we are using
29    printf("Using blob at path: %s\n", nnPath.c_str());
30    printf("Using video at path: %s\n", videoPath.c_str());
31
32    // Create pipeline
33    dai::Pipeline pipeline;
34
35    // Define source and outputs
36    auto nn = pipeline.create<dai::node::MobileNetDetectionNetwork>();
37
38    auto xinFrame = pipeline.create<dai::node::XLinkIn>();
39    auto nnOut = pipeline.create<dai::node::XLinkOut>();
40
41    xinFrame->setStreamName("inFrame");
42    nnOut->setStreamName("nn");
43
44    // Properties
45    nn->setConfidenceThreshold(0.5);
46    nn->setBlobPath(nnPath);
47    nn->setNumInferenceThreads(2);
48    nn->input.setBlocking(false);
49
50    // Linking
51    xinFrame->out.link(nn->input);
52    nn->out.link(nnOut->input);
53
54    // Connect to device and start pipeline
55    dai::Device device(pipeline);
56
57    // Input queue will be used to send video frames to the device.
58    auto qIn = device.getInputQueue("inFrame");
59    // Output queue will be used to get nn data from the video frames.
60    auto qDet = device.getOutputQueue("nn", 4, false);
61
62    // Add bounding boxes and text to the frame and show it to the user
63    auto displayFrame = [](std::string name, cv::Mat frame, std::vector<dai::ImgDetection>& detections) {
64        auto color = cv::Scalar(255, 0, 0);
65        // nn data, being the bounding box locations, are in <0..1> range - they need to be normalized with frame width/height
66        for(auto& detection : detections) {
67            int x1 = detection.xmin * frame.cols;
68            int y1 = detection.ymin * frame.rows;
69            int x2 = detection.xmax * frame.cols;
70            int y2 = detection.ymax * frame.rows;
71
72            uint32_t labelIndex = detection.label;
73            std::string labelStr = to_string(labelIndex);
74            if(labelIndex < labelMap.size()) {
75                labelStr = labelMap[labelIndex];
76            }
77            cv::putText(frame, labelStr, cv::Point(x1 + 10, y1 + 20), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);
78            std::stringstream confStr;
79            confStr << std::fixed << std::setprecision(2) << detection.confidence * 100;
80            cv::putText(frame, confStr.str(), cv::Point(x1 + 10, y1 + 40), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);
81            cv::rectangle(frame, cv::Rect(cv::Point(x1, y1), cv::Point(x2, y2)), color, cv::FONT_HERSHEY_SIMPLEX);
82        }
83        // Show the frame
84        cv::imshow(name, frame);
85    };
86
87    cv::Mat frame;
88    cv::VideoCapture cap(videoPath);
89
90    cv::namedWindow("inFrame", cv::WINDOW_NORMAL);
91    cv::resizeWindow("inFrame", 1280, 720);
92    std::cout << "Resize video window with mouse drag!" << std::endl;
93
94    while(cap.isOpened()) {
95        // Read frame from video
96        cap >> frame;
97        if(frame.empty()) break;
98
99        auto img = std::make_shared<dai::ImgFrame>();
100        frame = resizeKeepAspectRatio(frame, cv::Size(300, 300), cv::Scalar(0));
101        toPlanar(frame, img->getData());
102        img->setTimestamp(steady_clock::now());
103        img->setWidth(300);
104        img->setHeight(300);
105        qIn->send(img);
106
107        auto inDet = qDet->get<dai::ImgDetections>();
108        auto detections = inDet->detections;
109
110        displayFrame("inFrame", frame, detections);
111
112        int key = cv::waitKey(1);
113        if(key == 'q' || key == 'Q') return 0;
114    }
115    return 0;
116}

Pipeline

需要帮助？

请前往 Discussion Forum 获取技术支持或提出您可能有的任何其他问题。

本页目录

视频 & MobilenetSSD
View as Markdown

类似示例：

演示

设置

源代码

Python

C++

Pipeline

需要帮助？

视频 & MobilenetSSD