Video & MobilenetSSD

This example shows how to run MobileNetv2SSD on an RGB frame that is read from a specified video file rather than from the RGB camera, and how to display both the RGB frame and the metadata results from MobileNetv2SSD on that frame. DepthAI is used here only as a processing unit.
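
In other words, the host decodes the video with OpenCV and sends each frame to the device over an XLinkIn queue; the device runs the detection network and streams the results back through an XLinkOut queue. The pipeline topology boils down to this sketch, which mirrors the full, runnable listing under Source code below:

Python
import depthai as dai

pipeline = dai.Pipeline()

# Frames come in from the host, detections go back out to the host
xinFrame = pipeline.create(dai.node.XLinkIn)
nn = pipeline.create(dai.node.MobileNetDetectionNetwork)
nnOut = pipeline.create(dai.node.XLinkOut)

xinFrame.setStreamName("inFrame")
nnOut.setStreamName("nn")

xinFrame.out.link(nn.input)  # host frames feed the network
nn.out.link(nnOut.input)     # detections stream back to the host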


Setup

Please run the install script to download all required dependencies. Please note that this script must be run from within the cloned repository, so you have to download the depthai-python repository first and then run the script:
Command Line
git clone https://github.com/luxonis/depthai-python.git
cd depthai-python/examples
python3 install_requirements.py
For additional information, please follow the installation guide.
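
After the dependencies are installed, the example runs with its bundled model and video by default, or with a custom blob and video file passed as positional arguments. The script path below assumes the layout of the depthai-python examples directory; adjust it if your copy differs:

Command Line
python3 MobileNet/video_mobilenet.py
python3 MobileNet/video_mobilenet.py <path/to/model.blob> <path/to/video.mp4>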

Source code

Python
#!/usr/bin/env python3

from pathlib import Path
import sys
import cv2
import depthai as dai
import numpy as np
from time import monotonic

# Get argument first
parentDir = Path(__file__).parent
nnPath = str((parentDir / Path('../models/mobilenet-ssd_openvino_2021.4_8shave.blob')).resolve().absolute())
videoPath = str((parentDir / Path('../models/construction_vest.mp4')).resolve().absolute())
if len(sys.argv) > 2:
    nnPath = sys.argv[1]
    videoPath = sys.argv[2]

if not Path(nnPath).exists() or not Path(videoPath).exists():
    raise FileNotFoundError(f'Required file/s not found, please run "{sys.executable} install_requirements.py"')

# MobilenetSSD label texts
labelMap = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow",
            "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]

# Create pipeline
pipeline = dai.Pipeline()

# Define sources and outputs
nn = pipeline.create(dai.node.MobileNetDetectionNetwork)

xinFrame = pipeline.create(dai.node.XLinkIn)
nnOut = pipeline.create(dai.node.XLinkOut)

xinFrame.setStreamName("inFrame")
nnOut.setStreamName("nn")

# Properties
nn.setConfidenceThreshold(0.5)
nn.setBlobPath(nnPath)
nn.setNumInferenceThreads(2)
nn.input.setBlocking(False)

# Linking
xinFrame.out.link(nn.input)
nn.out.link(nnOut.input)

# Connect to device and start pipeline
with dai.Device(pipeline) as device:

    # Input queue will be used to send video frames to the device.
    qIn = device.getInputQueue(name="inFrame")
    # Output queue will be used to get nn data from the video frames.
    qDet = device.getOutputQueue(name="nn", maxSize=4, blocking=False)

    frame = None
    detections = []

    # nn data, being the bounding box locations, are in <0..1> range - they need to be normalized with frame width/height
    def frameNorm(frame, bbox):
        normVals = np.full(len(bbox), frame.shape[0])
        normVals[::2] = frame.shape[1]
        return (np.clip(np.array(bbox), 0, 1) * normVals).astype(int)

    # Resize to the NN input size and convert from interleaved (HWC) to planar (CHW) layout
    def to_planar(arr: np.ndarray, shape: tuple) -> np.ndarray:
        return cv2.resize(arr, shape).transpose(2, 0, 1).flatten()

    def displayFrame(name, frame):
        for detection in detections:
            bbox = frameNorm(frame, (detection.xmin, detection.ymin, detection.xmax, detection.ymax))
            cv2.putText(frame, labelMap[detection.label], (bbox[0] + 10, bbox[1] + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(frame, f"{int(detection.confidence * 100)}%", (bbox[0] + 10, bbox[1] + 40), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255, 0, 0), 2)
        # Show the frame
        cv2.imshow(name, frame)

    cap = cv2.VideoCapture(videoPath)
    while cap.isOpened():
        read_correctly, frame = cap.read()
        if not read_correctly:
            break

        img = dai.ImgFrame()
        img.setData(to_planar(frame, (300, 300)))
        img.setTimestamp(monotonic())
        img.setWidth(300)
        img.setHeight(300)
        qIn.send(img)

        inDet = qDet.tryGet()

        if inDet is not None:
            detections = inDet.detections

        if frame is not None:
            displayFrame("rgb", frame)

        if cv2.waitKey(1) == ord('q'):
            break
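
The detections returned by the device carry bounding-box coordinates normalized to the <0..1> range, and frameNorm maps them back to pixel positions: x values scale by the frame width, y values by the height. A quick illustration of that mapping on a hypothetical 640x360 frame (values chosen only for this example):

Python
import numpy as np

frame = np.zeros((360, 640, 3), dtype=np.uint8)  # height=360, width=640

def frameNorm(frame, bbox):
    normVals = np.full(len(bbox), frame.shape[0])  # y coordinates scale by height
    normVals[::2] = frame.shape[1]                 # x coordinates scale by width
    return (np.clip(np.array(bbox), 0, 1) * normVals).astype(int)

print(frameNorm(frame, (0.25, 0.5, 0.75, 1.0)))    # -> [160 180 480 360]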
