RGB Encoding & Mono with MobilenetSSD & Depth

This example shows how to configure the depthai video encoder in H.265 format to encode the RGB camera input at Full-HD resolution at 30 FPS and transfer the encoded video over XLink to the host, saving it to disk as a video file. At the same time, a MobileNetv2SSD network is run on the frames from the right grayscale camera, while the application also displays the depth map produced by both of the grayscale cameras. Note that disparity is used in this case, as it colorizes in a more intuitive way. Pressing `q` in one of the preview windows stops the recording; the script then prints the ffmpeg command that converts the raw stream into an mp4 to make it playable. Note that ffmpeg will need to be installed and runnable for the conversion to mp4 to succeed. Be careful: this example saves encoded video to your host storage, so if you leave it running, you could fill up the storage on your host. It's a combination of RGB Encoding and Mono & MobilenetSSD & Depth.

Similar samples:

Demo

Setup

Please run the install script to download all required dependencies. Please note that this script must be run from a git context, so you have to download the depthai-python repository first and then run the script.

Command Line

git clone https://github.com/luxonis/depthai-python.git
cd depthai-python/examples
python3 install_requirements.py

For additional information, please follow the installation guide.

Source code

Python

C++

Python

GitHub

#!/usr/bin/env python3
"""Encode the RGB camera to H.265 while running MobileNet-SSD on the right mono camera.

The script builds a DepthAI pipeline that:
  * encodes the color camera's video output with the H265_MAIN preset and
    writes the raw bitstream to ``video.h265`` in the current directory,
  * feeds the rectified right mono frame (resized to 300x300, BGR) into a
    MobileNet detection network,
  * streams the right mono frame, the disparity map and the network input
    back to the host, where detections are overlaid and shown in windows.

Usage: pass an optional path to the network blob as the first argument.
Press ``q`` in one of the preview windows to stop.
"""

from pathlib import Path
import sys
import cv2
import depthai as dai
import numpy as np

# Get argument first
nnPath = str((Path(__file__).parent / Path('../models/mobilenet-ssd_openvino_2021.4_6shave.blob')).resolve().absolute())
if len(sys.argv) > 1:
    nnPath = sys.argv[1]

if not Path(nnPath).exists():
    raise FileNotFoundError(f'Required file/s not found, please run "{sys.executable} install_requirements.py"')

# MobilenetSSD label texts
labelMap = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow",
            "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]

# Create pipeline
pipeline = dai.Pipeline()

# Define sources and outputs
camRgb = pipeline.create(dai.node.ColorCamera)
videoEncoder = pipeline.create(dai.node.VideoEncoder)
monoRight = pipeline.create(dai.node.MonoCamera)
monoLeft = pipeline.create(dai.node.MonoCamera)
depth = pipeline.create(dai.node.StereoDepth)
manip = pipeline.create(dai.node.ImageManip)
nn = pipeline.create(dai.node.MobileNetDetectionNetwork)

videoOut = pipeline.create(dai.node.XLinkOut)
xoutRight = pipeline.create(dai.node.XLinkOut)
disparityOut = pipeline.create(dai.node.XLinkOut)
manipOut = pipeline.create(dai.node.XLinkOut)
nnOut = pipeline.create(dai.node.XLinkOut)

videoOut.setStreamName('h265')
xoutRight.setStreamName('right')
disparityOut.setStreamName('disparity')
manipOut.setStreamName('manip')
nnOut.setStreamName('nn')

# Properties
camRgb.setBoardSocket(dai.CameraBoardSocket.CAM_A)
camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
monoRight.setCamera("right")
monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoLeft.setCamera("left")
monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
videoEncoder.setDefaultProfilePreset(30, dai.VideoEncoderProperties.Profile.H265_MAIN)

depth.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY)
depth.setRectifyEdgeFillColor(0) # Black, to better see the cutout

nn.setConfidenceThreshold(0.5)
nn.setBlobPath(nnPath)
nn.setNumInferenceThreads(2)
nn.input.setBlocking(False)

# The NN model expects BGR input. By default ImageManip output type would be same as input (gray in this case)
manip.initialConfig.setFrameType(dai.ImgFrame.Type.BGR888p)
manip.initialConfig.setResize(300, 300)

# Linking
camRgb.video.link(videoEncoder.input)
videoEncoder.bitstream.link(videoOut.input)
monoRight.out.link(xoutRight.input)
monoRight.out.link(depth.right)
monoLeft.out.link(depth.left)
depth.disparity.link(disparityOut.input)
depth.rectifiedRight.link(manip.inputImage)
manip.out.link(nn.input)
manip.out.link(manipOut.input)
nn.out.link(nnOut.input)

# Disparity range is used for normalization
disparityMultiplier = 255 / depth.initialConfig.getMaxDisparity()

# Connect to device and start pipeline
with dai.Device(pipeline) as device:

    queueSize = 8
    qRight = device.getOutputQueue("right", queueSize)
    qDisparity = device.getOutputQueue("disparity", queueSize)
    qManip = device.getOutputQueue("manip", queueSize)
    qDet = device.getOutputQueue("nn", queueSize)
    # The encoded stream uses a larger, blocking queue; the loop below drains
    # it completely on every iteration.
    qRgbEnc = device.getOutputQueue('h265', maxSize=30, blocking=True)

    frame = None
    frameManip = None
    frameDisparity = None
    detections = []
    # Detections on the full-width mono/disparity frames are mapped onto a
    # height x height square shifted right by offsetX.
    # NOTE(review): presumably this matches the region of the mono frame that
    # ImageManip feeds to the network - confirm against the ImageManip docs.
    offsetX = (monoRight.getResolutionWidth() - monoRight.getResolutionHeight()) // 2
    color = (255, 0, 0)
    # Only the shape of this array is used (as the reference size for frameNorm).
    croppedFrame = np.zeros((monoRight.getResolutionHeight(), monoRight.getResolutionHeight()))

    def frameNorm(frame, bbox):
        """Scale a normalized (xmin, ymin, xmax, ymax) bbox to pixel coordinates of `frame`.

        Values are clipped to [0, 1] first; even indices are multiplied by the
        frame width and odd indices by the frame height. Returns an int array.
        """
        normVals = np.full(len(bbox), frame.shape[0])
        normVals[::2] = frame.shape[1]
        return (np.clip(np.array(bbox), 0, 1) * normVals).astype(int)

    def drawDetections(image, dets, reference, xOffset=0):
        """Draw box, label and confidence for every detection onto `image` in place.

        `reference` is the array whose shape the normalized coordinates are
        scaled to; `xOffset` is added to both x coordinates afterwards.
        """
        for detection in dets:
            bbox = frameNorm(reference, (detection.xmin, detection.ymin, detection.xmax, detection.ymax))
            bbox[::2] += xOffset
            cv2.rectangle(image, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2)
            cv2.putText(image, labelMap[detection.label], (bbox[0] + 10, bbox[1] + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
            cv2.putText(image, f"{int(detection.confidence * 100)}%", (bbox[0] + 10, bbox[1] + 40), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)

    # The `with` block guarantees the output file is flushed and closed when
    # the loop ends, including on an exception or Ctrl+C.
    with open('video.h265', 'wb') as videoFile:
        cv2.namedWindow("right", cv2.WINDOW_NORMAL)
        cv2.namedWindow("manip", cv2.WINDOW_NORMAL)

        while True:
            # Non-blocking reads: each is None when no new message is available.
            inRight = qRight.tryGet()
            inManip = qManip.tryGet()
            inDet = qDet.tryGet()
            inDisparity = qDisparity.tryGet()

            # Write every pending encoded packet to disk before touching the previews.
            while qRgbEnc.has():
                qRgbEnc.get().getData().tofile(videoFile)

            if inRight is not None:
                frame = inRight.getCvFrame()

            if inManip is not None:
                frameManip = inManip.getCvFrame()

            if inDisparity is not None:
                # Apply color map for better visualization
                frameDisparity = inDisparity.getCvFrame()
                frameDisparity = (frameDisparity*disparityMultiplier).astype(np.uint8)
                frameDisparity = cv2.applyColorMap(frameDisparity, cv2.COLORMAP_JET)

            if inDet is not None:
                detections = inDet.detections

            if frame is not None:
                drawDetections(frame, detections, croppedFrame, offsetX)
                # Show the right cam frame
                cv2.imshow("right", frame)

            if frameDisparity is not None:
                drawDetections(frameDisparity, detections, croppedFrame, offsetX)
                # Show the disparity frame
                cv2.imshow("disparity", frameDisparity)

            if frameManip is not None:
                # The manip frame is the network input itself, so no offset is needed.
                drawDetections(frameManip, detections, frameManip)
                # Show the manip frame
                cv2.imshow("manip", frameManip)

            if cv2.waitKey(1) == ord('q'):
                break

    print("To view the encoded data, convert the stream file (.h265) into a video file (.mp4) using a command below:")
    print("ffmpeg -framerate 30 -i video.h265 -c copy video.mp4")

Pipeline

React Flow

Need assistance?

Head over to Discussion Forum for technical support or any other questions you might have.

ON THIS PAGE

RGB Encoding & Mono with MobilenetSSD & Depth

Similar samples:

Demo

Setup

Source code

Python

Pipeline

Need assistance?