Digital Zoom
Crops and displays the largest detected person from the full‑resolution RGB stream, and updates the auto-exposure (AE) and autofocus (AF) regions to match the crop. Uses a YOLO model from the Model Zoo for person detection and remaps the detections back to the full‑resolution source.

Demo
This example requires the DepthAI v3 API; see the installation instructions.

Pipeline
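The pipeline, as built in the source below: the Camera's full‑resolution output feeds an ImageManip that resizes and converts frames for a DetectionNetwork, and the same full‑resolution stream also feeds a second ImageManip that performs the crop. The host sends ImageManipConfig messages to the crop node and CameraControl messages back to the camera to move the AE/AF regions.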
Source code
Python
#!/usr/bin/env python3

import cv2
import depthai as dai
import time

# This example shows how to crop the biggest detected person out of the camera's
# maximum-resolution stream and set the autofocus and auto-exposure regions to the same ROI.

def getBiggestPerson(imgDetections: dai.ImgDetections):
    biggestDetection = None
    biggestDetectionSize = 0
    for detection in imgDetections.detections:
        if detection.label == 0:  # Person
            size = (detection.xmax - detection.xmin) * (detection.ymax - detection.ymin)
            if size > biggestDetectionSize:
                biggestDetection = detection
                biggestDetectionSize = size
    return biggestDetection

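# Draw detections on a frame; each box is remapped from the frame the network
# saw into the coordinate space of the frame being displayed.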
def displayFrame(name: str, frame: dai.ImgFrame, imgDetections: dai.ImgDetections, labelMap: dict):
    color = (0, 255, 0)
    assert imgDetections.getTransformation() is not None
    cvFrame = frame.getFrame() if frame.getType() == dai.ImgFrame.Type.RAW16 else frame.getCvFrame()
    # Shape of the frame the detections originated from, used for denormalization
    normShape = imgDetections.getTransformation().getSize()
    for detection in imgDetections.detections:
        # Create a rotated rectangle to remap
        rotRect = dai.RotatedRect(dai.Rect(dai.Point2f(detection.xmin, detection.ymin), dai.Point2f(detection.xmax, detection.ymax)).denormalize(normShape[0], normShape[1]), 0)
        # Remap the detection rectangle to the target frame
        remapped = imgDetections.getTransformation().remapRectTo(frame.getTransformation(), rotRect)
        # The remapped rectangle could be rotated, so take its outer bounding box
        bbox = [int(v) for v in remapped.getOuterRect()]
        cv2.putText(
            cvFrame,
            labelMap[detection.label],
            (bbox[0] + 10, bbox[1] + 20),
            cv2.FONT_HERSHEY_TRIPLEX,
            0.5,
            255,
        )
        cv2.putText(
            cvFrame,
            f"{int(detection.confidence * 100)}%",
            (bbox[0] + 10, bbox[1] + 40),
            cv2.FONT_HERSHEY_TRIPLEX,
            0.5,
            255,
        )
        cv2.rectangle(cvFrame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2)
    # Show the frame
    cv2.imshow(name, cvFrame)


def transformDetectionToSource(imgDetections: dai.ImgDetections, detection: dai.ImgDetection):
    # Map a normalized detection from the network's input frame back to the
    # full-resolution source frame
    normShape = imgDetections.getTransformation().getSize()
    sourceSize = imgDetections.getTransformation().getSourceSize()
    width, height = sourceSize
    rotRect = dai.RotatedRect(dai.Rect(dai.Point2f(detection.xmin, detection.ymin), dai.Point2f(detection.xmax, detection.ymax)).denormalize(normShape[0], normShape[1]), 0)
    rotRect = imgDetections.getTransformation().invTransformRect(rotRect)
    outerRect = rotRect.getOuterRect()

    # Clamp the rectangle to the bounds of the source frame
    firstPoint = dai.Point2f(max(0, min(outerRect[0], width)), max(0, min(outerRect[1], height)))
    secondPoint = dai.Point2f(max(0, min(outerRect[2], width)), max(0, min(outerRect[3], height)))
    return dai.Rect(firstPoint, secondPoint)

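# Resolve the model from the zoo for this device's platform and work out which
# frame type the network expects.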
device = dai.Device()
modelPath = dai.getModelFromZoo(dai.NNModelDescription("yolov6-nano", platform=device.getPlatformAsString()))
modelArchive = dai.NNArchive(modelPath)
inputSize = modelArchive.getInputSize()
typeName = modelArchive.getConfig().model.inputs[0].preprocessing.daiType

if typeName:
    try:
        frameType = getattr(dai.ImgFrame.Type, typeName)
    except AttributeError:
        typeName = None

if not typeName:
    # Fall back to a platform default when the archive does not name a valid type
    if device.getPlatform() == dai.Platform.RVC2:
        frameType = dai.ImgFrame.Type.BGR888p
    else:
        frameType = dai.ImgFrame.Type.BGR888i

# Create pipeline
with dai.Pipeline(device) as pipeline:
    # Define source and outputs
    cam = pipeline.create(dai.node.Camera).build()
    cameraControlQueue = cam.inputControl.createInputQueue()
    fullResStream = cam.requestFullResolutionOutput(useHighestResolution=True)

    # Resize/convert the full-resolution stream to the network's input size and type
    imageManip = pipeline.create(dai.node.ImageManip)
    imageManip.initialConfig.setOutputSize(inputSize[0], inputSize[1])
    imageManip.initialConfig.setFrameType(frameType)

    fullResStream.link(imageManip.inputImage)

    detectionNetwork = pipeline.create(dai.node.DetectionNetwork).build(imageManip.out, modelArchive)
    labelMap = detectionNetwork.getClasses()

    # Second ImageManip does the digital zoom: it crops the ROI out of the full-resolution stream
    imageManipCropOut = pipeline.create(dai.node.ImageManip)
    imageManipCropOut.setMaxOutputFrameSize(round(1000 * 1000 * 1.5) + 300)  # roomy upper bound for the 800x800 output
    imageManipCropOut.initialConfig.setOutputSize(800, 800)
    imageManipCropOut.inputImage.setBlocking(False)
    imageManipCropOut.inputImage.setMaxSize(1)

    imageManipConfigQueue = imageManipCropOut.inputConfig.createInputQueue()
    imageManipCropOutQueue = imageManipCropOut.out.createOutputQueue()
    fullResStream.link(imageManipCropOut.inputImage)

    videoQueue = detectionNetwork.passthrough.createOutputQueue()
    detectionQueue = detectionNetwork.out.createOutputQueue()

    # Start the pipeline
    pipeline.start()
    lastTimeToAutoFocus = time.time()
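    # Host-side loop: read detections, pick the largest person, and steer both
    # the crop and the camera's AE/AF regions toward it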
    while pipeline.isRunning():
        videoIn = videoQueue.get()
        detections = detectionQueue.get()
        biggestDetection = getBiggestPerson(detections)
        if biggestDetection:
            sourceRect = transformDetectionToSource(detections, biggestDetection)
            cropConfig = dai.ImageManipConfig()
            cropConfig.addCrop(sourceRect, False)
            cropConfig.setOutputSize(800, 800, dai.ImageManipConfig.ResizeMode.LETTERBOX)
            imageManipConfigQueue.send(cropConfig)

            configCamera = dai.CameraControl()
            configCamera.setAutoExposureRegion(int(sourceRect.x), int(sourceRect.y), int(sourceRect.width), int(sourceRect.height))
            # Re-target the autofocus region at most once every 5 seconds
            if time.time() - lastTimeToAutoFocus > 5:
                lastTimeToAutoFocus = time.time()
                configCamera.setAutoFocusRegion(int(sourceRect.x), int(sourceRect.y), int(sourceRect.width), int(sourceRect.height))
            cameraControlQueue.send(configCamera)
        imageManipCropOutFrame = imageManipCropOutQueue.tryGet()
        if imageManipCropOutFrame is not None:
            assert isinstance(imageManipCropOutFrame, dai.ImgFrame)
            cv2.imshow("Cropped out frame", imageManipCropOutFrame.getCvFrame())
        assert isinstance(videoIn, dai.ImgFrame)
        assert isinstance(detections, dai.ImgDetections)
        displayFrame("Full view video", videoIn, detections, labelMap)
        key = cv2.waitKey(1)
        if key == ord("q"):
            break
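
To zoom in on a different object class, a small variant of getBiggestPerson can select detections by class name instead of the hard-coded label 0. A minimal sketch, assuming labelMap is the class list returned by detectionNetwork.getClasses() (indexed by detection.label, as in displayFrame above) and that the model names the class "person":

def getBiggestOfClass(imgDetections: dai.ImgDetections, labelMap, target: str = "person"):
    biggest, biggestSize = None, 0.0
    for detection in imgDetections.detections:
        # Skip detections whose class name does not match the target
        if labelMap[detection.label] != target:
            continue
        size = (detection.xmax - detection.xmin) * (detection.ymax - detection.ymin)
        if size > biggestSize:
            biggest, biggestSize = detection, size
    return biggest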