Digital Zoom
Crops and displays the largest detected person from the full‑resolution RGB stream, and updates AE/AF regions to match the crop. Uses a YOLO model from the Model Zoo for person detection and remaps detections to the full‑resolution source.
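Detections are produced on the downscaled NN input frame, so each bounding box has to be remapped into full‑resolution source coordinates before it can drive the crop and the AE/AF regions. A minimal sketch of that remapping, using the same transformation API as the full listing below (`dets` is assumed to be a dai.ImgDetections message and `det` a single dai.ImgDetection from it):

# Denormalize the box to the NN input size the detections were made on
w, h = dets.getTransformation().getSize()
rect = dai.Rect(dai.Point2f(det.xmin, det.ymin), dai.Point2f(det.xmax, det.ymax)).denormalize(w, h)
# Inverse-transform the (axis-aligned) rectangle back into the source frame
sourceRect = dets.getTransformation().invTransformRect(dai.RotatedRect(rect, 0))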

Pipeline
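In outline: a Camera node feeds its full‑resolution output both to an ImageManip node that downscales it for the DetectionNetwork and to a second ImageManip node that crops out the person ROI; runtime queues carry the crop configuration and camera control messages back into the pipeline.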
Source code
Python
#!/usr/bin/env python3

import cv2
import depthai as dai
import time

# This example crops out the biggest detected person from the camera's
# maximum-resolution stream and sets the autofocus and autoexposure
# regions to the same ROI.


def getBiggestPerson(imgDetections: dai.ImgDetections):
    # Coordinates are normalized, so the normalized area is enough for comparison
    biggestDetection = None
    biggestDetectionSize = 0
    for detection in imgDetections.detections:
        if detection.label == 0:  # Person
            size = (detection.xmax - detection.xmin) * (detection.ymax - detection.ymin)
            if size > biggestDetectionSize:
                biggestDetection = detection
                biggestDetectionSize = size
    return biggestDetection


def displayFrame(name: str, frame: dai.ImgFrame, imgDetections: dai.ImgDetections, labelMap: dict):
    color = (0, 255, 0)
    assert imgDetections.getTransformation() is not None
    cvFrame = frame.getFrame() if frame.getType() == dai.ImgFrame.Type.RAW16 else frame.getCvFrame()
    for detection in imgDetections.detections:
        # Get the shape of the frame from which the detections originated for denormalization
        normShape = imgDetections.getTransformation().getSize()

        # Create rotated rectangle to remap
        rotRect = dai.RotatedRect(
            dai.Rect(
                dai.Point2f(detection.xmin, detection.ymin),
                dai.Point2f(detection.xmax, detection.ymax),
            ).denormalize(normShape[0], normShape[1]),
            0,
        )
        # Remap the detection rectangle to the target frame
        remapped = imgDetections.getTransformation().remapRectTo(frame.getTransformation(), rotRect)
        # The remapped rectangle could be rotated, so take its bounding box
        bbox = [int(c) for c in remapped.getOuterRect()]
        cv2.putText(
            cvFrame,
            labelMap[detection.label],
            (bbox[0] + 10, bbox[1] + 20),
            cv2.FONT_HERSHEY_TRIPLEX,
            0.5,
            255,
        )
        cv2.putText(
            cvFrame,
            f"{int(detection.confidence * 100)}%",
            (bbox[0] + 10, bbox[1] + 40),
            cv2.FONT_HERSHEY_TRIPLEX,
            0.5,
            255,
        )
        cv2.rectangle(cvFrame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2)
    # Show the frame
    cv2.imshow(name, cvFrame)


def transformDetectionToSource(imgDetections: dai.ImgDetections, detection: dai.ImgDetection):
    # Denormalize the detection to the NN input size, then inverse-transform it
    # back into the full-resolution source frame
    normShape = imgDetections.getTransformation().getSize()
    sourceSize = imgDetections.getTransformation().getSourceSize()
    width, height = sourceSize
    rotRect = dai.RotatedRect(
        dai.Rect(
            dai.Point2f(detection.xmin, detection.ymin),
            dai.Point2f(detection.xmax, detection.ymax),
        ).denormalize(normShape[0], normShape[1]),
        0,
    )
    rotRect = imgDetections.getTransformation().invTransformRect(rotRect)
    outerRect = rotRect.getOuterRect()

    # Clamp the rectangle to the bounds of the source frame
    firstPoint = dai.Point2f(max(0, min(outerRect[0], width)), max(0, min(outerRect[1], height)))
    secondPoint = dai.Point2f(max(0, min(outerRect[2], width)), max(0, min(outerRect[3], height)))
    return dai.Rect(firstPoint, secondPoint)


device = dai.Device()
# Download the person-detection model for the connected device's platform
modelPath = dai.getModelFromZoo(dai.NNModelDescription("yolov6-nano", platform=device.getPlatformAsString()))
modelArchive = dai.NNArchive(modelPath)
inputSize = modelArchive.getInputSize()
typeName = modelArchive.getConfig().model.inputs[0].preprocessing.daiType

# Use the frame type the model archive asks for, if it names a valid one
if typeName:
    try:
        frameType = getattr(dai.ImgFrame.Type, typeName)
    except AttributeError:
        typeName = None

if not typeName:
    # Fall back to a platform default: planar BGR on RVC2, interleaved otherwise
    if device.getPlatform() == dai.Platform.RVC2:
        frameType = dai.ImgFrame.Type.BGR888p
    else:
        frameType = dai.ImgFrame.Type.BGR888i

# Create pipeline
with dai.Pipeline(device) as pipeline:
    # Define source and output
    cam = pipeline.create(dai.node.Camera).build()
    cameraControlQueue = cam.inputControl.createInputQueue()
    fullResStream = cam.requestFullResolutionOutput(useHighestResolution=True)

    # Downscale the full-resolution stream to the NN input size and frame type
    imageManip = pipeline.create(dai.node.ImageManip)
    imageManip.initialConfig.setOutputSize(inputSize[0], inputSize[1])
    imageManip.initialConfig.setFrameType(frameType)

    fullResStream.link(imageManip.inputImage)

    detectionNetwork = pipeline.create(dai.node.DetectionNetwork).build(imageManip.out, modelArchive)
    labelMap = detectionNetwork.getClasses()

    # A second ImageManip crops the person ROI out of the full-resolution stream
    imageManipCropOut = pipeline.create(dai.node.ImageManip)
    imageManipCropOut.setMaxOutputFrameSize(round(1000 * 1000 * 1.5) + 300)  # fits a 1000x1000 NV12 frame (1.5 bytes/px) plus headroom
    imageManipCropOut.initialConfig.setOutputSize(800, 800)
    # Keep only the newest frame so the crop never lags behind the camera
    imageManipCropOut.inputImage.setBlocking(False)
    imageManipCropOut.inputImage.setMaxSize(1)

    imageManipConfigQueue = imageManipCropOut.inputConfig.createInputQueue()
    imageManipCropOutQueue = imageManipCropOut.out.createOutputQueue()
    fullResStream.link(imageManipCropOut.inputImage)

    videoQueue = detectionNetwork.passthrough.createOutputQueue()
    detectionQueue = detectionNetwork.out.createOutputQueue()

    # Connect to device and start pipeline
    pipeline.start()
    lastTimeToAutoFocus = time.time()
    while pipeline.isRunning():
        videoIn = videoQueue.get()
        detections = detectionQueue.get()
        biggestDetection = getBiggestPerson(detections)
        if biggestDetection:
            # Map the detection into source coordinates and retarget the crop
            sourceRect = transformDetectionToSource(detections, biggestDetection)
            manipConfig = dai.ImageManipConfig()
            manipConfig.addCrop(sourceRect, False)
            manipConfig.setOutputSize(800, 800, dai.ImageManipConfig.ResizeMode.LETTERBOX)
            imageManipConfigQueue.send(manipConfig)

            # Point the AE region at the ROI every frame, but move the AF region
            # at most once every 5 seconds to avoid focus hunting
            configCamera = dai.CameraControl()
            configCamera.setAutoExposureRegion(int(sourceRect.x), int(sourceRect.y), int(sourceRect.width), int(sourceRect.height))
            if time.time() - lastTimeToAutoFocus > 5:
                lastTimeToAutoFocus = time.time()
                configCamera.setAutoFocusRegion(int(sourceRect.x), int(sourceRect.y), int(sourceRect.width), int(sourceRect.height))
            cameraControlQueue.send(configCamera)
        imageManipCropOutFrame = imageManipCropOutQueue.tryGet()
        if imageManipCropOutFrame is not None:
            assert isinstance(imageManipCropOutFrame, dai.ImgFrame)
            cv2.imshow("Cropped out frame", imageManipCropOutFrame.getCvFrame())
        assert isinstance(videoIn, dai.ImgFrame)
        assert isinstance(detections, dai.ImgDetections)
        displayFrame("Full view video", videoIn, detections, labelMap)
        key = cv2.waitKey(1)
        if key == ord("q"):
            break
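The crop follows whichever person is currently largest; when nobody is detected, the loop above simply leaves the last crop in place. The same runtime-reconfiguration pattern can retarget the crop from anywhere in the loop. A minimal sketch, reusing imageManipConfigQueue from the listing and assuming an arbitrary fixed region in source pixels:

# Hypothetical reset: crop a fixed 800x800 region from the top-left of the source
resetConfig = dai.ImageManipConfig()
resetConfig.addCrop(dai.Rect(dai.Point2f(0, 0), dai.Point2f(800, 800)), False)  # False = coordinates not normalized
resetConfig.setOutputSize(800, 800, dai.ImageManipConfig.ResizeMode.LETTERBOX)
imageManipConfigQueue.send(resetConfig)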