Digital Zoom
Crops and displays the largest detected person from the full‑resolution RGB stream, and updates the auto-exposure (AE) and autofocus (AF) regions to match the crop. Uses a YOLO model from the Model Zoo for person detection and remaps the detections back to the full‑resolution source.

Demo
This example requires the DepthAI v3 API; see the installation instructions.

Pipeline
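The pipeline, as built in the source below: the Camera's full‑resolution output feeds an ImageManip that resizes and converts frames for a DetectionNetwork, and the same full‑resolution stream also feeds a second ImageManip that performs the crop. The host sends ImageManipConfig messages to the crop node and CameraControl messages back to the camera to move the AE/AF regions.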
Source code
Python
#!/usr/bin/env python3

import cv2
import depthai as dai
import time

# This example shows how to crop the biggest detected person out of the camera's
# maximum-resolution stream and set the autofocus and auto-exposure regions to the same ROI.

def getBiggestPerson(imgDetections: dai.ImgDetections):
    biggestDetection = None
    biggestDetectionSize = 0
    for detection in imgDetections.detections:
        if detection.label == 0:  # Person
            size = (detection.xmax - detection.xmin) * (detection.ymax - detection.ymin)
            if size > biggestDetectionSize:
                biggestDetection = detection
                biggestDetectionSize = size
    return biggestDetection

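# Draw detections on a frame; each box is remapped from the frame the network
# saw into the coordinate space of the frame being displayed.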
def displayFrame(name: str, frame: dai.ImgFrame, imgDetections: dai.ImgDetections, labelMap: dict):
    color = (0, 255, 0)
    assert imgDetections.getTransformation() is not None
    cvFrame = frame.getFrame() if frame.getType() == dai.ImgFrame.Type.RAW16 else frame.getCvFrame()
    # Shape of the frame the detections originated from, used for denormalization
    normShape = imgDetections.getTransformation().getSize()
    for detection in imgDetections.detections:
        # Create a rotated rectangle to remap
        rotRect = dai.RotatedRect(dai.Rect(dai.Point2f(detection.xmin, detection.ymin), dai.Point2f(detection.xmax, detection.ymax)).denormalize(normShape[0], normShape[1]), 0)
        # Remap the detection rectangle to the target frame
        remapped = imgDetections.getTransformation().remapRectTo(frame.getTransformation(), rotRect)
        # The remapped rectangle could be rotated, so take its outer bounding box
        bbox = [int(v) for v in remapped.getOuterRect()]
        cv2.putText(
            cvFrame,
            labelMap[detection.label],
            (bbox[0] + 10, bbox[1] + 20),
            cv2.FONT_HERSHEY_TRIPLEX,
            0.5,
            255,
        )
        cv2.putText(
            cvFrame,
            f"{int(detection.confidence * 100)}%",
            (bbox[0] + 10, bbox[1] + 40),
            cv2.FONT_HERSHEY_TRIPLEX,
            0.5,
            255,
        )
        cv2.rectangle(cvFrame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2)
    # Show the frame
    cv2.imshow(name, cvFrame)


def transformDetectionToSource(imgDetections: dai.ImgDetections, detection: dai.ImgDetection):
    # Map a normalized detection from the network's input frame back to the
    # full-resolution source frame
    normShape = imgDetections.getTransformation().getSize()
    sourceSize = imgDetections.getTransformation().getSourceSize()
    width, height = sourceSize
    rotRect = dai.RotatedRect(dai.Rect(dai.Point2f(detection.xmin, detection.ymin), dai.Point2f(detection.xmax, detection.ymax)).denormalize(normShape[0], normShape[1]), 0)
    rotRect = imgDetections.getTransformation().invTransformRect(rotRect)
    outerRect = rotRect.getOuterRect()

    # Clamp the rectangle to the bounds of the source frame
    firstPoint = dai.Point2f(max(0, min(outerRect[0], width)), max(0, min(outerRect[1], height)))
    secondPoint = dai.Point2f(max(0, min(outerRect[2], width)), max(0, min(outerRect[3], height)))
    return dai.Rect(firstPoint, secondPoint)

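# Resolve the model from the zoo for this device's platform and work out which
# frame type the network expects.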
device = dai.Device()
modelPath = dai.getModelFromZoo(dai.NNModelDescription("yolov6-nano", platform=device.getPlatformAsString()))
modelArchive = dai.NNArchive(modelPath)
inputSize = modelArchive.getInputSize()
typeName = modelArchive.getConfig().model.inputs[0].preprocessing.daiType

if typeName:
    try:
        frameType = getattr(dai.ImgFrame.Type, typeName)
    except AttributeError:
        typeName = None

if not typeName:
    # Fall back to a platform default when the archive does not name a valid type
    if device.getPlatform() == dai.Platform.RVC2:
        frameType = dai.ImgFrame.Type.BGR888p
    else:
        frameType = dai.ImgFrame.Type.BGR888i

# Create pipeline
with dai.Pipeline(device) as pipeline:
    # Define source and outputs
    cam = pipeline.create(dai.node.Camera).build()
    cameraControlQueue = cam.inputControl.createInputQueue()
    fullResStream = cam.requestFullResolutionOutput(useHighestResolution=True)

    # Resize/convert the full-resolution stream to the network's input size and type
    imageManip = pipeline.create(dai.node.ImageManip)
    imageManip.initialConfig.setOutputSize(inputSize[0], inputSize[1])
    imageManip.initialConfig.setFrameType(frameType)

    fullResStream.link(imageManip.inputImage)

    detectionNetwork = pipeline.create(dai.node.DetectionNetwork).build(imageManip.out, modelArchive)
    labelMap = detectionNetwork.getClasses()

    # Second ImageManip does the digital zoom: it crops the ROI out of the full-resolution stream
    imageManipCropOut = pipeline.create(dai.node.ImageManip)
    imageManipCropOut.setMaxOutputFrameSize(round(1000 * 1000 * 1.5) + 300)  # roomy upper bound for the 800x800 output
    imageManipCropOut.initialConfig.setOutputSize(800, 800)
    imageManipCropOut.inputImage.setBlocking(False)
    imageManipCropOut.inputImage.setMaxSize(1)

    imageManipConfigQueue = imageManipCropOut.inputConfig.createInputQueue()
    imageManipCropOutQueue = imageManipCropOut.out.createOutputQueue()
    fullResStream.link(imageManipCropOut.inputImage)

    videoQueue = detectionNetwork.passthrough.createOutputQueue()
    detectionQueue = detectionNetwork.out.createOutputQueue()

    # Start the pipeline
    pipeline.start()
    lastTimeToAutoFocus = time.time()
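    # Host-side loop: read detections, pick the largest person, and steer both
    # the crop and the camera's AE/AF regions toward it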
    while pipeline.isRunning():
        videoIn = videoQueue.get()
        detections = detectionQueue.get()
        biggestDetection = getBiggestPerson(detections)
        if biggestDetection:
            sourceRect = transformDetectionToSource(detections, biggestDetection)
            cropConfig = dai.ImageManipConfig()
            cropConfig.addCrop(sourceRect, False)
            cropConfig.setOutputSize(800, 800, dai.ImageManipConfig.ResizeMode.LETTERBOX)
            imageManipConfigQueue.send(cropConfig)

            configCamera = dai.CameraControl()
            configCamera.setAutoExposureRegion(int(sourceRect.x), int(sourceRect.y), int(sourceRect.width), int(sourceRect.height))
            # Re-target the autofocus region at most once every 5 seconds
            if time.time() - lastTimeToAutoFocus > 5:
                lastTimeToAutoFocus = time.time()
                configCamera.setAutoFocusRegion(int(sourceRect.x), int(sourceRect.y), int(sourceRect.width), int(sourceRect.height))
            cameraControlQueue.send(configCamera)
        imageManipCropOutFrame = imageManipCropOutQueue.tryGet()
        if imageManipCropOutFrame is not None:
            assert isinstance(imageManipCropOutFrame, dai.ImgFrame)
            cv2.imshow("Cropped out frame", imageManipCropOutFrame.getCvFrame())
        assert isinstance(videoIn, dai.ImgFrame)
        assert isinstance(detections, dai.ImgDetections)
        displayFrame("Full view video", videoIn, detections, labelMap)
        key = cv2.waitKey(1)
        if key == ord("q"):
            break
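
To zoom in on a different object class, a small variant of getBiggestPerson can select detections by class name instead of the hard-coded label 0. A minimal sketch, assuming labelMap is the class list returned by detectionNetwork.getClasses() (indexed by detection.label, as in displayFrame above) and that the model names the class "person":

def getBiggestOfClass(imgDetections: dai.ImgDetections, labelMap, target: str = "person"):
    biggest, biggestSize = None, 0.0
    for detection in imgDetections.detections:
        # Skip detections whose class name does not match the target
        if labelMap[detection.label] != target:
            continue
        size = (detection.xmax - detection.xmin) * (detection.ymax - detection.ymin)
        if size > biggestSize:
            biggest, biggestSize = detection, size
    return biggest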