# Digital Zoom

Crops and displays the largest detected person from the full‑resolution RGB stream, and updates AE/AF regions to match the crop.
Uses a YOLO model from the Model Zoo for person detection and remaps detections to the full‑resolution source.

## Demo

This example requires the DepthAI v3 API, see [installation instructions](https://docs.luxonis.com/software-v3/depthai.md).

## Pipeline

### examples/digital_zoom.pipeline.json

```json
{
  "pipeline": {
    "connections": [
      {
        "node1Id": 0,
        "node1Output": "0",
        "node1OutputGroup": "dynamicOutputs",
        "node2Id": 6,
        "node2Input": "inputImage",
        "node2InputGroup": ""
      },
      {
        "node1Id": 0,
        "node1Output": "0",
        "node1OutputGroup": "dynamicOutputs",
        "node2Id": 2,
        "node2Input": "inputImage",
        "node2InputGroup": ""
      },
      {
        "node1Id": 2,
        "node1Output": "out",
        "node1OutputGroup": "",
        "node2Id": 4,
        "node2Input": "in",
        "node2InputGroup": ""
      },
      {
        "node1Id": 4,
        "node1Output": "out",
        "node1OutputGroup": "",
        "node2Id": 5,
        "node2Input": "in",
        "node2InputGroup": ""
      },
      {
        "node1Id": 4,
        "node1Output": "passthrough",
        "node1OutputGroup": "",
        "node2Id": 12,
        "node2Input": "in",
        "node2InputGroup": ""
      },
      {
        "node1Id": 5,
        "node1Output": "out",
        "node1OutputGroup": "",
        "node2Id": 14,
        "node2Input": "in",
        "node2InputGroup": ""
      },
      {
        "node1Id": 6,
        "node1Output": "out",
        "node1OutputGroup": "",
        "node2Id": 16,
        "node2Input": "in",
        "node2InputGroup": ""
      },
      {
        "node1Id": 9,
        "node1Output": "out",
        "node1OutputGroup": "",
        "node2Id": 0,
        "node2Input": "inputControl",
        "node2InputGroup": ""
      },
      {
        "node1Id": 11,
        "node1Output": "out",
        "node1OutputGroup": "",
        "node2Id": 6,
        "node2Input": "inputConfig",
        "node2InputGroup": ""
      }
    ],
    "globalProperties": {
      "calibData": null,
      "cameraTuningBlobSize": null,
      "cameraTuningBlobUri": "",
      "eepromId": 0,
      "leonCssFrequencyHz": 700000000.0,
      "leonMssFrequencyHz": 700000000.0,
      "pipelineName": null,
      "pipelineVersion": null,
      "sippBufferSize": 18432,
      "sippDmaBufferSize": 16384,
      "xlinkChunkSize": -1
    },
    "nodes": [
      [
        0,
        {
          "alias": "",
          "id": 0,
          "ioInfo": [
            [
              [
                "",
                "inputControl"
              ],
              {
                "blocking": true,
                "group": "",
                "id": 0,
                "name": "inputControl",
                "queueSize": 3,
                "type": 3,
                "waitForMessage": false
              }
            ],
            [
              [
                "dynamicOutputs",
                "0"
              ],
              {
                "blocking": false,
                "group": "dynamicOutputs",
                "id": 3,
                "name": "0",
                "queueSize": 8,
                "type": 0,
                "waitForMessage": false
              }
            ],
            [
              [
                "",
                "mockIsp"
              ],
              {
                "blocking": true,
                "group": "",
                "id": 1,
                "name": "mockIsp",
                "queueSize": 8,
                "type": 3,
                "waitForMessage": false
              }
            ],
            [
              [
                "",
                "raw"
              ],
              {
                "blocking": false,
                "group": "",
                "id": 2,
                "name": "raw",
                "queueSize": 8,
                "type": 0,
                "waitForMessage": false
              }
            ]
          ],
          "logLevel": 3,
          "name": "Camera",
          "parentId": -1,
          "properties": {
            "boardSocket": 0,
            "cameraName": "",
            "fps": -1.0,
            "imageOrientation": -1,
            "initialControl": {
              "aeLockMode": false,
              "aeMaxExposureTimeUs": 0,
              "aeRegion": {
                "height": 0,
                "priority": 0,
                "width": 0,
                "x": 0,
                "y": 0
              },
              "afRegion": {
                "height": 0,
                "priority": 0,
                "width": 0,
                "x": 0,
                "y": 0
              },
              "antiBandingMode": 0,
              "autoFocusMode": 3,
              "awbLockMode": false,
              "awbMode": 0,
              "brightness": 0,
              "captureIntent": 0,
              "chromaDenoise": 0,
              "cmdMask": 0,
              "contrast": 0,
              "controlMode": 0,
              "effectMode": 0,
              "enableHdr": false,
              "expCompensation": 0,
              "expManual": {
                "exposureTimeUs": 0,
                "frameDurationUs": 0,
                "sensitivityIso": 0
              },
              "frameSyncMode": 0,
              "lensPosAutoInfinity": 0,
              "lensPosAutoMacro": 0,
              "lensPosition": 0,
              "lensPositionRaw": 0.0,
              "lowPowerNumFramesBurst": 0,
              "lowPowerNumFramesDiscard": 0,
              "lumaDenoise": 0,
              "miscControls": [],
              "saturation": 0,
              "sceneMode": 0,
              "sharpness": 0,
              "strobeConfig": {
                "activeLevel": 0,
                "enable": 0,
                "gpioNumber": 0
              },
              "strobeTimings": {
                "durationUs": 0,
                "exposureBeginOffsetUs": 0,
                "exposureEndOffsetUs": 0
              },
              "wbColorTemp": 0
            },
            "isp3aFps": 0,
            "mockIspHeight": -1,
            "mockIspWidth": -1,
            "numFramesPoolIsp": 3,
            "numFramesPoolPreview": 4,
            "numFramesPoolRaw": 3,
            "numFramesPoolStill": 4,
            "numFramesPoolVideo": 4,
            "outputRequests": [
              {
                "enableUndistortion": null,
                "fps": {
                  "value": null
                },
                "resizeMode": 0,
                "size": {
                  "value": {
                    "index": 0,
                    "value": [
                      4056,
                      3040
                    ]
                  }
                },
                "type": null
              }
            ],
            "resolutionHeight": -1,
            "resolutionWidth": -1,
            "sensorType": -1
          }
        }
      ],
      [
        2,
        {
          "alias": "",
          "id": 2,
          "ioInfo": [
            [
              [
                "",
                "inputConfig"
              ],
              {
                "blocking": true,
                "group": "",
                "id": 4,
                "name": "inputConfig",
                "queueSize": 3,
                "type": 3,
                "waitForMessage": false
              }
            ],
            [
              [
                "",
                "inputImage"
              ],
              {
                "blocking": true,
                "group": "",
                "id": 5,
                "name": "inputImage",
                "queueSize": 3,
                "type": 3,
                "waitForMessage": false
              }
            ],
            [
              [
                "",
                "out"
              ],
              {
                "blocking": false,
                "group": "",
                "id": 6,
                "name": "out",
                "queueSize": 8,
                "type": 0,
                "waitForMessage": false
              }
            ]
          ],
          "logLevel": 3,
          "name": "ImageManip",
          "parentId": -1,
          "properties": {
            "backend": 0,
            "initialConfig": {
              "base": {
                "background": 0,
                "backgroundB": 0,
                "backgroundG": 0,
                "backgroundR": 0,
                "center": true,
                "colormap": 0,
                "operations": [],
                "outputHeight": 384,
                "outputWidth": 512,
                "resizeMode": 1,
                "undistort": false
              },
              "outputFrameType": 8,
              "reusePreviousImage": false,
              "skipCurrentImage": false
            },
            "numFramesPool": 4,
            "outputFrameSize": 1048576,
            "performanceMode": 0
          }
        }
      ],
      [
        12,
        {
          "alias": "",
          "id": 12,
          "ioInfo": [
            [
              [
                "",
                "in"
              ],
              {
                "blocking": true,
                "group": "",
                "id": 17,
                "name": "in",
                "queueSize": 3,
                "type": 3,
                "waitForMessage": false
              }
            ]
          ],
          "logLevel": 3,
          "name": "XLinkOut",
          "parentId": -1,
          "properties": {
            "maxFpsLimit": -1.0,
            "metadataOnly": false,
            "streamName": "__x_4_passthrough"
          }
        }
      ],
      [
        4,
        {
          "alias": "neuralNetwork",
          "id": 4,
          "ioInfo": [
            [
              [
                "",
                "in"
              ],
              {
                "blocking": true,
                "group": "",
                "id": 7,
                "name": "in",
                "queueSize": 3,
                "type": 3,
                "waitForMessage": true
              }
            ],
            [
              [
                "",
                "out"
              ],
              {
                "blocking": false,
                "group": "",
                "id": 8,
                "name": "out",
                "queueSize": 8,
                "type": 0,
                "waitForMessage": false
              }
            ],
            [
              [
                "",
                "passthrough"
              ],
              {
                "blocking": false,
                "group": "",
                "id": 9,
                "name": "passthrough",
                "queueSize": 8,
                "type": 0,
                "waitForMessage": false
              }
            ]
          ],
          "logLevel": 3,
          "name": "NeuralNetwork",
          "parentId": 3,
          "properties": {
            "backend": "",
            "backendProperties": {},
            "blobSize": 8685208,
            "blobUri": "asset:__blob",
            "modelSource": 0,
            "modelUri": "",
            "numFrames": 8,
            "numNCEPerThread": 0,
            "numShavesPerThread": 0,
            "numThreads": 0
          }
        }
      ],
      [
        5,
        {
          "alias": "detectionParser",
          "id": 5,
          "ioInfo": [
            [
              [
                "",
                "in"
              ],
              {
                "blocking": true,
                "group": "",
                "id": 10,
                "name": "in",
                "queueSize": 1,
                "type": 3,
                "waitForMessage": true
              }
            ],
            [
              [
                "",
                "out"
              ],
              {
                "blocking": false,
                "group": "",
                "id": 11,
                "name": "out",
                "queueSize": 8,
                "type": 0,
                "waitForMessage": false
              }
            ]
          ],
          "logLevel": 3,
          "name": "DetectionParser",
          "parentId": 3,
          "properties": {
            "networkInputs": {
              "images": {
                "dataType": 1,
                "dims": [
                  512,
                  384,
                  3,
                  1
                ],
                "name": "images",
                "numDimensions": 4,
                "offset": 0,
                "order": 17185,
                "qpScale": 1.0,
                "qpZp": 0.0,
                "quantization": false,
                "strides": []
              }
            },
            "numFramesPool": 8,
            "parser": {
              "anchorMasks": {},
              "anchors": [],
              "anchorsV2": [],
              "classNames": [
                "person",
                "bicycle",
                "car",
                "motorcycle",
                "airplane",
                "bus",
                "train",
                "truck",
                "boat",
                "traffic light",
                "fire hydrant",
                "stop sign",
                "parking meter",
                "bench",
                "bird",
                "cat",
                "dog",
                "horse",
                "sheep",
                "cow",
                "elephant",
                "bear",
                "zebra",
                "giraffe",
                "backpack",
                "umbrella",
                "handbag",
                "tie",
                "suitcase",
                "frisbee",
                "skis",
                "snowboard",
                "sports ball",
                "kite",
                "baseball bat",
                "baseball glove",
                "skateboard",
                "surfboard",
                "tennis racket",
                "bottle",
                "wine glass",
                "cup",
                "fork",
                "knife",
                "spoon",
                "bowl",
                "banana",
                "apple",
                "sandwich",
                "orange",
                "broccoli",
                "carrot",
                "hot dog",
                "pizza",
                "donut",
                "cake",
                "chair",
                "couch",
                "potted plant",
                "bed",
                "dining table",
                "toilet",
                "tv",
                "laptop",
                "mouse",
                "remote",
                "keyboard",
                "cell phone",
                "microwave",
                "oven",
                "toaster",
                "sink",
                "refrigerator",
                "book",
                "clock",
                "vase",
                "scissors",
                "teddy bear",
                "hair drier",
                "toothbrush"
              ],
              "classes": 80,
              "confidenceThreshold": 0.5,
              "coordinates": 4,
              "iouThreshold": 0.5,
              "nnFamily": 0,
              "subtype": "yolov6r2"
            }
          }
        }
      ],
      [
        6,
        {
          "alias": "",
          "id": 6,
          "ioInfo": [
            [
              [
                "",
                "inputConfig"
              ],
              {
                "blocking": true,
                "group": "",
                "id": 12,
                "name": "inputConfig",
                "queueSize": 3,
                "type": 3,
                "waitForMessage": false
              }
            ],
            [
              [
                "",
                "inputImage"
              ],
              {
                "blocking": false,
                "group": "",
                "id": 13,
                "name": "inputImage",
                "queueSize": 1,
                "type": 3,
                "waitForMessage": false
              }
            ],
            [
              [
                "",
                "out"
              ],
              {
                "blocking": false,
                "group": "",
                "id": 14,
                "name": "out",
                "queueSize": 8,
                "type": 0,
                "waitForMessage": false
              }
            ]
          ],
          "logLevel": 3,
          "name": "ImageManip",
          "parentId": -1,
          "properties": {
            "backend": 0,
            "initialConfig": {
              "base": {
                "background": 0,
                "backgroundB": 0,
                "backgroundG": 0,
                "backgroundR": 0,
                "center": true,
                "colormap": 0,
                "operations": [],
                "outputHeight": 800,
                "outputWidth": 800,
                "resizeMode": 1,
                "undistort": false
              },
              "outputFrameType": 33,
              "reusePreviousImage": false,
              "skipCurrentImage": false
            },
            "numFramesPool": 4,
            "outputFrameSize": 1500300,
            "performanceMode": 0
          }
        }
      ],
      [
        9,
        {
          "alias": "",
          "id": 9,
          "ioInfo": [
            [
              [
                "",
                "out"
              ],
              {
                "blocking": false,
                "group": "",
                "id": 15,
                "name": "out",
                "queueSize": 8,
                "type": 0,
                "waitForMessage": false
              }
            ]
          ],
          "logLevel": 3,
          "name": "XLinkIn",
          "parentId": -1,
          "properties": {
            "maxDataSize": 18482688,
            "numFrames": 8,
            "streamName": "__x_0__inputControl"
          }
        }
      ],
      [
        11,
        {
          "alias": "",
          "id": 11,
          "ioInfo": [
            [
              [
                "",
                "out"
              ],
              {
                "blocking": false,
                "group": "",
                "id": 16,
                "name": "out",
                "queueSize": 8,
                "type": 0,
                "waitForMessage": false
              }
            ]
          ],
          "logLevel": 3,
          "name": "XLinkIn",
          "parentId": -1,
          "properties": {
            "maxDataSize": 18482688,
            "numFrames": 8,
            "streamName": "__x_6__inputConfig"
          }
        }
      ],
      [
        14,
        {
          "alias": "",
          "id": 14,
          "ioInfo": [
            [
              [
                "",
                "in"
              ],
              {
                "blocking": true,
                "group": "",
                "id": 18,
                "name": "in",
                "queueSize": 3,
                "type": 3,
                "waitForMessage": false
              }
            ]
          ],
          "logLevel": 3,
          "name": "XLinkOut",
          "parentId": -1,
          "properties": {
            "maxFpsLimit": -1.0,
            "metadataOnly": false,
            "streamName": "__x_5_out"
          }
        }
      ],
      [
        16,
        {
          "alias": "",
          "id": 16,
          "ioInfo": [
            [
              [
                "",
                "in"
              ],
              {
                "blocking": true,
                "group": "",
                "id": 19,
                "name": "in",
                "queueSize": 3,
                "type": 3,
                "waitForMessage": false
              }
            ]
          ],
          "logLevel": 3,
          "name": "XLinkOut",
          "parentId": -1,
          "properties": {
            "maxFpsLimit": -1.0,
            "metadataOnly": false,
            "streamName": "__x_6_out"
          }
        }
      ]
    ]
  }
}
```

## Source code

#### Python

```python
#!/usr/bin/env python3

import cv2
import depthai as dai
import time

# This example shows how to crop out the biggest detected person from the camera's maximum-resolution stream and set the autofocus and autoexposure regions to the same ROI.

def getBiggestPerson(imgDetections: dai.ImgDetections):
    """Return the person detection with the largest normalized bounding-box area.

    Scans all detections, keeps only label 0 ("person"), and returns the one
    whose (xmax-xmin)*(ymax-ymin) area is largest, or None if no person was detected.
    """
    best = None
    bestArea = 0
    for det in imgDetections.detections:
        if det.label != 0:  # skip everything that is not a person
            continue
        area = (det.xmax - det.xmin) * (det.ymax - det.ymin)
        if area > bestArea:
            best, bestArea = det, area
    return best

def displayFrame(name: str, frame: dai.ImgFrame, imgDetections: dai.ImgDetections, labelMap: dict):
    """Draw the detections remapped onto `frame` and show it in an OpenCV window.

    Args:
        name: OpenCV window title.
        frame: Frame to draw on; its transformation is the remap target.
        imgDetections: Detections carrying the transformation of the frame they
            were produced on (asserted to be present).
        labelMap: Maps a class label index to its human-readable name.
    """
    color = (0, 255, 0)
    assert imgDetections.getTransformation() is not None
    # RAW16 frames cannot be converted by getCvFrame, so use the raw buffer for them
    cvFrame = frame.getFrame() if frame.getType() == dai.ImgFrame.Type.RAW16 else frame.getCvFrame()
    # Shape of the frame the detections originated from, needed for denormalization.
    # Loop-invariant, so it is computed once instead of once per detection.
    normShape = imgDetections.getTransformation().getSize()
    for detection in imgDetections.detections:
        # Wrap the denormalized detection in a RotatedRect (angle 0) so it can be remapped
        rotRect = dai.RotatedRect(dai.Rect(dai.Point2f(detection.xmin, detection.ymin), dai.Point2f(detection.xmax, detection.ymax)).denormalize(normShape[0], normShape[1]), 0)
        # Remap the detection rectangle to the target frame's coordinate space
        remapped = imgDetections.getTransformation().remapRectTo(frame.getTransformation(), rotRect)
        # The remapped rectangle could be rotated, so take its axis-aligned bounding box
        bbox = [int(l) for l in remapped.getOuterRect()]
        cv2.putText(
            cvFrame,
            labelMap[detection.label],
            (bbox[0] + 10, bbox[1] + 20),
            cv2.FONT_HERSHEY_TRIPLEX,
            0.5,
            255,
        )
        cv2.putText(
            cvFrame,
            f"{int(detection.confidence * 100)}%",
            (bbox[0] + 10, bbox[1] + 40),
            cv2.FONT_HERSHEY_TRIPLEX,
            0.5,
            255,
        )
        cv2.rectangle(cvFrame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2)
    # Show the frame
    cv2.imshow(name, cvFrame)

def transformDetectionToSource(imgDetections: dai.ImgDetections, detection: dai.ImgDetection):
    """Map a normalized detection back to full-resolution source-frame coordinates.

    Denormalizes the detection on the frame it was produced on, applies the
    inverse transformation to reach the source frame, and returns the resulting
    axis-aligned dai.Rect clamped to the source-frame bounds.
    """
    transformation = imgDetections.getTransformation()
    normShape = transformation.getSize()
    srcWidth, srcHeight = transformation.getSourceSize()

    # Denormalized corners wrapped in a RotatedRect (angle 0) for the inverse transform
    detRect = dai.Rect(
        dai.Point2f(detection.xmin, detection.ymin),
        dai.Point2f(detection.xmax, detection.ymax),
    ).denormalize(normShape[0], normShape[1])
    srcRect = transformation.invTransformRect(dai.RotatedRect(detRect, 0))

    # Clamp the bounding box of the (possibly rotated) source rectangle to the frame
    xmin, ymin, xmax, ymax = srcRect.getOuterRect()
    topLeft = dai.Point2f(min(max(xmin, 0), srcWidth), min(max(ymin, 0), srcHeight))
    bottomRight = dai.Point2f(min(max(xmax, 0), srcWidth), min(max(ymax, 0), srcHeight))
    return dai.Rect(topLeft, bottomRight)

device = dai.Device()
# Fetch the YOLOv6-nano model matching the connected device's platform from the Model Zoo
modelPath = dai.getModelFromZoo(dai.NNModelDescription("yolov6-nano", platform=device.getPlatformAsString()))
modelArchive = dai.NNArchive(modelPath)
inputSize = modelArchive.getInputSize()
# Frame type requested by the model's preprocessing config (may be empty/None).
# Renamed from `type` so the builtin is not shadowed.
daiType = modelArchive.getConfig().model.inputs[0].preprocessing.daiType

frameType = None
if daiType:
    try:
        frameType = getattr(dai.ImgFrame.Type, daiType)
    except AttributeError:
        # Unknown type name in the archive — fall through to the platform default
        frameType = None

if frameType is None:
    # Default per platform: planar BGR on RVC2, interleaved BGR elsewhere
    if device.getPlatform() == dai.Platform.RVC2:
        frameType = dai.ImgFrame.Type.BGR888p
    else:
        frameType = dai.ImgFrame.Type.BGR888i

# Create pipeline
with dai.Pipeline(device) as pipeline:
    # Define source and output
    cam = pipeline.create(dai.node.Camera).build()
    # Host-side queue for sending CameraControl messages (AE/AF region updates)
    cameraControlQueue = cam.inputControl.createInputQueue()
    fullResStream = cam.requestFullResolutionOutput(useHighestResolution=True)

    # Downscale/convert the full-resolution stream to the NN's input size and frame type
    imageManip = pipeline.create(dai.node.ImageManip)
    imageManip.initialConfig.setOutputSize(inputSize[0], inputSize[1])
    imageManip.initialConfig.setFrameType(frameType)

    fullResStream.link(imageManip.inputImage)

    detectionNetwork = pipeline.create(dai.node.DetectionNetwork).build(imageManip.out, modelArchive)
    labelMap = detectionNetwork.getClasses()

    # Second ImageManip: crops the detected person out of the full-resolution stream
    imageManipCropOut = pipeline.create(dai.node.ImageManip)
    # Output frame buffer budget — presumably sized for an 800x800 NV12/BGR frame plus slack; TODO confirm
    imageManipCropOut.setMaxOutputFrameSize(round(1000*1000*1.5)+300)
    imageManipCropOut.initialConfig.setOutputSize(800, 800)
    # Non-blocking input with queue size 1 so the crop always uses the newest frame
    imageManipCropOut.inputImage.setBlocking(False)
    imageManipCropOut.inputImage.setMaxSize(1)

    # Host-side queue for sending per-frame crop configs to the crop node
    imageManipConfigQueue = imageManipCropOut.inputConfig.createInputQueue()
    imageManipCropOutQueue = imageManipCropOut.out.createOutputQueue()
    fullResStream.link(imageManipCropOut.inputImage)

    # Passthrough carries the NN input frames; out carries the detections
    videoQueue = detectionNetwork.passthrough.createOutputQueue()
    detectionQueue = detectionNetwork.out.createOutputQueue()

    # Connect to device and start pipeline
    pipeline.start()
    lastTimeToAutoFocus = time.time()
    while pipeline.isRunning():
        videoIn = videoQueue.get()
        detections = detectionQueue.get()
        biggestDetection = getBiggestPerson(detections)
        if biggestDetection:
            # Map the detection onto the full-resolution source frame
            sourceRect = transformDetectionToSource(detections, biggestDetection)
            # NOTE(review): despite the name, this is an ImageManipConfig message, not a queue
            configQueue = dai.ImageManipConfig()
            configQueue.addCrop(sourceRect, False)
            configQueue.setOutputSize(800, 800, dai.ImageManipConfig.ResizeMode.LETTERBOX)
            imageManipConfigQueue.send(configQueue)

            # Keep auto-exposure metering on the person's ROI every frame
            configCamera = dai.CameraControl()
            configCamera.setAutoExposureRegion(int(sourceRect.x), int(sourceRect.y), int(sourceRect.width), int(sourceRect.height))
            # Re-trigger autofocus on the ROI at most once every 5 seconds to avoid hunting
            if(time.time() - lastTimeToAutoFocus > 5):
                lastTimeToAutoFocus = time.time()
                configCamera.setAutoFocusRegion(int(sourceRect.x), int(sourceRect.y), int(sourceRect.width), int(sourceRect.height))
            cameraControlQueue.send(configCamera)
        # Crop output is polled non-blockingly; it may lag the detection stream
        imageManipCropOutFrame = imageManipCropOutQueue.tryGet()
        if imageManipCropOutFrame is not None:
            assert isinstance(imageManipCropOutFrame, dai.ImgFrame)
            cv2.imshow("Cropped out frame", imageManipCropOutFrame.getCvFrame())
        assert isinstance(videoIn, dai.ImgFrame)
        assert isinstance(detections, dai.ImgDetections)
        displayFrame("Full view video", videoIn, detections, labelMap)
        key = cv2.waitKey(1)
        if key == ord("q"):
            break
```

### Need assistance?

Head over to [Discussion Forum](https://discuss.luxonis.com/) for technical support or any other questions you might have.
