# Detection network

This example shows how to run YOLOv6 on-camera on the RGB stream. It downloads the pre-trained model weights from [Model
Zoo](https://docs.luxonis.com/software-v3/ai-inference/model-source/zoo.md), specifically the [YOLOv6
Nano](https://models.luxonis.com/luxonis/yolov6-nano/face58c4-45ab-42a0-bafc-19f9fee8a034) model along with its configuration
files.

As it's using the [DetectionNetwork](https://docs.luxonis.com/software-v3/depthai/depthai-components/nodes/detection_network.md)
node, it does NN output parsing (decoding) on the camera itself, and supports decoding both YOLO and SSD NN output formats.

This examples displays bounding boxes around detected objects on the video stream. Note that for video stream we use passthrough
output, so video stream resolution is the same as the NN input resolution (512x288). We could also display NN results in high
resolution, for more information please see [Resolution Techniques for
NNs](https://docs.luxonis.com/software-v3/depthai/tutorials/resolution-techniques.md).

## Demo

This example requires the DepthAI v3 API, see [installation instructions](https://docs.luxonis.com/software-v3/depthai.md).

## Pipeline

### examples/detection_network.pipeline.json

```json
{
  "pipeline": {
    "connections": [
      {
        "node1Id": 2,
        "node1Output": "passthrough",
        "node1OutputGroup": "",
        "node2Id": 3,
        "node2Input": "imageIn",
        "node2InputGroup": ""
      },
      {
        "node1Id": 2,
        "node1Output": "out",
        "node1OutputGroup": "",
        "node2Id": 3,
        "node2Input": "in",
        "node2InputGroup": ""
      },
      {
        "node1Id": 2,
        "node1Output": "passthrough",
        "node1OutputGroup": "",
        "node2Id": 4,
        "node2Input": "in",
        "node2InputGroup": ""
      },
      {
        "node1Id": 3,
        "node1Output": "out",
        "node1OutputGroup": "",
        "node2Id": 6,
        "node2Input": "in",
        "node2InputGroup": ""
      },
      {
        "node1Id": 0,
        "node1Output": "0",
        "node1OutputGroup": "dynamicOutputs",
        "node2Id": 2,
        "node2Input": "in",
        "node2InputGroup": ""
      }
    ],
    "globalProperties": {
      "calibData": null,
      "cameraTuningBlobSize": null,
      "cameraTuningBlobUri": "",
      "leonCssFrequencyHz": 700000000.0,
      "leonMssFrequencyHz": 700000000.0,
      "pipelineName": null,
      "pipelineVersion": null,
      "sippBufferSize": 18432,
      "sippDmaBufferSize": 16384,
      "xlinkChunkSize": -1
    },
    "nodes": [
      [
        6,
        {
          "alias": "",
          "id": 6,
          "ioInfo": [
            [
              [
                "",
                "in"
              ],
              {
                "blocking": true,
                "group": "",
                "id": 9,
                "name": "in",
                "queueSize": 3,
                "type": 3,
                "waitForMessage": false
              }
            ]
          ],
          "name": "XLinkOut",
          "parentId": -1,
          "properties": {
            "maxFpsLimit": -1.0,
            "metadataOnly": false,
            "streamName": "__x_3_out"
          }
        }
      ],
      [
        4,
        {
          "alias": "",
          "id": 4,
          "ioInfo": [
            [
              [
                "",
                "in"
              ],
              {
                "blocking": true,
                "group": "",
                "id": 8,
                "name": "in",
                "queueSize": 3,
                "type": 3,
                "waitForMessage": false
              }
            ]
          ],
          "name": "XLinkOut",
          "parentId": -1,
          "properties": {
            "maxFpsLimit": -1.0,
            "metadataOnly": false,
            "streamName": "__x_2_passthrough"
          }
        }
      ],
      [
        3,
        {
          "alias": "detectionParser",
          "id": 3,
          "ioInfo": [
            [
              [
                "",
                "in"
              ],
              {
                "blocking": true,
                "group": "",
                "id": 5,
                "name": "in",
                "queueSize": 1,
                "type": 3,
                "waitForMessage": true
              }
            ],
            [
              [
                "",
                "out"
              ],
              {
                "blocking": false,
                "group": "",
                "id": 7,
                "name": "out",
                "queueSize": 8,
                "type": 0,
                "waitForMessage": false
              }
            ],
            [
              [
                "",
                "imageIn"
              ],
              {
                "blocking": false,
                "group": "",
                "id": 6,
                "name": "imageIn",
                "queueSize": 1,
                "type": 3,
                "waitForMessage": true
              }
            ]
          ],
          "name": "DetectionParser",
          "parentId": 1,
          "properties": {
            "networkInputs": {},
            "numFramesPool": 8,
            "parser": {
              "anchorMasks": {},
              "anchors": [],
              "anchorsV2": [],
              "classes": 80,
              "confidenceThreshold": 0.5,
              "coordinates": 4,
              "iouThreshold": 0.5,
              "nnFamily": 0
            }
          }
        }
      ],
      [
        2,
        {
          "alias": "neuralNetwork",
          "id": 2,
          "ioInfo": [
            [
              [
                "",
                "in"
              ],
              {
                "blocking": true,
                "group": "",
                "id": 2,
                "name": "in",
                "queueSize": 3,
                "type": 3,
                "waitForMessage": false
              }
            ],
            [
              [
                "",
                "out"
              ],
              {
                "blocking": false,
                "group": "",
                "id": 3,
                "name": "out",
                "queueSize": 8,
                "type": 0,
                "waitForMessage": false
              }
            ],
            [
              [
                "",
                "passthrough"
              ],
              {
                "blocking": false,
                "group": "",
                "id": 4,
                "name": "passthrough",
                "queueSize": 8,
                "type": 0,
                "waitForMessage": false
              }
            ]
          ],
          "name": "NeuralNetwork",
          "parentId": 1,
          "properties": {
            "backend": "",
            "backendProperties": {},
            "blobSize": null,
            "blobUri": "",
            "modelSource": 1,
            "modelUri": "asset:__model",
            "numFrames": 8,
            "numNCEPerThread": 0,
            "numShavesPerThread": 0,
            "numThreads": 0
          }
        }
      ],
      [
        0,
        {
          "alias": "",
          "id": 0,
          "ioInfo": [
            [
              [
                "dynamicOutputs",
                "0"
              ],
              {
                "blocking": false,
                "group": "dynamicOutputs",
                "id": 1,
                "name": "0",
                "queueSize": 8,
                "type": 0,
                "waitForMessage": false
              }
            ],
            [
              [
                "",
                "inputControl"
              ],
              {
                "blocking": true,
                "group": "",
                "id": 0,
                "name": "inputControl",
                "queueSize": 3,
                "type": 3,
                "waitForMessage": false
              }
            ]
          ],
          "name": "Camera",
          "parentId": -1,
          "properties": {
            "boardSocket": 0,
            "calibAlpha": null,
            "cameraName": "",
            "colorOrder": 0,
            "fp16": false,
            "fps": 30.0,
            "imageOrientation": -1,
            "initialControl": {
              "aeLockMode": false,
              "aeMaxExposureTimeUs": 1553397456,
              "aeRegion": {
                "height": 0,
                "priority": 317625904,
                "width": 1,
                "x": 62784,
                "y": 23702
              },
              "afRegion": {
                "height": 23703,
                "priority": 1,
                "width": 112,
                "x": 1,
                "y": 0
              },
              "antiBandingMode": 150,
              "autoFocusMode": 3,
              "awbLockMode": false,
              "awbMode": 224,
              "brightness": 0,
              "captureIntent": 92,
              "chromaDenoise": 1,
              "cmdMask": 0,
              "contrast": -16,
              "controlMode": 1,
              "effectMode": 0,
              "expCompensation": 0,
              "expManual": {
                "exposureTimeUs": 1,
                "frameDurationUs": 1,
                "sensitivityIso": 1553399888
              },
              "frameSyncMode": 0,
              "lensPosAutoInfinity": 0,
              "lensPosAutoMacro": 145,
              "lensPosition": 0,
              "lensPositionRaw": 0.0,
              "lowPowerNumFramesBurst": 176,
              "lowPowerNumFramesDiscard": 0,
              "lumaDenoise": 18,
              "miscControls": [],
              "saturation": -92,
              "sceneMode": 245,
              "sharpness": 238,
              "strobeConfig": {
                "activeLevel": 144,
                "enable": 0,
                "gpioNumber": -99
              },
              "strobeTimings": {
                "durationUs": 1,
                "exposureBeginOffsetUs": 1,
                "exposureEndOffsetUs": 1553399952
              },
              "wbColorTemp": 0
            },
            "interleaved": true,
            "isp3aFps": 0,
            "ispScale": {
              "horizDenominator": 0,
              "horizNumerator": 0,
              "vertDenominator": 0,
              "vertNumerator": 0
            },
            "mockIspHeight": -1,
            "mockIspWidth": -1,
            "numFramesPoolIsp": 3,
            "numFramesPoolPreview": 4,
            "numFramesPoolRaw": 3,
            "numFramesPoolStill": 4,
            "numFramesPoolVideo": 4,
            "outputRequests": [
              {
                "fps": {
                  "value": {
                    "index": 0,
                    "value": 30.0
                  }
                },
                "resizeMode": 0,
                "size": {
                  "value": {
                    "index": 0,
                    "value": [
                      512,
                      288
                    ]
                  }
                },
                "type": 10
              }
            ],
            "previewHeight": 300,
            "previewKeepAspectRatio": false,
            "previewWidth": 300,
            "rawPacked": null,
            "resolutionHeight": -1,
            "resolutionWidth": -1,
            "sensorCropX": -1.0,
            "sensorCropY": -1.0,
            "sensorType": -1,
            "stillHeight": -1,
            "stillWidth": -1,
            "videoHeight": -1,
            "videoWidth": -1,
            "warpMeshHeight": 0,
            "warpMeshSource": -1,
            "warpMeshStepHeight": 32,
            "warpMeshStepWidth": 32,
            "warpMeshUri": "",
            "warpMeshWidth": 0
          }
        }
      ]
    ]
  }
}
```

## Source code

#### Python

```python
#!/usr/bin/env python3

from pathlib import Path
import cv2
import depthai as dai
import numpy as np
import time

# Create pipeline
with dai.Pipeline() as pipeline:
    cameraNode = pipeline.create(dai.node.Camera).build()
    detectionNetwork = pipeline.create(dai.node.DetectionNetwork).build(cameraNode, dai.NNModelDescription("yolov6-nano"))
    labelMap = detectionNetwork.getClasses()

    qRgb = detectionNetwork.passthrough.createOutputQueue()
    qDet = detectionNetwork.out.createOutputQueue()

    pipeline.start()

    frame = None
    detections = []
    startTime = time.monotonic()
    counter = 0
    color2 = (255, 255, 255)

    # nn data, being the bounding box locations, are in <0..1> range - they need to be normalized with frame width/height
    def frameNorm(frame, bbox):
        normVals = np.full(len(bbox), frame.shape[0])
        normVals[::2] = frame.shape[1]
        return (np.clip(np.array(bbox), 0, 1) * normVals).astype(int)

    def displayFrame(name, frame):
        color = (255, 0, 0)
        for detection in detections:
            bbox = frameNorm(
                frame,
                (detection.xmin, detection.ymin, detection.xmax, detection.ymax),
            )
            cv2.putText(
                frame,
                labelMap[detection.label],
                (bbox[0] + 10, bbox[1] + 20),
                cv2.FONT_HERSHEY_TRIPLEX,
                0.5,
                255,
            )
            cv2.putText(
                frame,
                f"{int(detection.confidence * 100)}%",
                (bbox[0] + 10, bbox[1] + 40),
                cv2.FONT_HERSHEY_TRIPLEX,
                0.5,
                255,
            )
            cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2)
        # Show the frame
        cv2.imshow(name, frame)

    while pipeline.isRunning():
        inRgb: dai.ImgFrame = qRgb.get()
        inDet: dai.ImgDetections = qDet.get()
        if inRgb is not None:
            frame = inRgb.getCvFrame()
            cv2.putText(
                frame,
                "NN fps: {:.2f}".format(counter / (time.monotonic() - startTime)),
                (2, frame.shape[0] - 4),
                cv2.FONT_HERSHEY_TRIPLEX,
                0.4,
                color2,
            )

        if inDet is not None:
            detections = inDet.detections
            counter += 1

        if frame is not None:
            displayFrame("rgb", frame)
            print("FPS: {:.2f}".format(counter / (time.monotonic() - startTime)))
        if cv2.waitKey(1) == ord("q"):
            pipeline.stop()
            break
```

#### C++

```cpp
#include <chrono>
#include <iostream>
#include <opencv2/opencv.hpp>

#include "depthai/depthai.hpp"

// Helper function to normalize frame coordinates
cv::Rect frameNorm(const cv::Mat& frame, const dai::Point2f& topLeft, const dai::Point2f& bottomRight) {
    float width = frame.cols, height = frame.rows;
    return cv::Rect(cv::Point(topLeft.x * width, topLeft.y * height), cv::Point(bottomRight.x * width, bottomRight.y * height));
}

int main() {
    // Create pipeline
    dai::Pipeline pipeline;

    // Create and configure camera node
    auto cameraNode = pipeline.create<dai::node::Camera>();
    cameraNode->build();

    // Create and configure detection network node
    auto detectionNetwork = pipeline.create<dai::node::DetectionNetwork>();

    dai::NNModelDescription modelDescription;
    modelDescription.model = "yolov6-nano";
    detectionNetwork->build(cameraNode, modelDescription);
    auto labelMap = detectionNetwork->getClasses();

    // Create output queues
    auto qRgb = detectionNetwork->passthrough.createOutputQueue();
    auto qDet = detectionNetwork->out.createOutputQueue();

    cv::Mat frame;
    auto startTime = std::chrono::steady_clock::now();
    int counter = 0;
    cv::Scalar color(255, 0, 0);
    cv::Scalar textColor(255, 255, 255);

    pipeline.start();
    while(pipeline.isRunning()) {
        auto inRgb = qRgb->get<dai::ImgFrame>();
        auto inDet = qDet->get<dai::ImgDetections>();

        if(inRgb != nullptr) {
            frame = inRgb->getCvFrame();

            // Add FPS text
            auto currentTime = std::chrono::steady_clock::now();
            float fps = counter / std::chrono::duration<float>(currentTime - startTime).count();
            cv::putText(frame, "NN fps: " + std::to_string(fps), cv::Point(2, frame.rows - 4), cv::FONT_HERSHEY_TRIPLEX, 0.4, textColor);
        }

        if(inDet != nullptr) {
            counter++;
        }

        if(!frame.empty()) {
            // Display detections
            for(const auto& detection : inDet->detections) {
                auto bbox = frameNorm(frame, dai::Point2f(detection.xmin, detection.ymin), dai::Point2f(detection.xmax, detection.ymax));

                // Draw label
                cv::putText(frame, labelMap.value()[detection.label], cv::Point(bbox.x + 10, bbox.y + 20), cv::FONT_HERSHEY_TRIPLEX, 0.5, textColor);

                // Draw confidence
                cv::putText(frame,
                            std::to_string(static_cast<int>(detection.confidence * 100)) + "%",
                            cv::Point(bbox.x + 10, bbox.y + 40),
                            cv::FONT_HERSHEY_TRIPLEX,
                            0.5,
                            textColor);

                // Draw rectangle
                cv::rectangle(frame, bbox, color, 2);
            }

            // Show the frame
            cv::imshow("rgb", frame);

            auto currentTime = std::chrono::steady_clock::now();
            float fps = counter / std::chrono::duration<float>(currentTime - startTime).count();
            std::cout << "FPS: " << fps << std::endl;
        }

        if(cv::waitKey(1) == 'q') {
            break;
        }
    }

    return 0;
}
```

### Need assistance?

Head over to [Discussion Forum](https://discuss.luxonis.com/) for technical support or any other questions you might have.