# Benchmark NN

This example showcases how to use both the [BenchmarkOut
node](https://docs.luxonis.com/software-v3/depthai/depthai-components/nodes/benchmark_out.md) and the
[BenchmarkIn](https://docs.luxonis.com/software-v3/depthai/depthai-components/nodes/benchmark_in.md) node to measure the
performance of an NN model.

BenchmarkOut outputs messages as fast as possible, while BenchmarkIn measures the rate at which messages arrive; linking BenchmarkOut
-> NeuralNetwork -> BenchmarkIn therefore measures the throughput of the NN model.

## Demo

The yolov6-nano NN model should run at ~273 FPS on an OAK4 camera, and at ~67 FPS on an OAK camera.

```bash
Benchmark $ python3.9 benchmark_nn.py
FPS is 273.2430114746094
FPS is 273.161376953125
FPS is 273.22802734375
```

This example requires the DepthAI v3 API, see [installation instructions](https://docs.luxonis.com/software-v3/depthai.md).

## Pipeline

### examples/benchmark_nn.pipeline.json

```json
{
  "pipeline": {
    "connections": [
      {
        "node1Id": 5,
        "node1Output": "out",
        "node1OutputGroup": "",
        "node2Id": 0,
        "node2Input": "input",
        "node2InputGroup": ""
      },
      {
        "node1Id": 2,
        "node1Output": "report",
        "node1OutputGroup": "",
        "node2Id": 6,
        "node2Input": "in",
        "node2InputGroup": ""
      },
      {
        "node1Id": 1,
        "node1Output": "out",
        "node1OutputGroup": "",
        "node2Id": 2,
        "node2Input": "input",
        "node2InputGroup": ""
      },
      {
        "node1Id": 0,
        "node1Output": "out",
        "node1OutputGroup": "",
        "node2Id": 1,
        "node2Input": "in",
        "node2InputGroup": ""
      }
    ],
    "globalProperties": {
      "calibData": null,
      "cameraTuningBlobSize": null,
      "cameraTuningBlobUri": "",
      "leonCssFrequencyHz": 700000000.0,
      "leonMssFrequencyHz": 700000000.0,
      "pipelineName": null,
      "pipelineVersion": null,
      "sippBufferSize": 18432,
      "sippDmaBufferSize": 16384,
      "xlinkChunkSize": -1
    },
    "nodes": [
      [
        2,
        {
          "alias": "",
          "id": 2,
          "ioInfo": [
            [
              [
                "",
                "input"
              ],
              {
                "blocking": false,
                "group": "",
                "id": 5,
                "name": "input",
                "queueSize": 4,
                "type": 3,
                "waitForMessage": false
              }
            ],
            [
              [
                "",
                "passthrough"
              ],
              {
                "blocking": false,
                "group": "",
                "id": 6,
                "name": "passthrough",
                "queueSize": 8,
                "type": 0,
                "waitForMessage": false
              }
            ],
            [
              [
                "",
                "report"
              ],
              {
                "blocking": false,
                "group": "",
                "id": 7,
                "name": "report",
                "queueSize": 8,
                "type": 0,
                "waitForMessage": false
              }
            ]
          ],
          "logLevel": 3,
          "name": "BenchmarkIn",
          "parentId": -1,
          "properties": {
            "attachLatencies": false,
            "logReportsAsWarnings": false,
            "reportEveryNMessages": 100
          }
        }
      ],
      [
        6,
        {
          "alias": "",
          "id": 6,
          "ioInfo": [
            [
              [
                "",
                "in"
              ],
              {
                "blocking": true,
                "group": "",
                "id": 9,
                "name": "in",
                "queueSize": 3,
                "type": 3,
                "waitForMessage": false
              }
            ]
          ],
          "logLevel": 3,
          "name": "XLinkOut",
          "parentId": -1,
          "properties": {
            "maxFpsLimit": -1.0,
            "metadataOnly": false,
            "streamName": "__x_2_report"
          }
        }
      ],
      [
        1,
        {
          "alias": "",
          "id": 1,
          "ioInfo": [
            [
              [
                "",
                "in"
              ],
              {
                "blocking": true,
                "group": "",
                "id": 2,
                "name": "in",
                "queueSize": 3,
                "type": 3,
                "waitForMessage": true
              }
            ],
            [
              [
                "",
                "out"
              ],
              {
                "blocking": false,
                "group": "",
                "id": 3,
                "name": "out",
                "queueSize": 8,
                "type": 0,
                "waitForMessage": false
              }
            ],
            [
              [
                "",
                "passthrough"
              ],
              {
                "blocking": false,
                "group": "",
                "id": 4,
                "name": "passthrough",
                "queueSize": 8,
                "type": 0,
                "waitForMessage": false
              }
            ]
          ],
          "logLevel": 3,
          "name": "NeuralNetwork",
          "parentId": -1,
          "properties": {
            "backend": "",
            "backendProperties": {},
            "blobSize": 8689834,
            "blobUri": "asset:__blob",
            "modelSource": 0,
            "modelUri": "",
            "numFrames": 8,
            "numNCEPerThread": 0,
            "numShavesPerThread": 0,
            "numThreads": 0
          }
        }
      ],
      [
        5,
        {
          "alias": "",
          "id": 5,
          "ioInfo": [
            [
              [
                "",
                "out"
              ],
              {
                "blocking": false,
                "group": "",
                "id": 8,
                "name": "out",
                "queueSize": 8,
                "type": 0,
                "waitForMessage": false
              }
            ]
          ],
          "logLevel": 3,
          "name": "XLinkIn",
          "parentId": -1,
          "properties": {
            "maxDataSize": 5242880,
            "numFrames": 8,
            "streamName": "__x_0__input"
          }
        }
      ],
      [
        0,
        {
          "alias": "",
          "id": 0,
          "ioInfo": [
            [
              [
                "",
                "input"
              ],
              {
                "blocking": true,
                "group": "",
                "id": 0,
                "name": "input",
                "queueSize": 1,
                "type": 3,
                "waitForMessage": false
              }
            ],
            [
              [
                "",
                "out"
              ],
              {
                "blocking": false,
                "group": "",
                "id": 1,
                "name": "out",
                "queueSize": 8,
                "type": 0,
                "waitForMessage": false
              }
            ]
          ],
          "logLevel": 3,
          "name": "BenchmarkOut",
          "parentId": -1,
          "properties": {
            "fps": -1.0,
            "numMessages": -1
          }
        }
      ]
    ]
  }
}
```

## Source code

#### Python

```python
import depthai as dai
import numpy as np

# First prepare the model for benchmarking:
# download the model from the zoo for the connected device's platform.
device = dai.Device()
modelPath = dai.getModelFromZoo(dai.NNModelDescription("yolov6-nano", platform=device.getPlatformAsString()))
modelArchive = dai.NNArchive(modelPath)
inputSize = modelArchive.getInputSize()
# Preferred frame type as declared by the model archive (may be None or invalid).
# NOTE: named `daiType` rather than `type` to avoid shadowing the Python builtin.
daiType = modelArchive.getConfig().model.inputs[0].preprocessing.daiType

frameType = None
if daiType:
    try:
        frameType = getattr(dai.ImgFrame.Type, daiType)
    except AttributeError:
        # The archive declared a frame type DepthAI does not know about;
        # fall through to the platform default below.
        frameType = None

if frameType is None:
    # Platform default: RVC2 expects planar BGR, newer platforms interleaved BGR.
    if device.getPlatform() == dai.Platform.RVC2:
        frameType = dai.ImgFrame.Type.BGR888p
    else:
        frameType = dai.ImgFrame.Type.BGR888i

# Construct the input (white) image for benchmarking.
# inputSize is (width, height); numpy arrays are (rows, cols) = (height, width).
img = np.ones((inputSize[1], inputSize[0], 3), np.uint8) * 255
inputFrame = dai.ImgFrame()
inputFrame.setCvFrame(img, frameType)

with dai.Pipeline(device) as p:
    benchmarkOut = p.create(dai.node.BenchmarkOut)
    benchmarkOut.setRunOnHost(False) # The node can run on host or on device
    benchmarkOut.setFps(-1) # As fast as possible

    neuralNetwork = p.create(dai.node.NeuralNetwork).build(benchmarkOut.out, modelArchive)

    benchmarkIn = p.create(dai.node.BenchmarkIn)
    benchmarkIn.setRunOnHost(False) # The node can run on host or on device
    benchmarkIn.sendReportEveryNMessages(100)
    benchmarkIn.logReportsAsWarnings(False)
    neuralNetwork.out.link(benchmarkIn.input)

    outputQueue = benchmarkIn.report.createOutputQueue()
    inputQueue = benchmarkOut.input.createInputQueue()

    p.start()
    inputQueue.send(inputFrame) # Send the input image only once; BenchmarkOut re-sends it
    while p.isRunning():
        benchmarkReport = outputQueue.get()
        assert isinstance(benchmarkReport, dai.BenchmarkReport)
        print(f"FPS is {benchmarkReport.fps}")

#### C++

```cpp
#include <depthai/depthai.hpp>
#include <iostream>
#include <opencv2/opencv.hpp>

int main() {
    // First prepare the model for benchmarking
    std::shared_ptr<dai::Device> device = std::make_shared<dai::Device>();

    dai::NNModelDescription modelDescription;
    modelDescription.model = "yolov6-nano";
    modelDescription.platform = device->getPlatformAsString();

    auto modelPath = getModelFromZoo(modelDescription);
    dai::NNArchive modelArchive(modelPath);
    auto inputSize = modelArchive.getInputSize().value();

    dai::ImgFrame::Type frameType;
    if(device->getPlatform() == dai::Platform::RVC2) {
        frameType = dai::ImgFrame::Type::BGR888p;
    } else {
        frameType = dai::ImgFrame::Type::BGR888i;
    }

    // Construct the input (white) image for benchmarking
    cv::Mat img(std::get<1>(inputSize), std::get<0>(inputSize), CV_8UC3, cv::Scalar(255, 255, 255));
    auto inputFrame = std::make_shared<dai::ImgFrame>();
    inputFrame->setCvFrame(img, frameType);

    dai::Pipeline pipeline(device);

    auto benchmarkOut = pipeline.create<dai::node::BenchmarkOut>();
    benchmarkOut->setRunOnHost(false);  // The node can run on host or on device
    benchmarkOut->setFps(-1);           // As fast as possible

    auto neuralNetwork = pipeline.create<dai::node::NeuralNetwork>();
    neuralNetwork->setNNArchive(modelArchive);

    auto benchmarkIn = pipeline.create<dai::node::BenchmarkIn>();
    benchmarkIn->setRunOnHost(false);  // The node can run on host or on device
    benchmarkIn->sendReportEveryNMessages(100);
    benchmarkIn->logReportsAsWarnings(false);

    // Linking
    benchmarkOut->out.link(neuralNetwork->input);
    neuralNetwork->out.link(benchmarkIn->input);

    auto outputQueue = benchmarkIn->report.createOutputQueue();
    auto inputQueue = benchmarkOut->input.createInputQueue();

    pipeline.start();
    inputQueue->send(inputFrame);

    while(pipeline.isRunning()) {
        auto benchmarkReport = outputQueue->get<dai::BenchmarkReport>();
        std::cout << "FPS is " << benchmarkReport->fps << std::endl;
    }

    return 0;
}
```

### Need assistance?

Head over to [Discussion Forum](https://discuss.luxonis.com/) for technical support or any other questions you might have.
