# Object Tracker Replay

This example demonstrates running YOLOv6-nano object detection with the
[ObjectTracker](https://docs.luxonis.com/software-v3/depthai/depthai-components/nodes/object_tracker.md) node on a replayed video
using the ReplayVideo host node (or a live camera when requested).

It displays per-object Track IDs and status; this demo does not compute XYZ coordinates.

## Demo

This example requires the DepthAI v3 API, see [installation instructions](https://docs.luxonis.com/software-v3/depthai.md).

## Run

Default (demo clip):

```bash
python object_tracker_replay.py
```

Use your own video file:

```bash
python object_tracker_replay.py -i /path/to/video.mp4
```

Use the live camera instead of a file:

```bash
python object_tracker_replay.py --camera True
```

## Pipeline

### examples/object_tracker_replay(daiv3).pipeline.json

```json
{
  "pipeline": {
    "connections": [
      {
        "node1Id": 0,
        "node1Output": "0",
        "node1OutputGroup": "dynamicOutputs",
        "node2Id": 2,
        "node2Input": "in",
        "node2InputGroup": ""
      },
      {
        "node1Id": 2,
        "node1Output": "out",
        "node1OutputGroup": "",
        "node2Id": 3,
        "node2Input": "in",
        "node2InputGroup": ""
      },
      {
        "node1Id": 2,
        "node1Output": "passthrough",
        "node1OutputGroup": "",
        "node2Id": 4,
        "node2Input": "inputTrackerFrame",
        "node2InputGroup": ""
      },
      {
        "node1Id": 2,
        "node1Output": "passthrough",
        "node1OutputGroup": "",
        "node2Id": 4,
        "node2Input": "inputDetectionFrame",
        "node2InputGroup": ""
      },
      {
        "node1Id": 3,
        "node1Output": "out",
        "node1OutputGroup": "",
        "node2Id": 4,
        "node2Input": "inputDetections",
        "node2InputGroup": ""
      }
    ],
    "globalProperties": {
      "calibData": null,
      "cameraTuningBlobSize": null,
      "cameraTuningBlobUri": "",
      "eepromId": 0,
      "leonCssFrequencyHz": 700000000.0,
      "leonMssFrequencyHz": 700000000.0,
      "pipelineName": null,
      "pipelineVersion": null,
      "sippBufferSize": 18432,
      "sippDmaBufferSize": 16384,
      "xlinkChunkSize": -1
    },
    "nodes": [
      [
        0,
        {
          "alias": "",
          "id": 0,
          "ioInfo": [
            [
              [
                "",
                "inputControl"
              ],
              {
                "blocking": true,
                "group": "",
                "id": 0,
                "name": "inputControl",
                "queueSize": 3,
                "type": 3,
                "waitForMessage": false
              }
            ],
            [
              [
                "dynamicOutputs",
                "0"
              ],
              {
                "blocking": false,
                "group": "dynamicOutputs",
                "id": 3,
                "name": "0",
                "queueSize": 8,
                "type": 0,
                "waitForMessage": false
              }
            ],
            [
              [
                "",
                "mockIsp"
              ],
              {
                "blocking": true,
                "group": "",
                "id": 1,
                "name": "mockIsp",
                "queueSize": 8,
                "type": 3,
                "waitForMessage": false
              }
            ],
            [
              [
                "",
                "raw"
              ],
              {
                "blocking": false,
                "group": "",
                "id": 2,
                "name": "raw",
                "queueSize": 8,
                "type": 0,
                "waitForMessage": false
              }
            ]
          ],
          "logLevel": 3,
          "name": "Camera",
          "parentId": -1,
          "properties": {
            "boardSocket": 0,
            "cameraName": "",
            "fps": -1.0,
            "imageOrientation": -1,
            "initialControl": {
              "aeLockMode": false,
              "aeMaxExposureTimeUs": 610418160,
              "aeRegion": {
                "height": 0,
                "priority": 46729648,
                "width": 557,
                "x": 2416,
                "y": 713
              },
              "afRegion": {
                "height": 9061,
                "priority": 557,
                "width": 25136,
                "x": 557,
                "y": 0
              },
              "antiBandingMode": 98,
              "autoFocusMode": 3,
              "awbLockMode": false,
              "awbMode": 176,
              "brightness": 0,
              "captureIntent": 36,
              "chromaDenoise": 45,
              "cmdMask": 0,
              "contrast": -16,
              "controlMode": 45,
              "effectMode": 2,
              "enableHdr": false,
              "expCompensation": 0,
              "expManual": {
                "exposureTimeUs": 557,
                "frameDurationUs": 557,
                "sensitivityIso": 46421104
              },
              "frameSyncMode": 0,
              "lensPosAutoInfinity": 176,
              "lensPosAutoMacro": 244,
              "lensPosition": 0,
              "lensPositionRaw": 0.0,
              "lowPowerNumFramesBurst": 48,
              "lowPowerNumFramesDiscard": 1,
              "lumaDenoise": 2,
              "miscControls": [],
              "saturation": -59,
              "sceneMode": 53,
              "sharpness": 204,
              "strobeConfig": {
                "activeLevel": 240,
                "enable": 0,
                "gpioNumber": 4
              },
              "strobeTimings": {
                "durationUs": 557,
                "exposureBeginOffsetUs": 557,
                "exposureEndOffsetUs": 610435840
              },
              "wbColorTemp": 0
            },
            "isp3aFps": 0,
            "mockIspHeight": -1,
            "mockIspWidth": -1,
            "numFramesPoolIsp": 3,
            "numFramesPoolPreview": 4,
            "numFramesPoolRaw": 3,
            "numFramesPoolStill": 4,
            "numFramesPoolVideo": 4,
            "outputRequests": [
              {
                "enableUndistortion": null,
                "fps": {
                  "value": null
                },
                "resizeMode": 0,
                "size": {
                  "value": {
                    "index": 0,
                    "value": [
                      512,
                      288
                    ]
                  }
                },
                "type": 10
              }
            ],
            "resolutionHeight": -1,
            "resolutionWidth": -1,
            "sensorType": -1
          }
        }
      ],
      [
        2,
        {
          "alias": "neuralNetwork",
          "id": 2,
          "ioInfo": [
            [
              [
                "",
                "in"
              ],
              {
                "blocking": false,
                "group": "",
                "id": 4,
                "name": "in",
                "queueSize": 3,
                "type": 3,
                "waitForMessage": true
              }
            ],
            [
              [
                "",
                "out"
              ],
              {
                "blocking": false,
                "group": "",
                "id": 5,
                "name": "out",
                "queueSize": 8,
                "type": 0,
                "waitForMessage": false
              }
            ],
            [
              [
                "",
                "passthrough"
              ],
              {
                "blocking": false,
                "group": "",
                "id": 6,
                "name": "passthrough",
                "queueSize": 8,
                "type": 0,
                "waitForMessage": false
              }
            ]
          ],
          "logLevel": 3,
          "name": "NeuralNetwork",
          "parentId": 1,
          "properties": {
            "backend": "",
            "backendProperties": {},
            "blobSize": null,
            "blobUri": "",
            "modelSource": 1,
            "modelUri": "asset:__model",
            "numFrames": 8,
            "numNCEPerThread": 0,
            "numShavesPerThread": 0,
            "numThreads": 0
          }
        }
      ],
      [
        3,
        {
          "alias": "detectionParser",
          "id": 3,
          "ioInfo": [
            [
              [
                "",
                "in"
              ],
              {
                "blocking": true,
                "group": "",
                "id": 7,
                "name": "in",
                "queueSize": 1,
                "type": 3,
                "waitForMessage": true
              }
            ],
            [
              [
                "",
                "out"
              ],
              {
                "blocking": false,
                "group": "",
                "id": 8,
                "name": "out",
                "queueSize": 8,
                "type": 0,
                "waitForMessage": false
              }
            ]
          ],
          "logLevel": 3,
          "name": "DetectionParser",
          "parentId": 1,
          "properties": {
            "networkInputs": {},
            "numFramesPool": 8,
            "parser": {
              "anchorMasks": {},
              "anchors": [],
              "anchorsV2": [],
              "classNames": [
                "person",
                "bicycle",
                "car",
                "motorcycle",
                "airplane",
                "bus",
                "train",
                "truck",
                "boat",
                "traffic light",
                "fire hydrant",
                "stop sign",
                "parking meter",
                "bench",
                "bird",
                "cat",
                "dog",
                "horse",
                "sheep",
                "cow",
                "elephant",
                "bear",
                "zebra",
                "giraffe",
                "backpack",
                "umbrella",
                "handbag",
                "tie",
                "suitcase",
                "frisbee",
                "skis",
                "snowboard",
                "sports ball",
                "kite",
                "baseball bat",
                "baseball glove",
                "skateboard",
                "surfboard",
                "tennis racket",
                "bottle",
                "wine glass",
                "cup",
                "fork",
                "knife",
                "spoon",
                "bowl",
                "banana",
                "apple",
                "sandwich",
                "orange",
                "broccoli",
                "carrot",
                "hot dog",
                "pizza",
                "donut",
                "cake",
                "chair",
                "couch",
                "potted plant",
                "bed",
                "dining table",
                "toilet",
                "tv",
                "laptop",
                "mouse",
                "remote",
                "keyboard",
                "cell phone",
                "microwave",
                "oven",
                "toaster",
                "sink",
                "refrigerator",
                "book",
                "clock",
                "vase",
                "scissors",
                "teddy bear",
                "hair drier",
                "toothbrush"
              ],
              "classes": 80,
              "confidenceThreshold": 0.6000000238418579,
              "coordinates": 4,
              "iouThreshold": 0.5,
              "nnFamily": 0,
              "subtype": "yolov6r2"
            }
          }
        }
      ],
      [
        4,
        {
          "alias": "",
          "id": 4,
          "ioInfo": [
            [
              [
                "",
                "inputTrackerFrame"
              ],
              {
                "blocking": false,
                "group": "",
                "id": 9,
                "name": "inputTrackerFrame",
                "queueSize": 4,
                "type": 3,
                "waitForMessage": true
              }
            ],
            [
              [
                "",
                "inputDetectionFrame"
              ],
              {
                "blocking": false,
                "group": "",
                "id": 10,
                "name": "inputDetectionFrame",
                "queueSize": 4,
                "type": 3,
                "waitForMessage": true
              }
            ],
            [
              [
                "",
                "inputDetections"
              ],
              {
                "blocking": false,
                "group": "",
                "id": 11,
                "name": "inputDetections",
                "queueSize": 4,
                "type": 3,
                "waitForMessage": true
              }
            ],
            [
              [
                "",
                "inputConfig"
              ],
              {
                "blocking": false,
                "group": "",
                "id": 12,
                "name": "inputConfig",
                "queueSize": 4,
                "type": 3,
                "waitForMessage": false
              }
            ],
            [
              [
                "",
                "passthroughDetections"
              ],
              {
                "blocking": false,
                "group": "",
                "id": 16,
                "name": "passthroughDetections",
                "queueSize": 8,
                "type": 0,
                "waitForMessage": false
              }
            ],
            [
              [
                "",
                "out"
              ],
              {
                "blocking": false,
                "group": "",
                "id": 13,
                "name": "out",
                "queueSize": 8,
                "type": 0,
                "waitForMessage": false
              }
            ],
            [
              [
                "",
                "passthroughTrackerFrame"
              ],
              {
                "blocking": false,
                "group": "",
                "id": 14,
                "name": "passthroughTrackerFrame",
                "queueSize": 8,
                "type": 0,
                "waitForMessage": false
              }
            ],
            [
              [
                "",
                "passthroughDetectionFrame"
              ],
              {
                "blocking": false,
                "group": "",
                "id": 15,
                "name": "passthroughDetectionFrame",
                "queueSize": 8,
                "type": 0,
                "waitForMessage": false
              }
            ]
          ],
          "logLevel": 3,
          "name": "ObjectTracker",
          "parentId": -1,
          "properties": {
            "detectionLabelsToTrack": [
              0
            ],
            "maxObjectsToTrack": 60,
            "occlusionRatioThreshold": 0.20000000298023224,
            "trackerIdAssignmentPolicy": 1,
            "trackerThreshold": 0.0,
            "trackerType": 3,
            "trackingPerClass": true,
            "trackletBirthThreshold": 3,
            "trackletMaxLifespan": 120
          }
        }
      ]
    ]
  }
}
```

## Source code

#### Python

```python
#!/usr/bin/env python3
"""YOLOv6-nano object detection + ObjectTracker on a replayed video or live camera.

Displays each tracked object's label, track ID and tracking status, plus the
NN frame rate, in an OpenCV window. Press 'q' to quit.
"""

import cv2
import depthai as dai
import time

from pathlib import Path
from argparse import ArgumentParser

scriptDir = Path(__file__).resolve().parent
examplesRoot = (scriptDir / Path('../')).resolve()  # This resolves the parent directory correctly
models = examplesRoot / 'models'
videoPath = models / 'construction_vest.mp4'


def _str2bool(value) -> bool:
    """Parse an explicit boolean CLI value ("True"/"False", case-insensitive).

    argparse's `type=bool` treats ANY non-empty string (including "False") as
    True, so `--camera False` would silently enable the camera. This parser
    keeps the documented `--camera True` usage working while making
    `--camera False` actually evaluate to False.
    """
    return str(value).strip().lower() in ("1", "true", "yes", "y", "on")


parser = ArgumentParser()
parser.add_argument("-i", "--inputVideo", default=videoPath, help="Input video name")
parser.add_argument("-c", "--camera", type=_str2bool, help="Use camera as input", default=False)
args = parser.parse_args()

# Create pipeline
with dai.Pipeline() as pipeline:
    # Define the frame source: live camera or replayed video file
    inputSource = None
    if args.camera:
        camRgb = pipeline.create(dai.node.Camera).build(dai.CameraBoardSocket.CAM_A)
        inputSource = camRgb
    else:
        replay = pipeline.create(dai.node.ReplayVideo)
        replay.setReplayVideoFile(Path(args.inputVideo))
        inputSource = replay

    detectionNetwork = pipeline.create(dai.node.DetectionNetwork).build(inputSource, "yolov6-nano")
    objectTracker = pipeline.create(dai.node.ObjectTracker)

    detectionNetwork.setConfidenceThreshold(0.6)
    detectionNetwork.input.setBlocking(False)
    labelMap = detectionNetwork.getClasses()

    objectTracker.setDetectionLabelsToTrack([0])  # track only person
    # possible tracking types: ZERO_TERM_COLOR_HISTOGRAM, ZERO_TERM_IMAGELESS, SHORT_TERM_IMAGELESS, SHORT_TERM_KCF
    objectTracker.setTrackerType(dai.TrackerType.SHORT_TERM_IMAGELESS)
    # take the smallest ID when new object is tracked, possible options: SMALLEST_ID, UNIQUE_ID
    objectTracker.setTrackerIdAssignmentPolicy(dai.TrackerIdAssignmentPolicy.SMALLEST_ID)

    preview = objectTracker.passthroughTrackerFrame.createOutputQueue()
    tracklets = objectTracker.out.createOutputQueue()

    # The NN passthrough frame feeds both tracker frame inputs; the detections
    # themselves feed the tracker's detections input.
    detectionNetwork.passthrough.link(objectTracker.inputTrackerFrame)
    detectionNetwork.passthrough.link(objectTracker.inputDetectionFrame)
    detectionNetwork.out.link(objectTracker.inputDetections)

    startTime = time.monotonic()
    counter = 0
    fps = 0
    color = (255, 255, 255)
    pipeline.start()
    while pipeline.isRunning():
        imgFrame = preview.get()
        track = tracklets.get()
        assert isinstance(imgFrame, dai.ImgFrame), "Expected ImgFrame"
        assert isinstance(track, dai.Tracklets), "Expected Tracklets"

        # Refresh the FPS estimate roughly once per second
        counter += 1
        current_time = time.monotonic()
        if (current_time - startTime) > 1:
            fps = counter / (current_time - startTime)
            counter = 0
            startTime = current_time

        frame = imgFrame.getCvFrame()
        trackletsData = track.tracklets
        for t in trackletsData:
            # The tracklet ROI is normalized [0..1]; convert to pixel coordinates
            roi = t.roi.denormalize(frame.shape[1], frame.shape[0])
            x1 = int(roi.topLeft().x)
            y1 = int(roi.topLeft().y)
            x2 = int(roi.bottomRight().x)
            y2 = int(roi.bottomRight().y)

            try:
                label = labelMap[t.label]
            except (IndexError, TypeError):
                # Fall back to the numeric label when it is out of range
                # or the model exposes no class list
                label = t.label

            cv2.putText(frame, str(label), (x1 + 10, y1 + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            # f"{t.id}" (not f"{[t.id]}"), so the ID renders as "ID: 5" rather than "ID: [5]"
            cv2.putText(frame, f"ID: {t.id}", (x1 + 10, y1 + 35), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(frame, t.status.name, (x1 + 10, y1 + 50), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            # Thickness 2; the original passed cv2.FONT_HERSHEY_SIMPLEX (== 0),
            # which is not a valid line thickness
            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)

        cv2.putText(frame, "NN fps: {:.2f}".format(fps), (2, frame.shape[0] - 4), cv2.FONT_HERSHEY_TRIPLEX, 0.4, color)

        cv2.imshow("tracker", frame)

        if cv2.waitKey(1) == ord('q'):
            break
```

#### C++

```cpp
#include <chrono>
#include <depthai/depthai.hpp>
#include <opencv2/opencv.hpp>

#include "depthai/pipeline/datatype/Tracklets.hpp"

int main() {
    // Create pipeline
    dai::Pipeline pipeline;

    // Define sources and outputs
    auto replay = pipeline.create<dai::node::ReplayVideo>();
    replay->setReplayVideoFile(VIDEO_PATH);

    // Create spatial detection network
    dai::NNModelDescription modelDescription{"yolov6-nano"};
    auto detectionNetwork = pipeline.create<dai::node::DetectionNetwork>()->build(replay, modelDescription);
    detectionNetwork->setConfidenceThreshold(0.6f);
    detectionNetwork->input.setBlocking(false);

    // Create object tracker
    auto objectTracker = pipeline.create<dai::node::ObjectTracker>();
    objectTracker->setDetectionLabelsToTrack({0});  // track only person
    objectTracker->setTrackerType(dai::TrackerType::SHORT_TERM_IMAGELESS);
    objectTracker->setTrackerIdAssignmentPolicy(dai::TrackerIdAssignmentPolicy::SMALLEST_ID);

    // Create output queues
    auto preview = objectTracker->passthroughTrackerFrame.createOutputQueue();
    auto tracklets = objectTracker->out.createOutputQueue();

    // Link nodes
    detectionNetwork->passthrough.link(objectTracker->inputTrackerFrame);

    detectionNetwork->passthrough.link(objectTracker->inputDetectionFrame);
    detectionNetwork->out.link(objectTracker->inputDetections);

    // Start pipeline
    pipeline.start();

    // FPS calculation variables
    auto startTime = std::chrono::steady_clock::now();
    int counter = 0;
    float fps = 0;
    cv::Scalar color(255, 255, 255);

    while(pipeline.isRunning()) {
        auto imgFrame = preview->get<dai::ImgFrame>();
        auto track = tracklets->get<dai::Tracklets>();

        counter++;
        auto currentTime = std::chrono::steady_clock::now();
        auto elapsed = std::chrono::duration_cast<std::chrono::seconds>(currentTime - startTime).count();
        if(elapsed >= 1) {
            fps = counter / static_cast<float>(elapsed);
            counter = 0;
            startTime = currentTime;
        }

        cv::Mat frame = imgFrame->getCvFrame();
        auto trackletsData = track->tracklets;

        for(const auto& t : trackletsData) {
            auto roi = t.roi.denormalize(frame.cols, frame.rows);
            int x1 = static_cast<int>(roi.topLeft().x);
            int y1 = static_cast<int>(roi.topLeft().y);
            int x2 = static_cast<int>(roi.bottomRight().x);
            int y2 = static_cast<int>(roi.bottomRight().y);

            std::string label;
            try {
                label = detectionNetwork->getClasses().value()[t.label];
            } catch(...) {
                label = std::to_string(t.label);
            }

            cv::putText(frame, label, cv::Point(x1 + 10, y1 + 20), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);
            cv::putText(frame, "ID: " + std::to_string(t.id), cv::Point(x1 + 10, y1 + 35), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);
            cv::putText(frame,
                        std::string(t.status == dai::Tracklet::TrackingStatus::LOST ? "LOST" : "TRACKED"),
                        cv::Point(x1 + 10, y1 + 50),
                        cv::FONT_HERSHEY_TRIPLEX,
                        0.5,
                        color);
            cv::rectangle(frame, cv::Point(x1, y1), cv::Point(x2, y2), color, cv::FONT_HERSHEY_SIMPLEX);
        }

        cv::putText(frame, "NN fps: " + std::to_string(fps).substr(0, 4), cv::Point(2, frame.rows - 4), cv::FONT_HERSHEY_TRIPLEX, 0.4, color);

        cv::imshow("tracker", frame);

        if(cv::waitKey(1) == 'q') {
            break;
        }
    }

    return 0;
}
```

### Need assistance?

Head over to [Discussion Forum](https://discuss.luxonis.com/) for technical support or any other questions you might have.
