# Custom Visualizations

This example creates a DepthAI pipeline that performs YOLOv6-Nano object detection, generates custom image annotations
with bounding boxes and text, and streams the results, along with 640x480 NV12 camera frames, over a remote connection
for visualization. Both Python and C++ implementations are provided below.

This example requires the DepthAI v3 API; see the [installation instructions](https://docs.luxonis.com/software-v3/depthai.md).
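
To verify which API you have installed, you can check the version string at runtime (a minimal sanity check, assuming the `depthai` package is importable):

```python
import depthai as dai

# The v3 API reports a 3.x version string; a 2.x value means the legacy package.
print(dai.__version__)
```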

## Pipeline

### examples/custom_visualizations.pipeline.json

```json
{"pipeline": {"connections": [{"node1Id": 3, "node1Output": "out", "node1OutputGroup": "", "node2Id": 5, "node2Input": "in", "node2InputGroup": ""}, {"node1Id": 2, "node1Output": "passthrough", "node1OutputGroup": "", "node2Id": 3, "node2Input": "imageIn", "node2InputGroup": ""}, {"node1Id": 2, "node1Output": "out", "node1OutputGroup": "", "node2Id": 3, "node2Input": "in", "node2InputGroup": ""}, {"node1Id": 0, "node1Output": "1", "node1OutputGroup": "dynamicOutputs", "node2Id": 7, "node2Input": "in", "node2InputGroup": ""}, {"node1Id": 0, "node1Output": "0", "node1OutputGroup": "dynamicOutputs", "node2Id": 2, "node2Input": "in", "node2InputGroup": ""}], "globalProperties": {"calibData": null, "cameraTuningBlobSize": null, "cameraTuningBlobUri": "", "leonCssFrequencyHz": 700000000.0, "leonMssFrequencyHz": 700000000.0, "pipelineName": null, "pipelineVersion": null, "sippBufferSize": 18432, "sippDmaBufferSize": 16384, "xlinkChunkSize": -1}, "nodes": [[7, {"alias": "", "id": 7, "ioInfo": [[["", "in"], {"blocking": true, "group": "", "id": 12, "name": "in", "queueSize": 3, "type": 3, "waitForMessage": false}]], "logLevel": 3, "name": "XLinkOut", "parentId": -1, "properties": {"maxFpsLimit": -1.0, "metadataOnly": false, "streamName": "__x_0_1"}}], [5, {"alias": "", "id": 5, "ioInfo": [[["", "in"], {"blocking": true, "group": "", "id": 11, "name": "in", "queueSize": 3, "type": 3, "waitForMessage": false}]], "logLevel": 3, "name": "XLinkOut", "parentId": -1, "properties": {"maxFpsLimit": -1.0, "metadataOnly": false, "streamName": "__x_3__out"}}], [3, {"alias": "detectionParser", "id": 3, "ioInfo": [[["", "out"], {"blocking": false, "group": "", "id": 10, "name": "out", "queueSize": 8, "type": 0, "waitForMessage": false}], [["", "imageIn"], {"blocking": false, "group": "", "id": 9, "name": "imageIn", "queueSize": 1, "type": 3, "waitForMessage": true}], [["", "in"], {"blocking": true, "group": "", "id": 8, "name": "in", "queueSize": 1, "type": 3, "waitForMessage": true}]], "logLevel": 3, "name": "DetectionParser", "parentId": 1, "properties": {"networkInputs": {"images": {"dataType": 1, "dims": [416, 416, 3, 1], "name": "images", "numDimensions": 4, "offset": 0, "order": 17185, "qpScale": 1.0, "qpZp": 0.0, "quantization": false, "strides": []}}, "numFramesPool": 8, "parser": {"anchorMasks": {}, "anchors": [], "anchorsV2": [], "classNames": ["person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"], "classes": 80, "confidenceThreshold": 0.5, "coordinates": 4, "iouThreshold": 0.5, "nnFamily": 0, "subtype": "yolov6"}}}], [2, {"alias": "neuralNetwork", "id": 2, "ioInfo": [[["", "passthrough"], {"blocking": false, "group": "", "id": 7, "name": "passthrough", "queueSize": 8, "type": 0, "waitForMessage": false}], 
[["", "out"], {"blocking": false, "group": "", "id": 6, "name": "out", "queueSize": 8, "type": 0, "waitForMessage": false}], [["", "in"], {"blocking": true, "group": "", "id": 5, "name": "in", "queueSize": 3, "type": 3, "waitForMessage": true}]], "logLevel": 3, "name": "NeuralNetwork", "parentId": 1, "properties": {"backend": "", "backendProperties": {}, "blobSize": 8689834, "blobUri": "asset:__blob", "modelSource": 0, "modelUri": "", "numFrames": 8, "numNCEPerThread": 0, "numShavesPerThread": 0, "numThreads": 0}}], [0, {"alias": "", "id": 0, "ioInfo": [[["dynamicOutputs", "0"], {"blocking": false, "group": "dynamicOutputs", "id": 4, "name": "0", "queueSize": 8, "type": 0, "waitForMessage": false}], [["dynamicOutputs", "1"], {"blocking": false, "group": "dynamicOutputs", "id": 3, "name": "1", "queueSize": 8, "type": 0, "waitForMessage": false}], [["", "raw"], {"blocking": false, "group": "", "id": 2, "name": "raw", "queueSize": 8, "type": 0, "waitForMessage": false}], [["", "mockIsp"], {"blocking": true, "group": "", "id": 1, "name": "mockIsp", "queueSize": 8, "type": 3, "waitForMessage": false}], [["", "inputControl"], {"blocking": true, "group": "", "id": 0, "name": "inputControl", "queueSize": 3, "type": 3, "waitForMessage": false}]], "logLevel": 3, "name": "Camera", "parentId": -1, "properties": {"boardSocket": 0, "cameraName": "", "fps": -1.0, "imageOrientation": -1, "initialControl": {"aeLockMode": false, "aeMaxExposureTimeUs": 100663297, "aeRegion": {"height": 0, "priority": 4155921504, "width": 0, "x": 4163, "y": 1}, "afRegion": {"height": 0, "priority": 0, "width": 0, "x": 25042, "y": 0}, "antiBandingMode": 182, "autoFocusMode": 3, "awbLockMode": false, "awbMode": 168, "brightness": -105, "captureIntent": 247, "chromaDenoise": 0, "cmdMask": 0, "contrast": -33, "controlMode": 210, "effectMode": 97, "enableHdr": false, "expCompensation": 39, "expManual": {"exposureTimeUs": 25042, "frameDurationUs": 25042, "sensitivityIso": 4155921612}, "frameSyncMode": 0, "lensPosAutoInfinity": 224, "lensPosAutoMacro": 84, "lensPosition": 0, "lensPositionRaw": 0.0, "lowPowerNumFramesBurst": 0, "lowPowerNumFramesDiscard": 0, "lumaDenoise": 0, "miscControls": [], "saturation": -34, "sceneMode": 84, "sharpness": 0, "strobeConfig": {"activeLevel": 8, "enable": 0, "gpioNumber": 91}, "strobeTimings": {"durationUs": 16, "exposureBeginOffsetUs": 25042, "exposureEndOffsetUs": 17}, "wbColorTemp": 0}, "isp3aFps": 0, "mockIspHeight": -1, "mockIspWidth": -1, "numFramesPoolIsp": 3, "numFramesPoolPreview": 4, "numFramesPoolRaw": 3, "numFramesPoolStill": 4, "numFramesPoolVideo": 4, "outputRequests": [{"enableUndistortion": null, "fps": {"value": null}, "resizeMode": 0, "size": {"value": {"index": 0, "value": [416, 416]}}, "type": 8}, {"enableUndistortion": null, "fps": {"value": null}, "resizeMode": 0, "size": {"value": {"index": 0, "value": [640, 480]}}, "type": 22}], "resolutionHeight": -1, "resolutionWidth": -1}}]]}}
```
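
The dump above is the serialized pipeline graph: a `nodes` list of `[id, properties]` pairs and a `connections` list wiring node outputs to inputs. A quick way to get an overview without reading the raw JSON is to walk it with the standard library (a minimal sketch, assuming the file is saved at the path shown above):

```python
import json

with open("examples/custom_visualizations.pipeline.json") as f:
    pipeline = json.load(f)["pipeline"]

# Each entry in "nodes" is a [id, properties] pair.
for node_id, node in pipeline["nodes"]:
    print(f"node {node_id}: {node['name']}")

# Connections wire a node output to another node's input.
for conn in pipeline["connections"]:
    print(f"{conn['node1Id']}.{conn['node1Output']} -> "
          f"{conn['node2Id']}.{conn['node2Input']}")
```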

## Source code

#### Python

```python
#!/usr/bin/env python3

import depthai as dai

class ImgDetectionsExtended(dai.ImgDetections):
    def __init__(self, detections: dai.ImgDetections):
        dai.ImgDetections.__init__(self)
        self.detections = detections.detections

    # This method can return dai.ImgAnnotations or dai.ImgFrame; the visualizer calls it to obtain the data to draw
    def getVisualizationMessage(self):
        detections = self.detections
        imgAnnt = dai.ImgAnnotations()
        # Setting the timestamp is important, as the visualizer uses it to synchronize the data
        imgAnnt.setTimestamp(self.getTimestamp())
        annotation = dai.ImgAnnotation()
        for detection in detections:
            pointsAnnotation = dai.PointsAnnotation()
            pointsAnnotation.type = dai.PointsAnnotationType.LINE_STRIP
            pointsAnnotation.points = dai.VectorPoint2f([
                dai.Point2f(detection.xmin, detection.ymin),
                dai.Point2f(detection.xmax, detection.ymin),
                dai.Point2f(detection.xmax, detection.ymax),
                dai.Point2f(detection.xmin, detection.ymax),
            ])
            outlineColor = dai.Color(1.0, 0.5, 0.5, 1.0)
            pointsAnnotation.outlineColor = outlineColor
            fillColor = dai.Color(0.5, 1.0, 0.5, 0.5)
            pointsAnnotation.fillColor = fillColor
            pointsAnnotation.thickness = 2.0
            text = dai.TextAnnotation()
            text.position = dai.Point2f(detection.xmin, detection.ymin)
            text.text = f"Test annotation"
            text.fontSize = 50.5
            textColor = dai.Color(0.5, 0.5, 1.0, 1.0)
            text.textColor = textColor
            backgroundColor = dai.Color(1.0, 1.0, 0.5, 1.0)
            text.backgroundColor = backgroundColor
            annotation.points.append(pointsAnnotation)
            annotation.texts.append(text)

        imgAnnt.annotations.append(annotation)
        return imgAnnt

class ImgAnnotationsGenerator(dai.node.ThreadedHostNode):
    def __init__(self):
        super().__init__()
        self.inputDet = self.createInput()
        self.output = self.createOutput()

    def run(self):
        while self.isRunning():
            nnData = self.inputDet.get()
            extended = ImgDetectionsExtended(nnData)
            # Setting the timestamp is important, as the visualizer uses it to synchronize the data
            extended.setTimestamp(nnData.getTimestamp())
            self.output.send(extended)

remoteConnector = dai.RemoteConnection()

# Create pipeline
with dai.Pipeline() as pipeline:
    cameraNode = pipeline.create(dai.node.Camera).build(dai.CameraBoardSocket.CAM_A)
    detectionNetwork = pipeline.create(dai.node.DetectionNetwork).build(
        cameraNode, dai.NNModelDescription("yolov6-nano")
    )
    imageAnnotationsGenerator = pipeline.create(ImgAnnotationsGenerator)
    outputToVisualize = cameraNode.requestOutput((640, 480), type=dai.ImgFrame.Type.NV12)
    detectionNetwork.out.link(imageAnnotationsGenerator.inputDet)

    # Add the remote connector topics
    remoteConnector.addTopic("encoded", outputToVisualize, "images")
    remoteConnector.addTopic("detections", detectionNetwork.out, "images")
    remoteConnector.addTopic("annotations", imageAnnotationsGenerator.output, "images")

    pipeline.start()

    # Register the pipeline with the remote connector
    remoteConnector.registerPipeline(pipeline)

    while pipeline.isRunning():
        if remoteConnector.waitKey(1) == ord("q"):
            pipeline.stop()
            break
```
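
The `"Test annotation"` text above is just a placeholder. Each `dai.ImgDetection` also carries a numeric `label` and a `confidence`, so a small helper along these lines (a sketch, not part of the original example) can produce per-detection labels instead:

```python
import depthai as dai

def makeLabelText(detection: dai.ImgDetection) -> dai.TextAnnotation:
    """Build a text annotation from a detection's label index and confidence."""
    text = dai.TextAnnotation()
    text.position = dai.Point2f(detection.xmin, detection.ymin)
    text.text = f"label {detection.label}: {detection.confidence:.2f}"
    text.fontSize = 24.0
    text.textColor = dai.Color(1.0, 1.0, 1.0, 1.0)
    return text
```

Mapping the label index through the `classNames` list configured in the pipeline's `DetectionParser` (see the JSON above) yields human-readable class names such as `person` or `car`.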

#### C++

```cpp
#include <atomic>
#include <csignal>
#include <iostream>
#include <memory>
#include <vector>

#include "depthai/depthai.hpp"
#include "depthai/pipeline/datatype/ImgAnnotations.hpp"
#include "depthai/remote_connection/RemoteConnection.hpp"
// Global flag for graceful shutdown
std::atomic<bool> quitEvent(false);

// Signal handler
void signalHandler(int signum) {
    quitEvent = true;
}

// Extended detections class
class ImgDetectionsExtended : public dai::ImgDetections {
   public:
    ImgDetectionsExtended(const std::shared_ptr<dai::ImgDetections>& detections) {
        this->detections = detections->detections;
        this->setTimestamp(detections->getTimestamp());
    }

    std::shared_ptr<dai::Buffer> getVisualizationMessage() {
        auto imgAnnt = std::make_shared<dai::ImgAnnotations>();
        imgAnnt->setTimestamp(this->getTimestamp());

        auto annotation = std::make_shared<dai::ImgAnnotation>();

        for(const auto& detection : this->detections) {
            // Create points annotation for bounding box
            auto pointsAnnotation = std::make_shared<dai::PointsAnnotation>();
            pointsAnnotation->type = dai::PointsAnnotationType::LINE_STRIP;
            pointsAnnotation->points = {dai::Point2f(detection.xmin, detection.ymin),
                                        dai::Point2f(detection.xmax, detection.ymin),
                                        dai::Point2f(detection.xmax, detection.ymax),
                                        dai::Point2f(detection.xmin, detection.ymax)};

            // Set colors and thickness
            pointsAnnotation->outlineColor = dai::Color(1.0f, 0.5f, 0.5f, 1.0f);
            pointsAnnotation->fillColor = dai::Color(0.5f, 1.0f, 0.5f, 0.5f);
            pointsAnnotation->thickness = 2.0f;

            // Create text annotation
            auto text = std::make_shared<dai::TextAnnotation>();
            text->position = dai::Point2f(detection.xmin, detection.ymin);
            text->text = "Test annotation";
            text->fontSize = 50.5f;
            text->textColor = dai::Color(0.5f, 0.5f, 1.0f, 1.0f);
            text->backgroundColor = dai::Color(1.0f, 1.0f, 0.5f, 1.0f);

            annotation->points.push_back(*pointsAnnotation);
            annotation->texts.push_back(*text);
        }

        imgAnnt->annotations.push_back(*annotation);
        return imgAnnt;
    }
};

// Custom host node for image annotations
class ImgAnnotationsGenerator : public dai::NodeCRTP<dai::node::HostNode, ImgAnnotationsGenerator> {
   public:
    Input& inputDet = inputs["detections"];
    Output& output = out;

    std::shared_ptr<ImgAnnotationsGenerator> build(Output& detections) {
        detections.link(inputDet);
        return std::static_pointer_cast<ImgAnnotationsGenerator>(this->shared_from_this());
    }

    std::shared_ptr<dai::Buffer> processGroup(std::shared_ptr<dai::MessageGroup> in) override {
        auto nnData = in->get<dai::ImgDetections>("detections");
        auto extended = std::make_shared<ImgDetectionsExtended>(nnData);
        return extended->getVisualizationMessage();
    }
};

int main() {
    // Set up signal handlers
    signal(SIGTERM, signalHandler);
    signal(SIGINT, signalHandler);

    try {
        // Create remote connection
        dai::RemoteConnection remoteConnector;

        // Create pipeline
        dai::Pipeline pipeline;

        // Create nodes
        auto cameraNode = pipeline.create<dai::node::Camera>();
        cameraNode->build(dai::CameraBoardSocket::CAM_A);

        auto detectionNetwork = pipeline.create<dai::node::DetectionNetwork>();
        dai::NNModelDescription modelDesc;
        modelDesc.model = "yolov6-nano";
        detectionNetwork->build(cameraNode, modelDesc);

        auto imageAnnotationsGenerator = pipeline.create<ImgAnnotationsGenerator>();
        auto outputToVisualize = cameraNode->requestOutput(std::make_pair(640, 480), dai::ImgFrame::Type::NV12);

        // Linking
        detectionNetwork->out.link(imageAnnotationsGenerator->inputDet);

        // Add remote connector topics
        remoteConnector.addTopic("encoded", *outputToVisualize, "images");
        remoteConnector.addTopic("detections", detectionNetwork->out, "images");
        remoteConnector.addTopic("annotations", imageAnnotationsGenerator->output, "images");

        // Start pipeline
        pipeline.start();

        // Register pipeline with remote connector
        remoteConnector.registerPipeline(pipeline);

        // Main loop
        while(pipeline.isRunning() && !quitEvent) {
            if(remoteConnector.waitKey(1) == 'q') {
                pipeline.stop();
                break;
            }
        }

        // Cleanup
        pipeline.stop();
        pipeline.wait();

    } catch(const std::exception& e) {
        std::cerr << "Error: " << e.what() << std::endl;
        return 1;
    }

    return 0;
}
```
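
In both versions, `RemoteConnection` serves the visualizer while the pipeline runs; open it in a browser (it is served locally, typically at `http://localhost:8080`, depending on the configured ports) and subscribe to the `encoded`, `detections`, and `annotations` topics to see the camera frames with the custom overlays.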

### Need assistance?

Head over to the [Discussion Forum](https://discuss.luxonis.com/) for technical support or any other questions you might have.
