# Mono & MobilenetSSD

This example shows how to run MobileNetv2SSD on the right grayscale camera and how to display the neural network results on a
preview of the right camera stream.

### Similar samples:

 * [RGB & MobilenetSSD](https://docs.luxonis.com/software/depthai/examples/rgb_mobilenet.md)
 * [RGB & MobileNetSSD @ 4K](https://docs.luxonis.com/software/depthai/examples/rgb_mobilenet_4k.md)
 * [Video & MobilenetSSD](https://docs.luxonis.com/software/depthai/examples/video_mobilenet.md)
 * [Mono & MobilenetSSD & Depth](https://docs.luxonis.com/software/depthai/examples/mono_depth_mobilenetssd.md)

## Demo

## Setup

Please run the [install script](https://github.com/luxonis/depthai-python/blob/main/examples/install_requirements.py) to download
all required dependencies. Please note that this script must be run from within the git repository, so you have to clone the
[depthai-python](https://github.com/luxonis/depthai-python) repository first and then run the script:

```bash
git clone https://github.com/luxonis/depthai-python.git
cd depthai-python/examples
python3 install_requirements.py
```

For additional information, please follow the [installation guide](https://docs.luxonis.com/software/depthai/manual-install.md).

## Source code

#### Python

```python
#!/usr/bin/env python3

from pathlib import Path
import sys
import cv2
import depthai as dai
import numpy as np

# Path to the MobileNet-SSD blob: defaults to the copy under ../models (relative to this script)
# and can be overridden by passing a path as the first command-line argument
nnPath = str((Path(__file__).parent / Path('../models/mobilenet-ssd_openvino_2021.4_6shave.blob')).resolve().absolute())
if len(sys.argv) > 1:
    nnPath = sys.argv[1]

# Fail early, with a hint to run install_requirements.py, when the blob file is missing
if not Path(nnPath).exists():
    raise FileNotFoundError(f'Required file/s not found, please run "{sys.executable} install_requirements.py"')

# Class names for MobilenetSSD detections; the numeric label of a detection is used as an index into this list
labelMap = [
    "background",
    "aeroplane", "bicycle", "bird", "boat", "bottle",
    "bus", "car", "cat", "chair", "cow",
    "diningtable", "dog", "horse", "motorbike", "person",
    "pottedplant", "sheep", "sofa", "train", "tvmonitor",
]

# Create pipeline
pipeline = dai.Pipeline()

# Define sources and outputs: a mono camera, an ImageManip node that prepares the frames for the
# network, the MobileNet detection network itself, and one XLinkOut node per stream read by the host
monoRight = pipeline.create(dai.node.MonoCamera)
manip = pipeline.create(dai.node.ImageManip)
nn = pipeline.create(dai.node.MobileNetDetectionNetwork)
manipOut = pipeline.create(dai.node.XLinkOut)
nnOut = pipeline.create(dai.node.XLinkOut)

# Stream names; the same names are used later to open the matching output queues
manipOut.setStreamName("right")
nnOut.setStreamName("nn")

# Properties: use the right mono camera at 720p
monoRight.setCamera("right")
monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_720_P)

# Convert the grayscale frame into the nn-acceptable form (resized to 300x300)
manip.initialConfig.setResize(300, 300)
# The NN model expects BGR input. By default ImageManip output type would be same as input (gray in this case)
manip.initialConfig.setFrameType(dai.ImgFrame.Type.BGR888p)

# Network settings: confidence threshold 0.5, blob chosen above, 2 inference threads, non-blocking input
nn.setConfidenceThreshold(0.5)
nn.setBlobPath(nnPath)
nn.setNumInferenceThreads(2)
nn.input.setBlocking(False)

# Linking: camera -> manip; the manip output feeds both the network and the "right" preview stream,
# and the network output feeds the "nn" stream
monoRight.out.link(manip.inputImage)
manip.out.link(nn.input)
manip.out.link(manipOut.input)
nn.out.link(nnOut.input)

# Connect to device and start pipeline
with dai.Device(pipeline) as device:

    # Output queues will be used to get the grayscale frames and nn data from the outputs defined above.
    # Both are requested with maxSize=4 and blocking=False.
    qRight = device.getOutputQueue("right", maxSize=4, blocking=False)
    qDet = device.getOutputQueue("nn", maxSize=4, blocking=False)

    # Most recent frame and detection list; refreshed by the main loop and read when drawing
    frame = None
    detections = []

    # nn data, being the bounding box locations, are in <0..1> range - they need to be normalized with frame width/height
    def frameNorm(frame, bbox):
        """Scale a normalized bounding box to pixel coordinates of ``frame``.

        Every value of ``bbox`` is clipped to the <0..1> range. Values at even positions are
        multiplied by ``frame.shape[1]`` and values at odd positions by ``frame.shape[0]``.
        Returns an integer numpy array with one entry per element of ``bbox``.
        """
        frameHeight, frameWidth = frame.shape[0], frame.shape[1]
        scale = np.array([frameWidth if idx % 2 == 0 else frameHeight for idx in range(len(bbox))])
        clipped = np.clip(np.array(bbox), 0, 1)
        return (clipped * scale).astype(int)

    def displayFrame(name, frame):
        """Draw the current detections onto ``frame`` and show it in the window called ``name``.

        Reads the ``detections`` list of the enclosing script. For each detection the label text,
        the confidence in percent and the bounding box are drawn directly into ``frame``.
        """
        color = (255, 0, 0)
        for detection in detections:
            bbox = frameNorm(frame, (detection.xmin, detection.ymin, detection.xmax, detection.ymax))
            # Fall back to the numeric label when it does not index into labelMap (e.g. a custom
            # blob passed on the command line) instead of raising IndexError or picking a wrong name
            if 0 <= detection.label < len(labelMap):
                labelStr = labelMap[detection.label]
            else:
                labelStr = str(detection.label)
            cv2.putText(frame, labelStr, (bbox[0] + 10, bbox[1] + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
            cv2.putText(frame, f"{int(detection.confidence * 100)}%", (bbox[0] + 10, bbox[1] + 40), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
            cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2)
        # Show the frame
        cv2.imshow(name, frame)

    while True:
        # Poll both queues with tryGet (non-blocking): it returns the available data or None otherwise
        latestFrame = qRight.tryGet()
        latestDet = qDet.tryGet()

        # Remember the newest detections and frame; the previous ones are kept when nothing new arrived
        if latestDet is not None:
            detections = latestDet.detections
        if latestFrame is not None:
            frame = latestFrame.getCvFrame()

        # Draw as soon as at least one frame has been received
        if frame is not None:
            displayFrame("right", frame)

        # Leave the loop when 'q' is pressed
        pressedKey = cv2.waitKey(1)
        if pressedKey == ord('q'):
            break
```

#### C++

```cpp
#include <iostream>

#include "utility.hpp"

// Includes common necessary includes for development using depthai library
#include "depthai/depthai.hpp"

// Class names for MobilenetSSD detections; a detection's numeric label is used as an index into this list
static const std::vector<std::string> labelMap{
    "background",
    "aeroplane", "bicycle", "bird", "boat", "bottle",
    "bus", "car", "cat", "chair", "cow",
    "diningtable", "dog", "horse", "motorbike", "person",
    "pottedplant", "sheep", "sofa", "train", "tvmonitor",
};

/**
 * Runs a MobileNet detection network on the right mono camera and shows the detections
 * on a preview of that camera stream.
 *
 * @param argc Number of command-line arguments
 * @param argv Optional argv[1]: path to the neural network blob (BLOB_PATH is used otherwise)
 * @return 0 after the user presses 'q' or 'Q' in the preview window
 */
int main(int argc, char** argv) {
    // Default blob path provided by Hunter private data download
    // Applicable for easier example usage only
    std::string nnPath(BLOB_PATH);

    // If path to blob specified, use that
    if(argc > 1) {
        nnPath = std::string(argv[1]);
    }

    // Print which blob we are using
    std::cout << "Using blob at path: " << nnPath << '\n';

    // Create pipeline
    dai::Pipeline pipeline;

    // Define sources and outputs
    auto monoRight = pipeline.create<dai::node::MonoCamera>();
    auto manip = pipeline.create<dai::node::ImageManip>();
    auto nn = pipeline.create<dai::node::MobileNetDetectionNetwork>();
    auto manipOut = pipeline.create<dai::node::XLinkOut>();
    auto nnOut = pipeline.create<dai::node::XLinkOut>();

    // Stream names; the same names are used below to open the matching output queues
    manipOut->setStreamName("right");
    nnOut->setStreamName("nn");

    // Properties
    monoRight->setCamera("right");
    monoRight->setResolution(dai::MonoCameraProperties::SensorResolution::THE_720_P);

    // Convert the grayscale frame into the nn-acceptable form
    manip->initialConfig.setResize(300, 300);
    // The NN model expects BGR input. By default ImageManip output type would be same as input (gray in this case)
    manip->initialConfig.setFrameType(dai::ImgFrame::Type::BGR888p);

    nn->setConfidenceThreshold(0.5);
    nn->setBlobPath(nnPath);
    nn->setNumInferenceThreads(2);
    nn->input.setBlocking(false);

    // Linking: the manip output feeds both the network and the "right" preview stream
    monoRight->out.link(manip->inputImage);
    manip->out.link(nn->input);
    manip->out.link(manipOut->input);
    nn->out.link(nnOut->input);

    // Connect to device and start pipeline
    dai::Device device(pipeline);

    // Output queues will be used to get the grayscale frames and nn data from the outputs defined above
    auto qRight = device.getOutputQueue("right", 4, false);
    auto qDet = device.getOutputQueue("nn", 4, false);

    // Most recent frame and detections; kept between iterations when a queue has nothing new
    cv::Mat frame;
    std::vector<dai::ImgDetection> detections;

    // Add bounding boxes and text to the frame and show it to the user
    auto displayFrame = [](const std::string& name, cv::Mat frame, const std::vector<dai::ImgDetection>& detections) {
        const auto color = cv::Scalar(255, 0, 0);
        // nn data, being the bounding box locations, are in <0..1> range - they need to be normalized with frame width/height
        for(const auto& detection : detections) {
            const int x1 = detection.xmin * frame.cols;
            const int y1 = detection.ymin * frame.rows;
            const int x2 = detection.xmax * frame.cols;
            const int y2 = detection.ymax * frame.rows;

            // Show the class name when the label indexes into labelMap, the raw number otherwise
            const uint32_t labelIndex = detection.label;
            std::string labelStr = std::to_string(labelIndex);
            if(labelIndex < labelMap.size()) {
                labelStr = labelMap[labelIndex];
            }
            cv::putText(frame, labelStr, cv::Point(x1 + 10, y1 + 20), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);
            std::stringstream confStr;
            confStr << std::fixed << std::setprecision(2) << detection.confidence * 100;
            cv::putText(frame, confStr.str(), cv::Point(x1 + 10, y1 + 40), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);
            // The fourth argument is the line thickness in pixels (2, as in the Python sample), not a font id
            cv::rectangle(frame, cv::Rect(cv::Point(x1, y1), cv::Point(x2, y2)), color, 2);
        }
        // Show the frame
        cv::imshow(name, frame);
    };

    while(true) {
        // Instead of get (blocking), we use tryGet (non-blocking) which will return the available data or nullptr otherwise
        auto inRight = qRight->tryGet<dai::ImgFrame>();
        auto inDet = qDet->tryGet<dai::ImgDetections>();

        if(inRight) {
            frame = inRight->getCvFrame();
        }

        if(inDet) {
            detections = inDet->detections;
        }

        if(!frame.empty()) {
            displayFrame("right", frame, detections);
        }

        // Quit on 'q' or 'Q'
        const int key = cv::waitKey(1);
        if(key == 'q' || key == 'Q') {
            break;
        }
    }
    return 0;
}
```

## Pipeline

### examples/mono_mobilenet.pipeline.json

```json
{
  "pipeline": {
    "connections": [
      {
        "node1Id": 0,
        "node1Output": "out",
        "node1OutputGroup": "",
        "node2Id": 1,
        "node2Input": "inputImage",
        "node2InputGroup": ""
      },
      {
        "node1Id": 1,
        "node1Output": "out",
        "node1OutputGroup": "",
        "node2Id": 2,
        "node2Input": "in",
        "node2InputGroup": ""
      },
      {
        "node1Id": 1,
        "node1Output": "out",
        "node1OutputGroup": "",
        "node2Id": 3,
        "node2Input": "in",
        "node2InputGroup": ""
      },
      {
        "node1Id": 2,
        "node1Output": "out",
        "node1OutputGroup": "",
        "node2Id": 4,
        "node2Input": "in",
        "node2InputGroup": ""
      }
    ],
    "globalProperties": {
      "calibData": null,
      "cameraTuningBlobSize": null,
      "cameraTuningBlobUri": "",
      "leonCssFrequencyHz": 700000000.0,
      "leonMssFrequencyHz": 700000000.0,
      "pipelineName": null,
      "pipelineVersion": null,
      "sippBufferSize": 18432,
      "sippDmaBufferSize": 16384,
      "xlinkChunkSize": -1
    },
    "nodes": [
      [
        0,
        {
          "id": 0,
          "ioInfo": [
            [
              [
                "",
                "inputControl"
              ],
              {
                "blocking": true,
                "group": "",
                "id": 1,
                "name": "inputControl",
                "queueSize": 8,
                "type": 3,
                "waitForMessage": false
              }
            ],
            [
              [
                "",
                "out"
              ],
              {
                "blocking": false,
                "group": "",
                "id": 2,
                "name": "out",
                "queueSize": 8,
                "type": 0,
                "waitForMessage": false
              }
            ],
            [
              [
                "",
                "raw"
              ],
              {
                "blocking": false,
                "group": "",
                "id": 3,
                "name": "raw",
                "queueSize": 8,
                "type": 0,
                "waitForMessage": false
              }
            ],
            [
              [
                "",
                "frameEvent"
              ],
              {
                "blocking": false,
                "group": "",
                "id": 4,
                "name": "frameEvent",
                "queueSize": 8,
                "type": 0,
                "waitForMessage": false
              }
            ]
          ],
          "name": "MonoCamera",
          "properties": {
            "boardSocket": -1,
            "cameraName": "right",
            "fps": 30.0,
            "imageOrientation": -1,
            "initialControl": {
              "aeLockMode": false,
              "aeMaxExposureTimeUs": 0,
              "aeRegion": {
                "height": 0,
                "priority": 0,
                "width": 0,
                "x": 0,
                "y": 0
              },
              "afRegion": {
                "height": 0,
                "priority": 0,
                "width": 0,
                "x": 0,
                "y": 0
              },
              "antiBandingMode": 0,
              "autoFocusMode": 3,
              "awbLockMode": false,
              "awbMode": 0,
              "brightness": 0,
              "captureIntent": 0,
              "chromaDenoise": 0,
              "cmdMask": 0,
              "contrast": 0,
              "controlMode": 0,
              "effectMode": 0,
              "expCompensation": 0,
              "expManual": {
                "exposureTimeUs": 0,
                "frameDurationUs": 0,
                "sensitivityIso": 0
              },
              "frameSyncMode": 0,
              "lensPosAutoInfinity": 0,
              "lensPosAutoMacro": 0,
              "lensPosition": 0,
              "lensPositionRaw": 0.0,
              "lowPowerNumFramesBurst": 0,
              "lowPowerNumFramesDiscard": 0,
              "lumaDenoise": 0,
              "saturation": 0,
              "sceneMode": 0,
              "sharpness": 0,
              "strobeConfig": {
                "activeLevel": 0,
                "enable": 0,
                "gpioNumber": 0
              },
              "strobeTimings": {
                "durationUs": 0,
                "exposureBeginOffsetUs": 0,
                "exposureEndOffsetUs": 0
              },
              "wbColorTemp": 0
            },
            "isp3aFps": 0,
            "numFramesPool": 3,
            "numFramesPoolRaw": 3,
            "rawPacked": null,
            "resolution": 0
          }
        }
      ],
      [
        1,
        {
          "id": 1,
          "ioInfo": [
            [
              [
                "",
                "inputConfig"
              ],
              {
                "blocking": true,
                "group": "",
                "id": 5,
                "name": "inputConfig",
                "queueSize": 8,
                "type": 3,
                "waitForMessage": false
              }
            ],
            [
              [
                "",
                "inputImage"
              ],
              {
                "blocking": true,
                "group": "",
                "id": 6,
                "name": "inputImage",
                "queueSize": 8,
                "type": 3,
                "waitForMessage": true
              }
            ],
            [
              [
                "",
                "out"
              ],
              {
                "blocking": false,
                "group": "",
                "id": 7,
                "name": "out",
                "queueSize": 8,
                "type": 0,
                "waitForMessage": false
              }
            ]
          ],
          "name": "ImageManip",
          "properties": {
            "initialConfig": {
              "cropConfig": {
                "cropRatio": 1.0,
                "cropRect": {
                  "xmax": 0.0,
                  "xmin": 0.0,
                  "ymax": 0.0,
                  "ymin": 0.0
                },
                "cropRotatedRect": {
                  "angle": 0.0,
                  "center": {
                    "x": 0.0,
                    "y": 0.0
                  },
                  "size": {
                    "height": 0.0,
                    "width": 0.0
                  }
                },
                "enableCenterCropRectangle": false,
                "enableRotatedRect": false,
                "normalizedCoords": true,
                "widthHeightAspectRatio": 1.0
              },
              "enableCrop": false,
              "enableFormat": true,
              "enableResize": true,
              "formatConfig": {
                "colormap": 0,
                "colormapMax": 255,
                "colormapMin": 0,
                "flipHorizontal": false,
                "flipVertical": false,
                "type": 8
              },
              "interpolation": -1,
              "resizeConfig": {
                "bgBlue": 0,
                "bgGreen": 0,
                "bgRed": 0,
                "enableRotation": false,
                "enableWarp4pt": false,
                "enableWarpMatrix": false,
                "height": 300,
                "keepAspectRatio": true,
                "lockAspectRatioFill": false,
                "normalizedCoords": true,
                "rotationAngleDeg": 0.0,
                "warpBorderReplicate": false,
                "warpFourPoints": [],
                "warpMatrix3x3": [],
                "width": 300
              },
              "reusePreviousImage": false,
              "skipCurrentImage": false
            },
            "meshHeight": 0,
            "meshUri": "",
            "meshWidth": 0,
            "numFramesPool": 4,
            "outputFrameSize": 1048576
          }
        }
      ],
      [
        2,
        {
          "id": 2,
          "ioInfo": [
            [
              [
                "",
                "in"
              ],
              {
                "blocking": false,
                "group": "",
                "id": 8,
                "name": "in",
                "queueSize": 5,
                "type": 3,
                "waitForMessage": true
              }
            ],
            [
              [
                "",
                "out"
              ],
              {
                "blocking": false,
                "group": "",
                "id": 9,
                "name": "out",
                "queueSize": 8,
                "type": 0,
                "waitForMessage": false
              }
            ],
            [
              [
                "",
                "passthrough"
              ],
              {
                "blocking": false,
                "group": "",
                "id": 10,
                "name": "passthrough",
                "queueSize": 8,
                "type": 0,
                "waitForMessage": false
              }
            ]
          ],
          "name": "DetectionNetwork",
          "properties": {
            "blobSize": 14499200,
            "blobUri": "asset:__blob",
            "numFrames": 8,
            "numNCEPerThread": 0,
            "numThreads": 2,
            "parser": {
              "anchorMasks": {},
              "anchors": [],
              "classes": 0,
              "confidenceThreshold": 0.5,
              "coordinates": 0,
              "iouThreshold": 0.0,
              "nnFamily": 1
            }
          }
        }
      ],
      [
        3,
        {
          "id": 3,
          "ioInfo": [
            [
              [
                "",
                "in"
              ],
              {
                "blocking": true,
                "group": "",
                "id": 11,
                "name": "in",
                "queueSize": 8,
                "type": 3,
                "waitForMessage": true
              }
            ]
          ],
          "name": "XLinkOut",
          "properties": {
            "maxFpsLimit": -1.0,
            "metadataOnly": false,
            "streamName": "right"
          }
        }
      ],
      [
        4,
        {
          "id": 4,
          "ioInfo": [
            [
              [
                "",
                "in"
              ],
              {
                "blocking": true,
                "group": "",
                "id": 12,
                "name": "in",
                "queueSize": 8,
                "type": 3,
                "waitForMessage": true
              }
            ]
          ],
          "name": "XLinkOut",
          "properties": {
            "maxFpsLimit": -1.0,
            "metadataOnly": false,
            "streamName": "nn"
          }
        }
      ]
    ]
  }
}
```

### Need assistance?

Head over to [Discussion Forum](https://discuss.luxonis.com/) for technical support or any other questions you might have.
