Mono & MobilenetSSD
This example shows how to run MobileNetv2SSD on the right grayscale camera and how to display the neural network results on a preview of the right camera stream.
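At its core the pipeline is just three nodes: the right mono camera feeds an ImageManip node, which resizes the grayscale frames to 300x300 and converts them to the BGR planar format the network expects, and the converted frames feed both the detection network and the preview stream. A minimal sketch of that graph, condensed from the full listings below (it builds the pipeline, but a connected OAK device is needed to actually run inference):

import depthai as dai

pipeline = dai.Pipeline()

mono = pipeline.create(dai.node.MonoCamera)               # right grayscale sensor
manip = pipeline.create(dai.node.ImageManip)              # reshapes frames for the NN
nn = pipeline.create(dai.node.MobileNetDetectionNetwork)  # decodes MobileNetSSD results on-device

manip.initialConfig.setResize(300, 300)                      # NN input size
manip.initialConfig.setFrameType(dai.ImgFrame.Type.BGR888p)  # gray -> BGR planar

mono.out.link(manip.inputImage)  # camera -> manip
manip.out.link(nn.input)         # manip -> NN (the same output also feeds the preview)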
Demo
Setup
Please run the install script to download all required dependencies. Note that this script must be run from within the cloned repository, so first clone depthai-python and then run the script:
git clone https://github.com/luxonis/depthai-python.git
cd depthai-python/examples
python3 install_requirements.py
For additional information, please follow the installation guide.
This example script requires external file(s) to run. If you are using:

- depthai-python: run
python3 examples/install_requirements.py
to download the required file(s)
- depthai-core: the required file(s) will be downloaded automatically when building the example
Source code
Also available on GitHub
#!/usr/bin/env python3

from pathlib import Path
import sys
import cv2
import depthai as dai
import numpy as np

# Get argument first
nnPath = str((Path(__file__).parent / Path('../models/mobilenet-ssd_openvino_2021.4_6shave.blob')).resolve().absolute())
if len(sys.argv) > 1:
    nnPath = sys.argv[1]

if not Path(nnPath).exists():
    raise FileNotFoundError(f'Required file/s not found, please run "{sys.executable} install_requirements.py"')

# MobilenetSSD label texts
labelMap = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow",
            "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]

# Create pipeline
pipeline = dai.Pipeline()

# Define sources and outputs
monoRight = pipeline.create(dai.node.MonoCamera)
manip = pipeline.create(dai.node.ImageManip)
nn = pipeline.create(dai.node.MobileNetDetectionNetwork)
manipOut = pipeline.create(dai.node.XLinkOut)
nnOut = pipeline.create(dai.node.XLinkOut)

manipOut.setStreamName("right")
nnOut.setStreamName("nn")

# Properties
monoRight.setCamera("right")
monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_720_P)

# Convert the grayscale frame into the nn-acceptable form
manip.initialConfig.setResize(300, 300)
# The NN model expects BGR input. By default ImageManip output type would be same as input (gray in this case)
manip.initialConfig.setFrameType(dai.ImgFrame.Type.BGR888p)

nn.setConfidenceThreshold(0.5)
nn.setBlobPath(nnPath)
nn.setNumInferenceThreads(2)
nn.input.setBlocking(False)

# Linking
monoRight.out.link(manip.inputImage)
manip.out.link(nn.input)
manip.out.link(manipOut.input)
nn.out.link(nnOut.input)

# Connect to device and start pipeline
with dai.Device(pipeline) as device:

    # Output queues will be used to get the grayscale frames and nn data from the outputs defined above
    qRight = device.getOutputQueue("right", maxSize=4, blocking=False)
    qDet = device.getOutputQueue("nn", maxSize=4, blocking=False)

    frame = None
    detections = []

    # nn data, being the bounding box locations, are in <0..1> range - they need to be normalized with frame width/height
    def frameNorm(frame, bbox):
        normVals = np.full(len(bbox), frame.shape[0])
        normVals[::2] = frame.shape[1]
        return (np.clip(np.array(bbox), 0, 1) * normVals).astype(int)

    def displayFrame(name, frame):
        color = (255, 0, 0)
        for detection in detections:
            bbox = frameNorm(frame, (detection.xmin, detection.ymin, detection.xmax, detection.ymax))
            cv2.putText(frame, labelMap[detection.label], (bbox[0] + 10, bbox[1] + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
            cv2.putText(frame, f"{int(detection.confidence * 100)}%", (bbox[0] + 10, bbox[1] + 40), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
            cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2)
        # Show the frame
        cv2.imshow(name, frame)

    while True:
        # Instead of get (blocking), we use tryGet (non-blocking) which will return the available data or None otherwise
        inRight = qRight.tryGet()
        inDet = qDet.tryGet()

        if inRight is not None:
            frame = inRight.getCvFrame()

        if inDet is not None:
            detections = inDet.detections

        if frame is not None:
            displayFrame("right", frame)

        if cv2.waitKey(1) == ord('q'):
            break
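The script takes an optional blob path as its first argument and otherwise falls back to the bundled MobileNet blob. Assuming the example file is named mono_mobilenet.py (as in the depthai-python examples), running it with a custom model would look like:

python3 mono_mobilenet.py path/to/custom-model.blob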
Also available on GitHub
#include <iomanip>
#include <iostream>
#include <sstream>

#include "utility.hpp"

// Includes common necessary includes for development using depthai library
#include "depthai/depthai.hpp"

// MobilenetSSD label texts
static const std::vector<std::string> labelMap = {"background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus",
                                                  "car", "cat", "chair", "cow", "diningtable", "dog", "horse",
                                                  "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"};

int main(int argc, char** argv) {
    using namespace std;

    // Default blob path provided by Hunter private data download
    // Applicable for easier example usage only
    std::string nnPath(BLOB_PATH);

    // If path to blob specified, use that
    if(argc > 1) {
        nnPath = std::string(argv[1]);
    }

    // Print which blob we are using
    printf("Using blob at path: %s\n", nnPath.c_str());

    // Create pipeline
    dai::Pipeline pipeline;

    // Define sources and outputs
    auto monoRight = pipeline.create<dai::node::MonoCamera>();
    auto manip = pipeline.create<dai::node::ImageManip>();
    auto nn = pipeline.create<dai::node::MobileNetDetectionNetwork>();
    auto manipOut = pipeline.create<dai::node::XLinkOut>();
    auto nnOut = pipeline.create<dai::node::XLinkOut>();

    manipOut->setStreamName("right");
    nnOut->setStreamName("nn");

    // Properties
    monoRight->setCamera("right");
    monoRight->setResolution(dai::MonoCameraProperties::SensorResolution::THE_720_P);

    // Convert the grayscale frame into the nn-acceptable form
    manip->initialConfig.setResize(300, 300);
    // The NN model expects BGR input. By default ImageManip output type would be same as input (gray in this case)
    manip->initialConfig.setFrameType(dai::ImgFrame::Type::BGR888p);

    nn->setConfidenceThreshold(0.5);
    nn->setBlobPath(nnPath);
    nn->setNumInferenceThreads(2);
    nn->input.setBlocking(false);

    // Linking
    monoRight->out.link(manip->inputImage);
    manip->out.link(nn->input);
    manip->out.link(manipOut->input);
    nn->out.link(nnOut->input);

    // Connect to device and start pipeline
    dai::Device device(pipeline);

    // Output queues will be used to get the grayscale frames and nn data from the outputs defined above
    auto qRight = device.getOutputQueue("right", 4, false);
    auto qDet = device.getOutputQueue("nn", 4, false);

    cv::Mat frame;
    std::vector<dai::ImgDetection> detections;

    // Add bounding boxes and text to the frame and show it to the user
    auto displayFrame = [](std::string name, cv::Mat frame, std::vector<dai::ImgDetection>& detections) {
        auto color = cv::Scalar(255, 0, 0);
        // nn data, being the bounding box locations, are in <0..1> range - they need to be normalized with frame width/height
        for(auto& detection : detections) {
            int x1 = detection.xmin * frame.cols;
            int y1 = detection.ymin * frame.rows;
            int x2 = detection.xmax * frame.cols;
            int y2 = detection.ymax * frame.rows;

            uint32_t labelIndex = detection.label;
            std::string labelStr = to_string(labelIndex);
            if(labelIndex < labelMap.size()) {
                labelStr = labelMap[labelIndex];
            }
            cv::putText(frame, labelStr, cv::Point(x1 + 10, y1 + 20), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);
            std::stringstream confStr;
            confStr << std::fixed << std::setprecision(2) << detection.confidence * 100;
            cv::putText(frame, confStr.str(), cv::Point(x1 + 10, y1 + 40), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);
            cv::rectangle(frame, cv::Rect(cv::Point(x1, y1), cv::Point(x2, y2)), color, 2);
        }
        // Show the frame
        cv::imshow(name, frame);
    };

    while(true) {
        // Instead of get (blocking), we use tryGet (non-blocking) which will return the available data or nullptr otherwise
        auto inRight = qRight->tryGet<dai::ImgFrame>();
        auto inDet = qDet->tryGet<dai::ImgDetections>();

        if(inRight) {
            frame = inRight->getCvFrame();
        }

        if(inDet) {
            detections = inDet->detections;
        }

        if(!frame.empty()) {
            displayFrame("right", frame, detections);
        }

        int key = cv::waitKey(1);
        if(key == 'q' || key == 'Q') return 0;
    }
    return 0;
}
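The C++ version resolves its default blob through the BLOB_PATH definition provided by the depthai-core example build, and likewise accepts an override as the first argument. Assuming the built example binary is named mono_mobilenet, usage would look like:

./mono_mobilenet path/to/custom-model.blob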