24 #define uint64 uint64_t
25 #include <opencv2/core/ocl.hpp>
28 #include "objdetectdata.pb.h"
29 #include <google/protobuf/util/time_util.h>
33 using google::protobuf::util::TimeUtil;
37 bool LooksLikeTransposedYoloOutput(
const cv::Mat& out,
size_t classCount)
42 return out.dims == 3 && out.size[0] == 1 && out.size[1] >= 4 &&
43 out.size[2] > out.size[1] &&
44 (classCount == 0 || out.size[1] >= 4 +
static_cast<int>(classCount));
47 cv::Rect ScaledXYWHBox(
52 const cv::Size& frameDims,
56 if (centerX <= 1.0f && centerY <= 1.0f && width <= 1.0f && height <= 1.0f) {
57 centerX *=
static_cast<float>(frameDims.width);
58 width *=
static_cast<float>(frameDims.width);
59 centerY *=
static_cast<float>(frameDims.height);
60 height *=
static_cast<float>(frameDims.height);
62 const float xFactor =
static_cast<float>(frameDims.width) /
static_cast<float>(inputWidth);
63 const float yFactor =
static_cast<float>(frameDims.height) /
static_cast<float>(inputHeight);
70 float left = centerX - width / 2.0f;
71 float top = centerY - height / 2.0f;
72 float right = centerX + width / 2.0f;
73 float bottom = centerY + height / 2.0f;
75 left = std::max(0.0f, std::min(left,
static_cast<float>(frameDims.width - 1)));
76 top = std::max(0.0f, std::min(top,
static_cast<float>(frameDims.height - 1)));
77 right = std::max(0.0f, std::min(right,
static_cast<float>(frameDims.width)));
78 bottom = std::max(0.0f, std::min(bottom,
static_cast<float>(frameDims.height)));
81 static_cast<int>(left),
82 static_cast<int>(top),
83 std::max(0,
static_cast<int>(right - left)),
84 std::max(0,
static_cast<int>(bottom - top)));
87 std::vector<uint32_t> EncodeBinaryMaskRLE(
const std::vector<uint8_t>& mask)
89 std::vector<uint32_t> rle;
95 for (uint8_t value : mask) {
96 value = value ? 1 : 0;
97 if (value == current) {
100 rle.push_back(count);
105 rle.push_back(count);
111 cv::Mat image(mask.
height, mask.
width, CV_8UC1, cv::Scalar(0));
118 uint8_t* data = image.ptr<uint8_t>();
119 for (uint32_t count : mask.
rle) {
120 const int end = std::min(total, offset +
static_cast<int>(count));
122 std::fill(data + offset, data + end,
static_cast<uint8_t
>(1));
134 const cv::Rect_<float>& sourceBox,
135 const cv::Rect_<float>& targetBox,
136 const cv::Size& frameDims)
139 if (!sourceMask.
HasData() || sourceBox.width <= 0.0f || sourceBox.height <= 0.0f ||
140 targetBox.width <= 0.0f || targetBox.height <= 0.0f ||
141 frameDims.width <= 0 || frameDims.height <= 0) {
145 const float scaleX = sourceMask.
width /
static_cast<float>(frameDims.width);
146 const float scaleY = sourceMask.
height /
static_cast<float>(frameDims.height);
147 const cv::Rect_<float> sourceMaskBox(
148 sourceBox.x * scaleX,
149 sourceBox.y * scaleY,
150 sourceBox.width * scaleX,
151 sourceBox.height * scaleY);
152 const cv::Rect_<float> targetMaskBox(
153 targetBox.x * scaleX,
154 targetBox.y * scaleY,
155 targetBox.width * scaleX,
156 targetBox.height * scaleY);
157 if (sourceMaskBox.width <= 0.0f || sourceMaskBox.height <= 0.0f)
160 const double xScale = targetMaskBox.
width / sourceMaskBox.width;
161 const double yScale = targetMaskBox.height / sourceMaskBox.height;
162 cv::Mat transform = (cv::Mat_<double>(2, 3) <<
163 xScale, 0.0, targetMaskBox.x - xScale * sourceMaskBox.x,
164 0.0, yScale, targetMaskBox.y - yScale * sourceMaskBox.y);
166 cv::Mat source = DecodeBinaryMaskRLE(sourceMask);
169 source, transformed, transform, source.size(),
170 cv::INTER_NEAREST, cv::BORDER_CONSTANT, cv::Scalar(0));
171 if (cv::countNonZero(transformed) == 0)
176 result.
rle = EncodeBinaryMaskRLE(
177 std::vector<uint8_t>(transformed.data, transformed.data + transformed.total()));
182 const cv::Mat& prototype,
183 const std::vector<float>& coefficients,
185 const cv::Size& frameDims)
188 if (prototype.dims != 4 || prototype.size[0] != 1 ||
189 prototype.size[1] !=
static_cast<int>(coefficients.size()))
192 const int channels = prototype.size[1];
193 const int maskHeight = prototype.size[2];
194 const int maskWidth = prototype.size[3];
195 const int maskPixels = maskWidth * maskHeight;
196 const float* protoData =
reinterpret_cast<const float*
>(prototype.data);
198 const int left = std::max(0,
static_cast<int>(box.x * maskWidth /
static_cast<float>(frameDims.width)));
199 const int top = std::max(0,
static_cast<int>(box.y * maskHeight /
static_cast<float>(frameDims.height)));
200 const int right = std::min(maskWidth,
static_cast<int>((box.x + box.width) * maskWidth /
static_cast<float>(frameDims.width)));
201 const int bottom = std::min(maskHeight,
static_cast<int>((box.y + box.height) * maskHeight /
static_cast<float>(frameDims.height)));
202 if (left >= right || top >= bottom)
205 std::vector<uint8_t> binary(maskPixels, 0);
206 for (
int y = top; y < bottom; ++y) {
207 for (
int x = left; x < right; ++x) {
208 const int pixel = y * maskWidth + x;
210 for (
int channel = 0; channel < channels; ++channel) {
211 value += coefficients[channel] * protoData[channel * maskPixels + pixel];
213 binary[pixel] = value > 0.0f ? 1 : 0;
217 result.
width = maskWidth;
218 result.
height = maskHeight;
219 result.
rle = EncodeBinaryMaskRLE(binary);
223 std::string LoadONNXModel(std::string modelPath, cv::dnn::Net *net)
225 #if CV_VERSION_MAJOR < 4 || (CV_VERSION_MAJOR == 4 && CV_VERSION_MINOR < 3)
226 return std::string(
"Failed to load ONNX model: YOLO requires OpenCV 4.3.0 or newer. "
227 "This OpenCV build is ") + CV_VERSION +
".";
230 cv::dnn::Net loaded_net = cv::dnn::readNetFromONNX(modelPath);
235 }
catch (
const cv::Exception& e) {
236 std::string error_text = std::string(
"Failed to load ONNX model: ") + e.what();
237 if (error_text.find(
"Unsupported data type: FLOAT16") != std::string::npos) {
238 error_text =
"Failed to load ONNX model: FLOAT16 is not supported by this OpenCV build. "
239 "Please use an FP32 ONNX model.";
242 }
catch (
const std::exception& e) {
243 return std::string(
"Failed to load ONNX model: ") + e.what();
245 return "Failed to load ONNX model: unknown error";
253 : processingController(&processingController), processingDevice(
"CPU"), inpWidth(640), inpHeight(640), generateMasks(true){
254 confThreshold = 0.10;
261 return LoadONNXModel(modelPath,
nullptr);
264 void CVObjectDetection::setProcessingDevice(){
265 const std::string requestedDevice = processingDevice;
266 if (processingDevice ==
"CPU") {
267 net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
268 net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
273 if(processingDevice ==
"GPU" || processingDevice ==
"GPU_AUTO" || processingDevice ==
"GPU_CUDA"){
275 const std::vector<cv::dnn::Target> targets = cv::dnn::getAvailableTargets(cv::dnn::DNN_BACKEND_CUDA);
276 if (std::find(targets.begin(), targets.end(), cv::dnn::DNN_TARGET_CUDA) != targets.end()) {
277 net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
278 net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
279 ZmqLogger::Instance()->
Log(
"Object Detection DNN device: requested " + requestedDevice +
", selected CUDA");
282 }
catch (
const cv::Exception&) {
286 if(processingDevice ==
"GPU_OPENCL"){
288 const std::vector<cv::dnn::Target> targets = cv::dnn::getAvailableTargets(cv::dnn::DNN_BACKEND_OPENCV);
289 if (std::find(targets.begin(), targets.end(), cv::dnn::DNN_TARGET_OPENCL) != targets.end()) {
290 cv::ocl::setUseOpenCL(
true);
291 net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
292 net.setPreferableTarget(cv::dnn::DNN_TARGET_OPENCL);
293 ZmqLogger::Instance()->
Log(
"Object Detection DNN device: requested " + requestedDevice +
", selected OpenCL");
296 }
catch (
const cv::Exception&) {
300 processingDevice =
"CPU";
301 net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
302 net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
303 ZmqLogger::Instance()->
Log(
"Object Detection DNN device: requested " + requestedDevice +
", selected CPU");
309 start = _start; end = _end;
317 processingController->
SetError(
false,
"");
319 if(modelPath.empty()) {
320 processingController->
SetError(
true,
"Missing path to YOLO ONNX model file");
324 if(classesFile.empty()) {
325 processingController->
SetError(
true,
"Missing path to class name file");
330 std::ifstream model_file(modelPath);
331 if(!model_file.good()){
332 processingController->
SetError(
true,
"Incorrect path to YOLO ONNX model file");
336 std::ifstream classes_file(classesFile);
337 if(!classes_file.good()){
338 processingController->
SetError(
true,
"Incorrect path to class name file");
346 while (std::getline(classes_file, line)) classNames.push_back(line);
349 std::string error_text = LoadONNXModel(modelPath, &net);
350 if (!error_text.empty()) {
351 processingController->
SetError(
true, error_text);
355 setProcessingDevice();
358 if(!process_interval || end <= 1 || end-start == 0){
360 start = (int)(video.
Start() * video.
Reader()->info.fps.ToFloat());
361 end = (int)(video.
End() * video.
Reader()->info.fps.ToFloat());
364 for (frame_number = start; frame_number <= end; frame_number++)
371 std::shared_ptr<openshot::Frame> f = video.
GetFrame(frame_number);
374 cv::Mat cvimage = f->GetImageCV();
376 DetectObjects(cvimage, frame_number);
379 processingController->
SetProgress(uint(100*(frame_number-start)/(end-start)));
384 void CVObjectDetection::DetectObjects(
const cv::Mat &frame,
size_t frameId){
389 cv::dnn::blobFromImage(frame, blob, 1/255.0, cv::Size(inpWidth, inpHeight), cv::Scalar(0,0,0),
true,
false);
391 std::vector<cv::Mat> outs;
396 net.forward(outs, getOutputsNames(net));
397 }
catch (
const cv::Exception& e) {
398 processingController->
SetError(
true, std::string(
"Object detection inference failed: ") + e.what());
404 postprocess(frame.size(), outs, frameId);
410 void CVObjectDetection::postprocess(
const cv::Size &frameDims,
const std::vector<cv::Mat>& outs,
size_t frameId)
412 std::vector<int> classIds;
413 std::vector<float> confidences;
414 std::vector<cv::Rect> boxes;
415 std::vector<std::vector<ClassScore>> detectionClassScores;
416 std::vector<CVObjectMaskData> detectionMasks;
417 std::vector<int> objectIds;
418 const int maxClassCandidates = 5;
420 for (
size_t i = 0; i < outs.size(); ++i) {
421 cv::Mat det = outs[i];
423 if (LooksLikeTransposedYoloOutput(det, classNames.size())) {
424 const int attributes = det.size[1];
425 const int candidates = det.size[2];
426 const int classCount = !classNames.empty()
427 ?
static_cast<int>(classNames.size())
429 const int maskCoefficientCount = attributes - 4 - classCount;
430 const cv::Mat* prototype =
nullptr;
431 if (generateMasks && maskCoefficientCount > 0) {
432 auto prototypeIt = std::find_if(outs.begin(), outs.end(),
433 [maskCoefficientCount](
const cv::Mat& out) {
434 return out.dims == 4 && out.size[0] == 1 && out.size[1] == maskCoefficientCount;
436 if (prototypeIt != outs.end()) {
437 prototype = &(*prototypeIt);
440 const float* data =
reinterpret_cast<const float*
>(det.data);
442 for (
int candidateIndex = 0; candidateIndex < candidates; ++candidateIndex) {
443 std::vector<ClassScore> rowClassScores;
444 rowClassScores.reserve(maxClassCandidates);
446 for (
int classIndex = 0; classIndex < classCount; ++classIndex) {
447 const float classConfidence = data[(4 + classIndex) * candidates + candidateIndex];
448 if (rowClassScores.size() <
static_cast<size_t>(maxClassCandidates)) {
449 rowClassScores.emplace_back(classIndex, classConfidence);
450 std::sort(rowClassScores.begin(), rowClassScores.end(),
452 }
else if (classConfidence > rowClassScores.back().score) {
453 rowClassScores.back() =
ClassScore(classIndex, classConfidence);
454 std::sort(rowClassScores.begin(), rowClassScores.end(),
459 if (rowClassScores.empty() || rowClassScores.front().score <= confThreshold) {
463 cv::Rect box = ScaledXYWHBox(
464 data[candidateIndex],
465 data[candidates + candidateIndex],
466 data[2 * candidates + candidateIndex],
467 data[3 * candidates + candidateIndex],
468 frameDims, inpWidth, inpHeight);
469 if (box.width <= 0 || box.height <= 0) {
473 classIds.push_back(rowClassScores.front().classId);
474 confidences.push_back(rowClassScores.front().score);
475 boxes.push_back(box);
476 detectionClassScores.push_back(rowClassScores);
478 std::vector<float> coefficients;
479 coefficients.reserve(maskCoefficientCount);
480 for (
int coefficientIndex = 0; coefficientIndex < maskCoefficientCount; ++coefficientIndex) {
481 coefficients.push_back(data[(4 + classCount + coefficientIndex) * candidates + candidateIndex]);
483 detectionMasks.push_back(BuildMaskFromPrototype(*prototype, coefficients, box, frameDims));
485 detectionMasks.push_back({});
493 det = det.reshape(1, det.size[1]);
495 if (det.dims != 2 || det.cols < 6) {
499 const float xFactor =
static_cast<float>(frameDims.width) /
static_cast<float>(inpWidth);
500 const float yFactor =
static_cast<float>(frameDims.height) /
static_cast<float>(inpHeight);
502 float* data =
reinterpret_cast<float*
>(det.data);
503 for (
int j = 0; j < det.rows; ++j, data += det.cols) {
504 std::vector<ClassScore> rowClassScores;
505 rowClassScores.reserve(maxClassCandidates);
506 int classScoresEnd = det.cols;
507 if (!classNames.empty()) {
508 classScoresEnd = std::min(det.cols, 5 +
static_cast<int>(classNames.size()));
510 for (
int classIndex = 5; classIndex < classScoresEnd; ++classIndex) {
511 const float classConfidence = data[classIndex] * data[4];
512 if (rowClassScores.size() <
static_cast<size_t>(maxClassCandidates)) {
513 rowClassScores.emplace_back(classIndex - 5, classConfidence);
514 std::sort(rowClassScores.begin(), rowClassScores.end(),
516 }
else if (classConfidence > rowClassScores.back().score) {
517 rowClassScores.back() =
ClassScore(classIndex - 5, classConfidence);
518 std::sort(rowClassScores.begin(), rowClassScores.end(),
522 if (rowClassScores.empty()) {
526 float confidence = rowClassScores.front().score;
528 if (confidence > confThreshold) {
534 if (data[0] > 1.0f || data[1] > 1.0f || data[2] > 1.0f || data[3] > 1.0f) {
535 centerX =
static_cast<int>(data[0] * xFactor);
536 centerY =
static_cast<int>(data[1] * yFactor);
537 width =
static_cast<int>(data[2] * xFactor);
538 height =
static_cast<int>(data[3] * yFactor);
540 centerX =
static_cast<int>(data[0] * frameDims.width);
541 centerY =
static_cast<int>(data[1] * frameDims.height);
542 width =
static_cast<int>(data[2] * frameDims.width);
543 height =
static_cast<int>(data[3] * frameDims.height);
546 int left = centerX - width / 2;
547 int top = centerY - height / 2;
549 classIds.push_back(rowClassScores.front().classId);
550 confidences.push_back(confidence);
551 boxes.push_back(cv::Rect(left, top, width, height));
552 detectionClassScores.push_back(rowClassScores);
553 detectionMasks.push_back({});
560 std::vector<int> indices;
561 cv::dnn::NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices);
564 std::vector<cv::Rect> sortBoxes;
565 std::vector<float> sortConfidences;
566 std::vector<int> sortClassIds;
567 std::vector<std::vector<ClassScore>> sortClassScores;
568 std::vector<CVObjectMaskData> sortMasks;
569 for(
auto index : indices) {
570 sortBoxes.push_back(boxes[index]);
571 sortConfidences.push_back(confidences[index]);
572 sortClassIds.push_back(classIds[index]);
573 sortClassScores.push_back(detectionClassScores[index]);
574 sortMasks.push_back(index <
static_cast<int>(detectionMasks.size()) ? detectionMasks[index] :
CVObjectMaskData());
576 sort.
update(sortBoxes, frameId, sqrt(pow(frameDims.width,2) + pow(frameDims.height, 2)), sortConfidences, sortClassIds, sortClassScores);
579 boxes.clear(); confidences.clear(); classIds.clear(); objectIds.clear();
580 std::vector<CVObjectMaskData> masks;
583 if(TBox.frame == frameId){
584 boxes.push_back(TBox.box);
585 confidences.push_back(TBox.confidence);
586 classIds.push_back(TBox.classId);
587 objectIds.push_back(TBox.id);
589 double bestIoU = 0.0;
590 for (
size_t maskIndex = 0; maskIndex < sortMasks.size(); ++maskIndex) {
591 if (!sortMasks[maskIndex].HasData() || sortClassIds[maskIndex] != TBox.classId)
594 if (score > bestIoU) {
596 mask = sortMasks[maskIndex];
602 const auto recentMask = recentObjectMasks.find(TBox.id);
603 if (recentMask != recentObjectMasks.end() &&
604 frameId > recentMask->second.frameId &&
605 frameId - recentMask->second.frameId <= 5) {
606 mask = TransformMaskToBox(
607 recentMask->second.mask,
608 recentMask->second.box,
616 masks.push_back(mask);
621 for(uint i = 0; i<boxes.size(); i++){
622 for(uint j = i+1; j<boxes.size(); j++){
623 int xc_1 = boxes[i].x + (int)(boxes[i].width/2), yc_1 = boxes[i].y + (int)(boxes[i].height/2);
624 int xc_2 = boxes[j].x + (int)(boxes[j].width/2), yc_2 = boxes[j].y + (int)(boxes[j].height/2);
626 if(fabs(xc_1 - xc_2) < 10 && fabs(yc_1 - yc_2) < 10){
627 if(classIds[i] == classIds[j]){
628 if(confidences[i] >= confidences[j]){
629 boxes.erase(boxes.begin() + j);
630 classIds.erase(classIds.begin() + j);
631 confidences.erase(confidences.begin() + j);
632 objectIds.erase(objectIds.begin() + j);
633 masks.erase(masks.begin() + j);
637 boxes.erase(boxes.begin() + i);
638 classIds.erase(classIds.begin() + i);
639 confidences.erase(confidences.begin() + i);
640 objectIds.erase(objectIds.begin() + i);
641 masks.erase(masks.begin() + i);
651 for(uint i = 0; i<boxes.size(); i++){
652 for(uint j = i+1; j<boxes.size(); j++){
654 if( iou(boxes[i], boxes[j])){
655 if(classIds[i] == classIds[j]){
656 if(confidences[i] >= confidences[j]){
657 boxes.erase(boxes.begin() + j);
658 classIds.erase(classIds.begin() + j);
659 confidences.erase(confidences.begin() + j);
660 objectIds.erase(objectIds.begin() + j);
661 masks.erase(masks.begin() + j);
665 boxes.erase(boxes.begin() + i);
666 classIds.erase(classIds.begin() + i);
667 confidences.erase(confidences.begin() + i);
668 objectIds.erase(objectIds.begin() + i);
669 masks.erase(masks.begin() + i);
679 std::vector<cv::Rect_<float>> normalized_boxes;
680 for(
auto box : boxes){
681 cv::Rect_<float> normalized_box;
682 normalized_box.x = (box.x)/(
float)frameDims.
width;
683 normalized_box.y = (box.y)/(
float)frameDims.height;
684 normalized_box.width = (box.width)/(
float)frameDims.width;
685 normalized_box.height = (box.height)/(
float)frameDims.height;
686 normalized_boxes.push_back(normalized_box);
693 bool CVObjectDetection::iou(cv::Rect pred_box, cv::Rect sort_box){
695 int xA = std::max(pred_box.x, sort_box.x);
696 int yA = std::max(pred_box.y, sort_box.y);
697 int xB = std::min(pred_box.x + pred_box.width, sort_box.x + sort_box.width);
698 int yB = std::min(pred_box.y + pred_box.height, sort_box.y + sort_box.height);
701 int interArea = std::max(0, xB - xA + 1) * std::max(0, yB - yA + 1);
704 int boxAArea = (pred_box.width + 1) * (pred_box.height + 1);
705 int boxBArea = (sort_box.width + 1) * (sort_box.height + 1);
708 float iou = interArea / (float)(boxAArea + boxBArea - interArea);
717 std::vector<cv::String> CVObjectDetection::getOutputsNames(
const cv::dnn::Net& net)
720 std::vector<int> outLayers = net.getUnconnectedOutLayers();
723 std::vector<cv::String> layersNames = net.getLayerNames();
726 std::vector<cv::String> names;
727 names.resize(outLayers.size());
728 for (
size_t i = 0; i < outLayers.size(); ++i)
729 names[i] = layersNames[outLayers[i] - 1];
744 void CVObjectDetection::NormalizeTrackedClasses()
746 struct ClassEvidence {
747 float confidenceSum = 0.0f;
751 std::map<int, std::map<int, ClassEvidence>> objectClassEvidence;
754 const size_t detectionCount = std::min(detections.
objectIds.size(), detections.
classIds.size());
755 for (
size_t i = 0; i < detectionCount; ++i) {
757 ClassEvidence& evidence = objectClassEvidence[detections.
objectIds[i]][detections.
classIds[i]];
758 evidence.confidenceSum += confidence;
763 std::map<int, int> dominantClassByObject;
764 for (
const auto& objectEvidence : objectClassEvidence) {
765 const int objectId = objectEvidence.first;
766 int bestClassId = -1;
767 ClassEvidence bestEvidence;
768 for (
const auto& classEvidence : objectEvidence.second) {
769 const int classId = classEvidence.first;
770 const ClassEvidence& evidence = classEvidence.second;
771 if (bestClassId < 0 ||
772 evidence.confidenceSum > bestEvidence.confidenceSum ||
773 (evidence.confidenceSum == bestEvidence.confidenceSum && evidence.count > bestEvidence.count)) {
774 bestClassId = classId;
775 bestEvidence = evidence;
778 if (bestClassId >= 0) {
779 dominantClassByObject[objectId] = bestClassId;
785 const size_t detectionCount = std::min(detections.
objectIds.size(), detections.
classIds.size());
786 for (
size_t i = 0; i < detectionCount; ++i) {
787 const auto dominantClass = dominantClassByObject.find(detections.
objectIds[i]);
788 if (dominantClass != dominantClassByObject.end()) {
789 detections.
classIds[i] = dominantClass->second;
796 if(protobuf_data_path.empty()) {
797 cerr <<
"Missing path to object detection protobuf data file." << endl;
801 NormalizeTrackedClasses();
804 pb_objdetect::ObjDetect objMessage;
807 for(
int i = 0; i<classNames.size(); i++){
808 std::string* className = objMessage.add_classnames();
809 className->assign(classNames.at(i));
819 *objMessage.mutable_last_updated() = TimeUtil::SecondsToTimestamp(time(NULL));
823 std::fstream output(protobuf_data_path, ios::out | ios::trunc | ios::binary);
824 if (!objMessage.SerializeToOstream(&output)) {
825 cerr <<
"Failed to write protobuf message." << endl;
831 google::protobuf::ShutdownProtobufLibrary();
841 pbFrameData->set_id(dData.
frameId);
843 for(
size_t i = 0; i < dData.
boxes.size(); i++){
844 pb_objdetect::Frame_Box* box = pbFrameData->add_bounding_box();
847 box->set_x(dData.
boxes.at(i).x);
848 box->set_y(dData.
boxes.at(i).y);
849 box->set_w(dData.
boxes.at(i).width);
850 box->set_h(dData.
boxes.at(i).height);
851 box->set_classid(dData.
classIds.at(i));
853 box->set_objectid(dData.
objectIds.at(i));
855 if (i < dData.
masks.size() && dData.
masks.at(i).HasData()) {
856 pb_objdetect::Frame_Box_Mask* mask = box->mutable_mask();
857 mask->set_width(dData.
masks.at(i).width);
858 mask->set_height(dData.
masks.at(i).height);
859 for (uint32_t count : dData.
masks.at(i).rle) {
860 mask->add_rle(count);
876 catch (
const std::exception& e)
880 std::cout<<
"JSON is invalid (missing keys or invalid data types)"<<std::endl;
888 if (!root[
"protobuf_data_path"].isNull()){
889 protobuf_data_path = (root[
"protobuf_data_path"].asString());
892 if (!root[
"processing-device"].isNull()){
893 processingDevice = (root[
"processing-device"].asString());
895 if (!root[
"processing_device"].isNull()){
896 processingDevice = (root[
"processing_device"].asString());
898 if (!root[
"class-names"].isNull()){
899 classesFile = (root[
"class-names"].asString());
901 if (!root[
"classes_file"].isNull()){
902 classesFile = (root[
"classes_file"].asString());
904 if (!root[
"model"].isNull()){
905 modelPath = (root[
"model"].asString());
907 if (!root[
"model_path"].isNull()){
908 modelPath = (root[
"model_path"].asString());
910 if (!root[
"input-width"].isNull()){
911 inpWidth = root[
"input-width"].asInt();
913 if (!root[
"input_width"].isNull()){
914 inpWidth = root[
"input_width"].asInt();
916 if (!root[
"input-height"].isNull()){
917 inpHeight = root[
"input-height"].asInt();
919 if (!root[
"input_height"].isNull()){
920 inpHeight = root[
"input_height"].asInt();
922 if (!root[
"confidence-threshold"].isNull()){
923 confThreshold = root[
"confidence-threshold"].asFloat();
925 if (!root[
"confidence_threshold"].isNull()){
926 confThreshold = root[
"confidence_threshold"].asFloat();
928 if (!root[
"nms-threshold"].isNull()){
929 nmsThreshold = root[
"nms-threshold"].asFloat();
931 if (!root[
"nms_threshold"].isNull()){
932 nmsThreshold = root[
"nms_threshold"].asFloat();
934 if (!root[
"generate-masks"].isNull()){
935 generateMasks = root[
"generate-masks"].asBool();
937 if (!root[
"generate_masks"].isNull()){
938 generateMasks = root[
"generate_masks"].asBool();
950 if(protobuf_data_path.empty()) {
951 cerr <<
"Missing path to object detection protobuf data file." << endl;
956 pb_objdetect::ObjDetect objMessage;
960 fstream input(protobuf_data_path, ios::in | ios::binary);
961 if (!objMessage.ParseFromIstream(&input)) {
962 cerr <<
"Failed to parse protobuf message." << endl;
971 for(
int i = 0; i < objMessage.classnames_size(); i++){
972 classNames.push_back(objMessage.classnames(i));
976 for (
size_t i = 0; i < objMessage.frame_size(); i++) {
978 const pb_objdetect::Frame& pbFrameData = objMessage.frame(i);
981 size_t id = pbFrameData.id();
984 const google::protobuf::RepeatedPtrField<pb_objdetect::Frame_Box > &pBox = pbFrameData.bounding_box();
987 std::vector<int> classIds;
988 std::vector<float> confidences;
989 std::vector<cv::Rect_<float>> boxes;
990 std::vector<int> objectIds;
991 std::vector<CVObjectMaskData> masks;
993 for(
int i = 0; i < pbFrameData.bounding_box_size(); i++){
995 float x = pBox.Get(i).x();
float y = pBox.Get(i).y();
996 float w = pBox.Get(i).w();
float h = pBox.Get(i).h();
998 cv::Rect_<float> box(x, y, w, h);
1001 int classId = pBox.Get(i).classid();
float confidence = pBox.Get(i).confidence();
1003 int objectId = pBox.Get(i).objectid();
1006 boxes.push_back(box); classIds.push_back(classId); confidences.push_back(confidence);
1007 objectIds.push_back(objectId);
1009 if (pBox.Get(i).has_mask()) {
1010 mask.
width = pBox.Get(i).mask().width();
1011 mask.
height = pBox.Get(i).mask().height();
1012 for (
int rleIndex = 0; rleIndex < pBox.Get(i).mask().rle_size(); ++rleIndex) {
1013 mask.
rle.push_back(pBox.Get(i).mask().rle(rleIndex));
1016 masks.push_back(mask);
1024 google::protobuf::ShutdownProtobufLibrary();