OpenShot Library | libopenshot  0.4.0
CVObjectDetection.cpp
Go to the documentation of this file.
1 
10 // Copyright (c) 2008-2019 OpenShot Studios, LLC
11 //
12 // SPDX-License-Identifier: LGPL-3.0-or-later
13 
14 #include <fstream>
15 #include <iomanip>
16 #include <iostream>
17 
18 #include "CVObjectDetection.h"
19 #include "Exceptions.h"
20 
21 #include "objdetectdata.pb.h"
22 #include <google/protobuf/util/time_util.h>
23 
24 using namespace std;
25 using namespace openshot;
26 using google::protobuf::util::TimeUtil;
27 
28 CVObjectDetection::CVObjectDetection(std::string processInfoJson, ProcessingController &processingController)
29 : processingController(&processingController), processingDevice("CPU"){
30  SetJson(processInfoJson);
31  confThreshold = 0.5;
32  nmsThreshold = 0.1;
33 }
34 
35 void CVObjectDetection::setProcessingDevice(){
36  if(processingDevice == "GPU"){
37  net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
38  net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
39  }
40  else if(processingDevice == "CPU"){
41  net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
42  net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
43  }
44 }
45 
46 void CVObjectDetection::detectObjectsClip(openshot::Clip &video, size_t _start, size_t _end, bool process_interval)
47 {
48 
49  start = _start; end = _end;
50 
51  video.Open();
52 
53  if(error){
54  return;
55  }
56 
57  processingController->SetError(false, "");
58 
59  // Load names of classes
60  std::ifstream ifs(classesFile.c_str());
61  std::string line;
62  while (std::getline(ifs, line)) classNames.push_back(line);
63 
64  // Load the network
65  if(classesFile == "" || modelConfiguration == "" || modelWeights == "")
66  return;
67  net = cv::dnn::readNetFromDarknet(modelConfiguration, modelWeights);
68  setProcessingDevice();
69 
70  size_t frame_number;
71  if(!process_interval || end <= 1 || end-start == 0){
72  // Get total number of frames in video
73  start = (int)(video.Start() * video.Reader()->info.fps.ToFloat());
74  end = (int)(video.End() * video.Reader()->info.fps.ToFloat());
75  }
76 
77  for (frame_number = start; frame_number <= end; frame_number++)
78  {
79  // Stop the feature tracker process
80  if(processingController->ShouldStop()){
81  return;
82  }
83 
84  std::shared_ptr<openshot::Frame> f = video.GetFrame(frame_number);
85 
86  // Grab OpenCV Mat image
87  cv::Mat cvimage = f->GetImageCV();
88 
89  DetectObjects(cvimage, frame_number);
90 
91  // Update progress
92  processingController->SetProgress(uint(100*(frame_number-start)/(end-start)));
93 
94  }
95 }
96 
97 void CVObjectDetection::DetectObjects(const cv::Mat &frame, size_t frameId){
98  // Get frame as OpenCV Mat
99  cv::Mat blob;
100 
101  // Create a 4D blob from the frame.
102  int inpWidth, inpHeight;
103  inpWidth = inpHeight = 416;
104 
105  cv::dnn::blobFromImage(frame, blob, 1/255.0, cv::Size(inpWidth, inpHeight), cv::Scalar(0,0,0), true, false);
106 
107  //Sets the input to the network
108  net.setInput(blob);
109 
110  // Runs the forward pass to get output of the output layers
111  std::vector<cv::Mat> outs;
112  net.forward(outs, getOutputsNames(net));
113 
114  // Remove the bounding boxes with low confidence
115  postprocess(frame.size(), outs, frameId);
116 
117 }
118 
119 
120 // Remove the bounding boxes with low confidence using non-maxima suppression
121 void CVObjectDetection::postprocess(const cv::Size &frameDims, const std::vector<cv::Mat>& outs, size_t frameId)
122 {
123  std::vector<int> classIds;
124  std::vector<float> confidences;
125  std::vector<cv::Rect> boxes;
126  std::vector<int> objectIds;
127 
128  for (size_t i = 0; i < outs.size(); ++i)
129  {
130  // Scan through all the bounding boxes output from the network and keep only the
131  // ones with high confidence scores. Assign the box's class label as the class
132  // with the highest score for the box.
133  float* data = (float*)outs[i].data;
134  for (int j = 0; j < outs[i].rows; ++j, data += outs[i].cols)
135  {
136  cv::Mat scores = outs[i].row(j).colRange(5, outs[i].cols);
137  cv::Point classIdPoint;
138  double confidence;
139  // Get the value and location of the maximum score
140  cv::minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
141  if (confidence > confThreshold)
142  {
143  int centerX = (int)(data[0] * frameDims.width);
144  int centerY = (int)(data[1] * frameDims.height);
145  int width = (int)(data[2] * frameDims.width);
146  int height = (int)(data[3] * frameDims.height);
147  int left = centerX - width / 2;
148  int top = centerY - height / 2;
149 
150  classIds.push_back(classIdPoint.x);
151  confidences.push_back((float)confidence);
152  boxes.push_back(cv::Rect(left, top, width, height));
153  }
154  }
155  }
156 
157  // Perform non maximum suppression to eliminate redundant overlapping boxes with
158  // lower confidences
159  std::vector<int> indices;
160  cv::dnn::NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices);
161 
162  // Pass boxes to SORT algorithm
163  std::vector<cv::Rect> sortBoxes;
164  for(auto box : boxes)
165  sortBoxes.push_back(box);
166  sort.update(sortBoxes, frameId, sqrt(pow(frameDims.width,2) + pow(frameDims.height, 2)), confidences, classIds);
167 
168  // Clear data vectors
169  boxes.clear(); confidences.clear(); classIds.clear(); objectIds.clear();
170  // Get SORT predicted boxes
171  for(auto TBox : sort.frameTrackingResult){
172  if(TBox.frame == frameId){
173  boxes.push_back(TBox.box);
174  confidences.push_back(TBox.confidence);
175  classIds.push_back(TBox.classId);
176  objectIds.push_back(TBox.id);
177  }
178  }
179 
180  // Remove boxes based on controids distance
181  for(uint i = 0; i<boxes.size(); i++){
182  for(uint j = i+1; j<boxes.size(); j++){
183  int xc_1 = boxes[i].x + (int)(boxes[i].width/2), yc_1 = boxes[i].y + (int)(boxes[i].width/2);
184  int xc_2 = boxes[j].x + (int)(boxes[j].width/2), yc_2 = boxes[j].y + (int)(boxes[j].width/2);
185 
186  if(fabs(xc_1 - xc_2) < 10 && fabs(yc_1 - yc_2) < 10){
187  if(classIds[i] == classIds[j]){
188  if(confidences[i] >= confidences[j]){
189  boxes.erase(boxes.begin() + j);
190  classIds.erase(classIds.begin() + j);
191  confidences.erase(confidences.begin() + j);
192  objectIds.erase(objectIds.begin() + j);
193  break;
194  }
195  else{
196  boxes.erase(boxes.begin() + i);
197  classIds.erase(classIds.begin() + i);
198  confidences.erase(confidences.begin() + i);
199  objectIds.erase(objectIds.begin() + i);
200  i = 0;
201  break;
202  }
203  }
204  }
205  }
206  }
207 
208  // Remove boxes based in IOU score
209  for(uint i = 0; i<boxes.size(); i++){
210  for(uint j = i+1; j<boxes.size(); j++){
211 
212  if( iou(boxes[i], boxes[j])){
213  if(classIds[i] == classIds[j]){
214  if(confidences[i] >= confidences[j]){
215  boxes.erase(boxes.begin() + j);
216  classIds.erase(classIds.begin() + j);
217  confidences.erase(confidences.begin() + j);
218  objectIds.erase(objectIds.begin() + j);
219  break;
220  }
221  else{
222  boxes.erase(boxes.begin() + i);
223  classIds.erase(classIds.begin() + i);
224  confidences.erase(confidences.begin() + i);
225  objectIds.erase(objectIds.begin() + i);
226  i = 0;
227  break;
228  }
229  }
230  }
231  }
232  }
233 
234  // Normalize boxes coordinates
235  std::vector<cv::Rect_<float>> normalized_boxes;
236  for(auto box : boxes){
237  cv::Rect_<float> normalized_box;
238  normalized_box.x = (box.x)/(float)frameDims.width;
239  normalized_box.y = (box.y)/(float)frameDims.height;
240  normalized_box.width = (box.width)/(float)frameDims.width;
241  normalized_box.height = (box.height)/(float)frameDims.height;
242  normalized_boxes.push_back(normalized_box);
243  }
244 
245  detectionsData[frameId] = CVDetectionData(classIds, confidences, normalized_boxes, frameId, objectIds);
246 }
247 
248 // Compute IOU between 2 boxes
249 bool CVObjectDetection::iou(cv::Rect pred_box, cv::Rect sort_box){
250  // Determine the (x, y)-coordinates of the intersection rectangle
251  int xA = std::max(pred_box.x, sort_box.x);
252  int yA = std::max(pred_box.y, sort_box.y);
253  int xB = std::min(pred_box.x + pred_box.width, sort_box.x + sort_box.width);
254  int yB = std::min(pred_box.y + pred_box.height, sort_box.y + sort_box.height);
255 
256  // Compute the area of intersection rectangle
257  int interArea = std::max(0, xB - xA + 1) * std::max(0, yB - yA + 1);
258 
259  // Compute the area of both the prediction and ground-truth rectangles
260  int boxAArea = (pred_box.width + 1) * (pred_box.height + 1);
261  int boxBArea = (sort_box.width + 1) * (sort_box.height + 1);
262 
263  // Compute the intersection over union by taking the intersection
264  float iou = interArea / (float)(boxAArea + boxBArea - interArea);
265 
266  // If IOU is above this value the boxes are very close (probably a variation of the same bounding box)
267  if(iou > 0.5)
268  return true;
269  return false;
270 }
271 
272 // Get the names of the output layers
273 std::vector<cv::String> CVObjectDetection::getOutputsNames(const cv::dnn::Net& net)
274 {
275  static std::vector<cv::String> names;
276 
277  //Get the indices of the output layers, i.e. the layers with unconnected outputs
278  std::vector<int> outLayers = net.getUnconnectedOutLayers();
279 
280  //get the names of all the layers in the network
281  std::vector<cv::String> layersNames = net.getLayerNames();
282 
283  // Get the names of the output layers in names
284  names.resize(outLayers.size());
285  for (size_t i = 0; i < outLayers.size(); ++i)
286  names[i] = layersNames[outLayers[i] - 1];
287  return names;
288 }
289 
291  // Check if the stabilizer info for the requested frame exists
292  if ( detectionsData.find(frameId) == detectionsData.end() ) {
293 
294  return CVDetectionData();
295  } else {
296 
297  return detectionsData[frameId];
298  }
299 }
300 
302  // Create tracker message
303  pb_objdetect::ObjDetect objMessage;
304 
305  //Save class names in protobuf message
306  for(int i = 0; i<classNames.size(); i++){
307  std::string* className = objMessage.add_classnames();
308  className->assign(classNames.at(i));
309  }
310 
311  // Iterate over all frames data and save in protobuf message
312  for(std::map<size_t,CVDetectionData>::iterator it=detectionsData.begin(); it!=detectionsData.end(); ++it){
313  CVDetectionData dData = it->second;
314  pb_objdetect::Frame* pbFrameData;
315  AddFrameDataToProto(objMessage.add_frame(), dData);
316  }
317 
318  // Add timestamp
319  *objMessage.mutable_last_updated() = TimeUtil::SecondsToTimestamp(time(NULL));
320 
321  {
322  // Write the new message to disk.
323  std::fstream output(protobuf_data_path, ios::out | ios::trunc | ios::binary);
324  if (!objMessage.SerializeToOstream(&output)) {
325  cerr << "Failed to write protobuf message." << endl;
326  return false;
327  }
328  }
329 
330  // Delete all global objects allocated by libprotobuf.
331  google::protobuf::ShutdownProtobufLibrary();
332 
333  return true;
334 
335 }
336 
337 // Add frame object detection into protobuf message.
338 void CVObjectDetection::AddFrameDataToProto(pb_objdetect::Frame* pbFrameData, CVDetectionData& dData) {
339 
340  // Save frame number and rotation
341  pbFrameData->set_id(dData.frameId);
342 
343  for(size_t i = 0; i < dData.boxes.size(); i++){
344  pb_objdetect::Frame_Box* box = pbFrameData->add_bounding_box();
345 
346  // Save bounding box data
347  box->set_x(dData.boxes.at(i).x);
348  box->set_y(dData.boxes.at(i).y);
349  box->set_w(dData.boxes.at(i).width);
350  box->set_h(dData.boxes.at(i).height);
351  box->set_classid(dData.classIds.at(i));
352  box->set_confidence(dData.confidences.at(i));
353  box->set_objectid(dData.objectIds.at(i));
354 
355  }
356 }
357 
358 // Load JSON string into this object
359 void CVObjectDetection::SetJson(const std::string value) {
360  // Parse JSON string into JSON objects
361  try
362  {
363  const Json::Value root = openshot::stringToJson(value);
364  // Set all values that match
365 
366  SetJsonValue(root);
367  }
368  catch (const std::exception& e)
369  {
370  // Error parsing JSON (or missing keys)
371  // throw InvalidJSON("JSON is invalid (missing keys or invalid data types)");
372  std::cout<<"JSON is invalid (missing keys or invalid data types)"<<std::endl;
373  }
374 }
375 
376 // Load Json::Value into this object
377 void CVObjectDetection::SetJsonValue(const Json::Value root) {
378 
379  // Set data from Json (if key is found)
380  if (!root["protobuf_data_path"].isNull()){
381  protobuf_data_path = (root["protobuf_data_path"].asString());
382  }
383  if (!root["processing-device"].isNull()){
384  processingDevice = (root["processing-device"].asString());
385  }
386  if (!root["model-config"].isNull()){
387  modelConfiguration = (root["model-config"].asString());
388  std::ifstream infile(modelConfiguration);
389  if(!infile.good()){
390  processingController->SetError(true, "Incorrect path to model config file");
391  error = true;
392  }
393 
394  }
395  if (!root["model-weights"].isNull()){
396  modelWeights= (root["model-weights"].asString());
397  std::ifstream infile(modelWeights);
398  if(!infile.good()){
399  processingController->SetError(true, "Incorrect path to model weight file");
400  error = true;
401  }
402 
403  }
404  if (!root["class-names"].isNull()){
405  classesFile = (root["class-names"].asString());
406 
407  std::ifstream infile(classesFile);
408  if(!infile.good()){
409  processingController->SetError(true, "Incorrect path to class name file");
410  error = true;
411  }
412 
413  }
414 }
415 
416 /*
417 ||||||||||||||||||||||||||||||||||||||||||||||||||
418  ONLY FOR MAKE TEST
419 ||||||||||||||||||||||||||||||||||||||||||||||||||
420 */
421 
422 // Load protobuf data file
424  // Create tracker message
425  pb_objdetect::ObjDetect objMessage;
426 
427  {
428  // Read the existing tracker message.
429  fstream input(protobuf_data_path, ios::in | ios::binary);
430  if (!objMessage.ParseFromIstream(&input)) {
431  cerr << "Failed to parse protobuf message." << endl;
432  return false;
433  }
434  }
435 
436  // Make sure classNames and detectionsData are empty
437  classNames.clear(); detectionsData.clear();
438 
439  // Get all classes names and assign a color to them
440  for(int i = 0; i < objMessage.classnames_size(); i++){
441  classNames.push_back(objMessage.classnames(i));
442  }
443 
444  // Iterate over all frames of the saved message
445  for (size_t i = 0; i < objMessage.frame_size(); i++) {
446  // Create protobuf message reader
447  const pb_objdetect::Frame& pbFrameData = objMessage.frame(i);
448 
449  // Get frame Id
450  size_t id = pbFrameData.id();
451 
452  // Load bounding box data
453  const google::protobuf::RepeatedPtrField<pb_objdetect::Frame_Box > &pBox = pbFrameData.bounding_box();
454 
455  // Construct data vectors related to detections in the current frame
456  std::vector<int> classIds;
457  std::vector<float> confidences;
458  std::vector<cv::Rect_<float>> boxes;
459  std::vector<int> objectIds;
460 
461  for(int i = 0; i < pbFrameData.bounding_box_size(); i++){
462  // Get bounding box coordinates
463  float x = pBox.Get(i).x(); float y = pBox.Get(i).y();
464  float w = pBox.Get(i).w(); float h = pBox.Get(i).h();
465  // Create OpenCV rectangle with the bouding box info
466  cv::Rect_<float> box(x, y, w, h);
467 
468  // Get class Id (which will be assign to a class name) and prediction confidence
469  int classId = pBox.Get(i).classid(); float confidence = pBox.Get(i).confidence();
470  // Get object Id
471  int objectId = pBox.Get(i).objectid();
472 
473  // Push back data into vectors
474  boxes.push_back(box); classIds.push_back(classId); confidences.push_back(confidence);
475  }
476 
477  // Assign data to object detector map
478  detectionsData[id] = CVDetectionData(classIds, confidences, boxes, id, objectIds);
479  }
480 
481  // Delete all global objects allocated by libprotobuf.
482  google::protobuf::ShutdownProtobufLibrary();
483 
484  return true;
485 }
openshot::stringToJson
const Json::Value stringToJson(const std::string value)
Definition: Json.cpp:16
openshot::Clip::Open
void Open() override
Open the internal reader.
Definition: Clip.cpp:320
CVObjectDetection.h
Header file for CVObjectDetection class.
ProcessingController::ShouldStop
bool ShouldStop()
Definition: ProcessingController.h:68
ProcessingController::SetError
void SetError(bool err, std::string message)
Definition: ProcessingController.h:74
openshot
This namespace is the default namespace for all code in the openshot library.
Definition: Compressor.h:28
openshot::CVDetectionData::classIds
std::vector< int > classIds
Definition: CVObjectDetection.h:53
openshot::Clip
This class represents a clip (used to arrange readers on the timeline)
Definition: Clip.h:89
openshot::Clip::End
float End() const override
Get end position (in seconds) of clip (trim end of video), which can be affected by the time curve.
Definition: Clip.cpp:356
openshot::CVObjectDetection::detectionsData
std::map< size_t, CVDetectionData > detectionsData
Definition: CVObjectDetection.h:105
openshot::Clip::GetFrame
std::shared_ptr< openshot::Frame > GetFrame(int64_t clip_frame_number) override
Get an openshot::Frame object for a specific frame number of this clip. The image size and number of ...
Definition: Clip.cpp:391
openshot::CVDetectionData::confidences
std::vector< float > confidences
Definition: CVObjectDetection.h:54
SortTracker::frameTrackingResult
std::vector< TrackingBox > frameTrackingResult
Definition: sort.hpp:56
openshot::CVObjectDetection::GetDetectionData
CVDetectionData GetDetectionData(size_t frameId)
Definition: CVObjectDetection.cpp:290
openshot::CVObjectDetection::AddFrameDataToProto
void AddFrameDataToProto(pb_objdetect::Frame *pbFrameData, CVDetectionData &dData)
Definition: CVObjectDetection.cpp:338
openshot::CVDetectionData::objectIds
std::vector< int > objectIds
Definition: CVObjectDetection.h:56
openshot::CVObjectDetection::_LoadObjDetectdData
bool _LoadObjDetectdData()
Definition: CVObjectDetection.cpp:423
openshot::CVDetectionData::frameId
size_t frameId
Definition: CVObjectDetection.h:52
openshot::CVDetectionData
Definition: CVObjectDetection.h:37
openshot::ClipBase::Start
void Start(float value)
Set start position (in seconds) of clip (trim start of video)
Definition: ClipBase.cpp:42
openshot::CVObjectDetection::SetJsonValue
void SetJsonValue(const Json::Value root)
Load Json::Value into this object.
Definition: CVObjectDetection.cpp:377
openshot::CVDetectionData::boxes
std::vector< cv::Rect_< float > > boxes
Definition: CVObjectDetection.h:55
openshot::CVObjectDetection::SetJson
void SetJson(const std::string value)
Load JSON string into this object.
Definition: CVObjectDetection.cpp:359
ProcessingController
Definition: ProcessingController.h:20
openshot::CVObjectDetection::SaveObjDetectedData
bool SaveObjDetectedData()
Protobuf Save and Load methods.
Definition: CVObjectDetection.cpp:301
openshot::Clip::Reader
void Reader(openshot::ReaderBase *new_reader)
Set the current reader.
Definition: Clip.cpp:274
ProcessingController::SetProgress
void SetProgress(uint p)
Definition: ProcessingController.h:52
openshot::CVObjectDetection::detectObjectsClip
void detectObjectsClip(openshot::Clip &video, size_t start=0, size_t end=0, bool process_interval=false)
Definition: CVObjectDetection.cpp:46
Exceptions.h
Header file for all Exception classes.
SortTracker::update
void update(std::vector< cv::Rect > detection, int frame_count, double image_diagonal, std::vector< float > confidences, std::vector< int > classIds)
Definition: sort.cpp:45