OpenShot Library | libopenshot  0.2.7
CVObjectDetection.cpp
1 /**
2  * @file
3  * @brief Source file for CVObjectDetection class
4  * @author Jonathan Thomas <jonathan@openshot.org>
5  * @author Brenno Caldato <brenno.caldato@outlook.com>
6  *
7  * @ref License
8  */
9 
10 /* LICENSE
11  *
12  * Copyright (c) 2008-2019 OpenShot Studios, LLC
13  * <http://www.openshotstudios.com/>. This file is part of
14  * OpenShot Library (libopenshot), an open-source project dedicated to
15  * delivering high quality video editing and animation solutions to the
16  * world. For more information visit <http://www.openshot.org/>.
17  *
18  * OpenShot Library (libopenshot) is free software: you can redistribute it
19  * and/or modify it under the terms of the GNU Lesser General Public License
20  * as published by the Free Software Foundation, either version 3 of the
21  * License, or (at your option) any later version.
22  *
23  * OpenShot Library (libopenshot) is distributed in the hope that it will be
24  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
25  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26  * GNU Lesser General Public License for more details.
27  *
28  * You should have received a copy of the GNU Lesser General Public License
29  * along with OpenShot Library. If not, see <http://www.gnu.org/licenses/>.
30  */
31 
32 #include <fstream>
33 #include <iomanip>
34 #include <iostream>
35 
36 #include "CVObjectDetection.h"
37 #include <google/protobuf/util/time_util.h>
38 
39 using namespace std;
40 using namespace openshot;
41 using google::protobuf::util::TimeUtil;
42 
43 CVObjectDetection::CVObjectDetection(std::string processInfoJson, ProcessingController &processingController)
44 : processingController(&processingController), processingDevice("CPU"){
45  SetJson(processInfoJson);
46  confThreshold = 0.5;
47  nmsThreshold = 0.1;
48 }
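// Illustrative usage sketch (not part of the library source): construct with a
// JSON config string (see the example after SetJsonValue() below), run detection
// over a clip, then save or query the results. Paths are placeholders.
//
//   ProcessingController controller;
//   CVObjectDetection detector(json_config, controller);   // json_config: std::string
//   openshot::Clip clip("input.mp4");
//   detector.detectObjectsClip(clip);                       // defaults: whole clip
//   detector.SaveObjDetectedData();                         // write protobuf output
//   CVDetectionData d = detector.GetDetectionData(1);       // detections for frame 1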
49 
50 void CVObjectDetection::setProcessingDevice(){
51  if(processingDevice == "GPU"){
52  net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
53  net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
54  }
55  else if(processingDevice == "CPU"){
56  net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
57  net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
58  }
59 }
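// Note: DNN_BACKEND_CUDA above only takes effect when OpenCV was built with CUDA
// support; otherwise cv::dnn falls back to a default backend at inference time.
// A minimal sketch of a runtime guard (illustrative, not used by this class):
//
//   #include <opencv2/core/cuda.hpp>
//   if (cv::cuda::getCudaEnabledDeviceCount() > 0)
//       processingDevice = "GPU";
//   else
//       processingDevice = "CPU";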
60 
61 void CVObjectDetection::detectObjectsClip(openshot::Clip &video, size_t _start, size_t _end, bool process_interval)
62 {
63 
64  start = _start; end = _end;
65 
66  video.Open();
67 
68  if(error){
69  return;
70  }
71 
72  processingController->SetError(false, "");
73 
74  // Load names of classes
75  std::ifstream ifs(classesFile.c_str());
76  std::string line;
77  while (std::getline(ifs, line)) classNames.push_back(line);
78 
79  // Load the network
80  if(classesFile == "" || modelConfiguration == "" || modelWeights == "")
81  return;
82  net = cv::dnn::readNetFromDarknet(modelConfiguration, modelWeights);
83  setProcessingDevice();
84 
85  size_t frame_number;
86  if(!process_interval || end <= 1 || end-start == 0){
87  // Get total number of frames in video
88  start = (int)(video.Start() * video.Reader()->info.fps.ToFloat());
89  end = (int)(video.End() * video.Reader()->info.fps.ToFloat());
90  }
91 
92  for (frame_number = start; frame_number <= end; frame_number++)
93  {
94  // Stop the object detection process
95  if(processingController->ShouldStop()){
96  return;
97  }
98 
99  std::shared_ptr<openshot::Frame> f = video.GetFrame(frame_number);
100 
101  // Grab OpenCV Mat image
102  cv::Mat cvimage = f->GetImageCV();
103 
104  DetectObjects(cvimage, frame_number);
105 
106  // Update progress
107  processingController->SetProgress(uint(100*(frame_number-start)/(end-start)));
108 
109  }
110 }
111 
112 void CVObjectDetection::DetectObjects(const cv::Mat &frame, size_t frameId){
113  // Get frame as OpenCV Mat
114  cv::Mat blob;
115 
116  // Create a 4D blob from the frame.
117  int inpWidth, inpHeight;
118  inpWidth = inpHeight = 416;
119 
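 // blobFromImage arguments below: scale pixel values by 1/255 to [0,1], resize
 // to the 416x416 network input, no mean subtraction, swapRB=true (BGR->RGB),
 // and crop=false.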
120  cv::dnn::blobFromImage(frame, blob, 1/255.0, cv::Size(inpWidth, inpHeight), cv::Scalar(0,0,0), true, false);
121 
122  //Sets the input to the network
123  net.setInput(blob);
124 
125  // Runs the forward pass to get output of the output layers
126  std::vector<cv::Mat> outs;
127  net.forward(outs, getOutputsNames(net));
128 
129  // Remove the bounding boxes with low confidence
130  postprocess(frame.size(), outs, frameId);
131 
132 }
133 
134 
135 // Remove the bounding boxes with low confidence using non-maxima suppression
136 void CVObjectDetection::postprocess(const cv::Size &frameDims, const std::vector<cv::Mat>& outs, size_t frameId)
137 {
138  std::vector<int> classIds;
139  std::vector<float> confidences;
140  std::vector<cv::Rect> boxes;
141  std::vector<int> objectIds;
142 
143  for (size_t i = 0; i < outs.size(); ++i)
144  {
145  // Scan through all the bounding boxes output from the network and keep only the
146  // ones with high confidence scores. Assign the box's class label as the class
147  // with the highest score for the box.
148  float* data = (float*)outs[i].data;
149  for (int j = 0; j < outs[i].rows; ++j, data += outs[i].cols)
150  {
151  cv::Mat scores = outs[i].row(j).colRange(5, outs[i].cols);
152  cv::Point classIdPoint;
153  double confidence;
154  // Get the value and location of the maximum score
155  cv::minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
156  if (confidence > confThreshold)
157  {
158  int centerX = (int)(data[0] * frameDims.width);
159  int centerY = (int)(data[1] * frameDims.height);
160  int width = (int)(data[2] * frameDims.width);
161  int height = (int)(data[3] * frameDims.height);
162  int left = centerX - width / 2;
163  int top = centerY - height / 2;
164 
165  classIds.push_back(classIdPoint.x);
166  confidences.push_back((float)confidence);
167  boxes.push_back(cv::Rect(left, top, width, height));
168  }
169  }
170  }
171 
172  // Perform non-maximum suppression to eliminate redundant overlapping boxes with
173  // lower confidences
174  std::vector<int> indices;
175  cv::dnn::NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices);
176 
177  // Pass boxes to SORT algorithm
178  std::vector<cv::Rect> sortBoxes;
179  for(auto box : boxes)
180  sortBoxes.push_back(box);
181  sort.update(sortBoxes, frameId, sqrt(pow(frameDims.width,2) + pow(frameDims.height, 2)), confidences, classIds);
182 
183  // Clear data vectors
184  boxes.clear(); confidences.clear(); classIds.clear(); objectIds.clear();
185  // Get SORT predicted boxes
186  for(auto TBox : sort.frameTrackingResult){
187  if(TBox.frame == frameId){
188  boxes.push_back(TBox.box);
189  confidences.push_back(TBox.confidence);
190  classIds.push_back(TBox.classId);
191  objectIds.push_back(TBox.id);
192  }
193  }
194 
195  // Remove boxes based on centroid distance
196  for(uint i = 0; i<boxes.size(); i++){
197  for(uint j = i+1; j<boxes.size(); j++){
198  int xc_1 = boxes[i].x + (int)(boxes[i].width/2), yc_1 = boxes[i].y + (int)(boxes[i].height/2);
199  int xc_2 = boxes[j].x + (int)(boxes[j].width/2), yc_2 = boxes[j].y + (int)(boxes[j].height/2);
200 
201  if(fabs(xc_1 - xc_2) < 10 && fabs(yc_1 - yc_2) < 10){
202  if(classIds[i] == classIds[j]){
203  if(confidences[i] >= confidences[j]){
204  boxes.erase(boxes.begin() + j);
205  classIds.erase(classIds.begin() + j);
206  confidences.erase(confidences.begin() + j);
207  objectIds.erase(objectIds.begin() + j);
208  break;
209  }
210  else{
211  boxes.erase(boxes.begin() + i);
212  classIds.erase(classIds.begin() + i);
213  confidences.erase(confidences.begin() + i);
214  objectIds.erase(objectIds.begin() + i);
215  i = 0;
216  break;
217  }
218  }
219  }
220  }
221  }
222 
223  // Remove boxes based on IOU score
224  for(uint i = 0; i<boxes.size(); i++){
225  for(uint j = i+1; j<boxes.size(); j++){
226 
227  if( iou(boxes[i], boxes[j])){
228  if(classIds[i] == classIds[j]){
229  if(confidences[i] >= confidences[j]){
230  boxes.erase(boxes.begin() + j);
231  classIds.erase(classIds.begin() + j);
232  confidences.erase(confidences.begin() + j);
233  objectIds.erase(objectIds.begin() + j);
234  break;
235  }
236  else{
237  boxes.erase(boxes.begin() + i);
238  classIds.erase(classIds.begin() + i);
239  confidences.erase(confidences.begin() + i);
240  objectIds.erase(objectIds.begin() + i);
241  i = 0;
242  break;
243  }
244  }
245  }
246  }
247  }
248 
249  // Normalize box coordinates relative to the frame dimensions
250  std::vector<cv::Rect_<float>> normalized_boxes;
251  for(auto box : boxes){
252  cv::Rect_<float> normalized_box;
253  normalized_box.x = (box.x)/(float)frameDims.width;
254  normalized_box.y = (box.y)/(float)frameDims.height;
255  normalized_box.width = (box.width)/(float)frameDims.width;
256  normalized_box.height = (box.height)/(float)frameDims.height;
257  normalized_boxes.push_back(normalized_box);
258  }
259 
260  detectionsData[frameId] = CVDetectionData(classIds, confidences, normalized_boxes, frameId, objectIds);
261 }
262 
263 // Check whether the IOU between 2 boxes is high enough to treat them as duplicates
264 bool CVObjectDetection::iou(cv::Rect pred_box, cv::Rect sort_box){
265  // Determine the (x, y)-coordinates of the intersection rectangle
266  int xA = std::max(pred_box.x, sort_box.x);
267  int yA = std::max(pred_box.y, sort_box.y);
268  int xB = std::min(pred_box.x + pred_box.width, sort_box.x + sort_box.width);
269  int yB = std::min(pred_box.y + pred_box.height, sort_box.y + sort_box.height);
270 
271  // Compute the area of intersection rectangle
272  int interArea = std::max(0, xB - xA + 1) * std::max(0, yB - yA + 1);
273 
274  // Compute the area of both the prediction and ground-truth rectangles
275  int boxAArea = (pred_box.width + 1) * (pred_box.height + 1);
276  int boxBArea = (sort_box.width + 1) * (sort_box.height + 1);
277 
278  // Compute the intersection over union by taking the intersection
279  float iou = interArea / (float)(boxAArea + boxBArea - interArea);
280 
281  // If IOU is above this value the boxes are very close (probably a variation of the same bounding box)
282  if(iou > 0.5)
283  return true;
284  return false;
285 }
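// Worked example of the check above: two 100x100 boxes offset by 50 px in x
// intersect in a 51x101 region (with the +1 convention), so
// IOU = 5151 / (10201 + 10201 - 5151), about 0.34, which is below 0.5 and the
// boxes are kept as distinct detections.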
286 
287 // Get the names of the output layers
288 std::vector<cv::String> CVObjectDetection::getOutputsNames(const cv::dnn::Net& net)
289 {
290  static std::vector<cv::String> names;
291 
292  //Get the indices of the output layers, i.e. the layers with unconnected outputs
293  std::vector<int> outLayers = net.getUnconnectedOutLayers();
294 
295  //get the names of all the layers in the network
296  std::vector<cv::String> layersNames = net.getLayerNames();
297 
298  // Collect the names of the output layers into 'names'
299  names.resize(outLayers.size());
300  for (size_t i = 0; i < outLayers.size(); ++i)
301  names[i] = layersNames[outLayers[i] - 1];
302  return names;
303 }
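// Note: newer OpenCV releases (4.x) expose the same list directly as
// net.getUnconnectedOutLayersNames(); the manual lookup above subtracts 1
// because the layer ids returned by getUnconnectedOutLayers() are 1-based.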
304 
305 CVDetectionData CVObjectDetection::GetDetectionData(size_t frameId){
306  // Check if detection data for the requested frame exists
307  if ( detectionsData.find(frameId) == detectionsData.end() ) {
308 
309  return CVDetectionData();
310  } else {
311 
312  return detectionsData[frameId];
313  }
314 }
315 
316 bool CVObjectDetection::SaveObjDetectedData(){
317  // Create the object detection protobuf message
318  pb_objdetect::ObjDetect objMessage;
319 
320  //Save class names in protobuf message
321  for(int i = 0; i<classNames.size(); i++){
322  std::string* className = objMessage.add_classnames();
323  className->assign(classNames.at(i));
324  }
325 
326  // Iterate over all frames data and save in protobuf message
327  for(std::map<size_t,CVDetectionData>::iterator it=detectionsData.begin(); it!=detectionsData.end(); ++it){
328  CVDetectionData dData = it->second;
329  pb_objdetect::Frame* pbFrameData;
330  AddFrameDataToProto(objMessage.add_frame(), dData);
331  }
332 
333  // Add timestamp
334  *objMessage.mutable_last_updated() = TimeUtil::SecondsToTimestamp(time(NULL));
335 
336  {
337  // Write the new message to disk.
338  std::fstream output(protobuf_data_path, ios::out | ios::trunc | ios::binary);
339  if (!objMessage.SerializeToOstream(&output)) {
340  cerr << "Failed to write protobuf message." << endl;
341  return false;
342  }
343  }
344 
345  // Delete all global objects allocated by libprotobuf.
346  google::protobuf::ShutdownProtobufLibrary();
347 
348  return true;
349 
350 }
351 
352 // Add frame object detection into protobuf message.
353 void CVObjectDetection::AddFrameDataToProto(pb_objdetect::Frame* pbFrameData, CVDetectionData& dData) {
354 
355  // Save frame number
356  pbFrameData->set_id(dData.frameId);
357 
358  for(size_t i = 0; i < dData.boxes.size(); i++){
359  pb_objdetect::Frame_Box* box = pbFrameData->add_bounding_box();
360 
361  // Save bounding box data
362  box->set_x(dData.boxes.at(i).x);
363  box->set_y(dData.boxes.at(i).y);
364  box->set_w(dData.boxes.at(i).width);
365  box->set_h(dData.boxes.at(i).height);
366  box->set_classid(dData.classIds.at(i));
367  box->set_confidence(dData.confidences.at(i));
368  box->set_objectid(dData.objectIds.at(i));
369 
370  }
371 }
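// For reference, the message layout implied by the setters above and the readers
// in _LoadObjDetectdData() below (a sketch inferred from this file, not copied
// from the .proto definition):
//
//   ObjDetect  { repeated string classnames; repeated Frame frame; Timestamp last_updated; }
//   Frame      { id; repeated Frame_Box bounding_box; }
//   Frame_Box  { x, y, w, h (normalized floats); classid; confidence; objectid; }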
372 
373 // Load JSON string into this object
374 void CVObjectDetection::SetJson(const std::string value) {
375  // Parse JSON string into JSON objects
376  try
377  {
378  const Json::Value root = openshot::stringToJson(value);
379  // Set all values that match
380 
381  SetJsonValue(root);
382  }
383  catch (const std::exception& e)
384  {
385  // Error parsing JSON (or missing keys)
386  // throw InvalidJSON("JSON is invalid (missing keys or invalid data types)");
387  std::cout<<"JSON is invalid (missing keys or invalid data types)"<<std::endl;
388  }
389 }
390 
391 // Load Json::Value into this object
392 void CVObjectDetection::SetJsonValue(const Json::Value root) {
393 
394  // Set data from Json (if key is found)
395  if (!root["protobuf_data_path"].isNull()){
396  protobuf_data_path = (root["protobuf_data_path"].asString());
397  }
398  if (!root["processing-device"].isNull()){
399  processingDevice = (root["processing-device"].asString());
400  }
401  if (!root["model-config"].isNull()){
402  modelConfiguration = (root["model-config"].asString());
403  std::ifstream infile(modelConfiguration);
404  if(!infile.good()){
405  processingController->SetError(true, "Incorrect path to model config file");
406  error = true;
407  }
408 
409  }
410  if (!root["model-weights"].isNull()){
411  modelWeights= (root["model-weights"].asString());
412  std::ifstream infile(modelWeights);
413  if(!infile.good()){
414  processingController->SetError(true, "Incorrect path to model weight file");
415  error = true;
416  }
417 
418  }
419  if (!root["class-names"].isNull()){
420  classesFile = (root["class-names"].asString());
421 
422  std::ifstream infile(classesFile);
423  if(!infile.good()){
424  processingController->SetError(true, "Incorrect path to class name file");
425  error = true;
426  }
427 
428  }
429 }
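// Example of a JSON string accepted by SetJson()/SetJsonValue() above; the file
// paths are placeholders, and a model-config, model-weights, or class-names path
// that does not exist sets the error flag through the ProcessingController:
//
//   {
//     "protobuf_data_path": "detections.data",
//     "processing-device": "GPU",
//     "model-config": "yolov3.cfg",
//     "model-weights": "yolov3.weights",
//     "class-names": "coco.names"
//   }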
430 
431 /*
432 ||||||||||||||||||||||||||||||||||||||||||||||||||
433  ONLY FOR MAKE TEST
434 ||||||||||||||||||||||||||||||||||||||||||||||||||
435 */
436 
437 // Load protobuf data file
438 bool CVObjectDetection::_LoadObjDetectdData(){
439  // Create the object detection protobuf message
440  pb_objdetect::ObjDetect objMessage;
441 
442  {
443  // Read the existing object detection message.
444  fstream input(protobuf_data_path, ios::in | ios::binary);
445  if (!objMessage.ParseFromIstream(&input)) {
446  cerr << "Failed to parse protobuf message." << endl;
447  return false;
448  }
449  }
450 
451  // Make sure classNames and detectionsData are empty
452  classNames.clear(); detectionsData.clear();
453 
454  // Get all class names saved in the message
455  for(int i = 0; i < objMessage.classnames_size(); i++){
456  classNames.push_back(objMessage.classnames(i));
457  }
458 
459  // Iterate over all frames of the saved message
460  for (size_t i = 0; i < objMessage.frame_size(); i++) {
461  // Create protobuf message reader
462  const pb_objdetect::Frame& pbFrameData = objMessage.frame(i);
463 
464  // Get frame Id
465  size_t id = pbFrameData.id();
466 
467  // Load bounding box data
468  const google::protobuf::RepeatedPtrField<pb_objdetect::Frame_Box > &pBox = pbFrameData.bounding_box();
469 
470  // Construct data vectors related to detections in the current frame
471  std::vector<int> classIds;
472  std::vector<float> confidences;
473  std::vector<cv::Rect_<float>> boxes;
474  std::vector<int> objectIds;
475 
476  for(int i = 0; i < pbFrameData.bounding_box_size(); i++){
477  // Get bounding box coordinates
478  float x = pBox.Get(i).x(); float y = pBox.Get(i).y();
479  float w = pBox.Get(i).w(); float h = pBox.Get(i).h();
480  // Create OpenCV rectangle with the bounding box info
481  cv::Rect_<float> box(x, y, w, h);
482 
483  // Get class Id (which will be assigned to a class name) and prediction confidence
484  int classId = pBox.Get(i).classid(); float confidence = pBox.Get(i).confidence();
485  // Get object Id
486  int objectId = pBox.Get(i).objectid();
487 
488  // Push back data into vectors
489  boxes.push_back(box); classIds.push_back(classId); confidences.push_back(confidence); objectIds.push_back(objectId);
490  }
491 
492  // Assign data to object detector map
493  detectionsData[id] = CVDetectionData(classIds, confidences, boxes, id, objectIds);
494  }
495 
496  // Delete all global objects allocated by libprotobuf.
497  google::protobuf::ShutdownProtobufLibrary();
498 
499  return true;
500 }
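// Test-only usage sketch (assumes SaveObjDetectedData() has already written the
// file named by "protobuf_data_path"):
//
//   detector._LoadObjDetectdData();                     // reload saved detections
//   CVDetectionData d = detector.GetDetectionData(20);  // hypothetical frame 20
//   std::cout << d.boxes.size() << " boxes on frame 20" << std::endl;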
Referenced symbols:
CVObjectDetection.h - Header file for CVObjectDetection class.
void SetError(bool err, std::string message)
std::vector< TrackingBox > frameTrackingResult (Definition: sort.hpp:53)
void update(std::vector< cv::Rect > detection, int frame_count, double image_diagonal, std::vector< float > confidences, std::vector< int > classIds) (Definition: sort.cpp:41)
void SetJsonValue(const Json::Value root) - Load Json::Value into this object.
void detectObjectsClip(openshot::Clip &video, size_t start=0, size_t end=0, bool process_interval=false)
void AddFrameDataToProto(pb_objdetect::Frame *pbFrameData, CVDetectionData &dData)
CVDetectionData GetDetectionData(size_t frameId)
std::map< size_t, CVDetectionData > detectionsData
void SetJson(const std::string value) - Load JSON string into this object.
bool SaveObjDetectedData() - Protobuf Save and Load methods.
float Start() const - Get start position (in seconds) of clip (trim start of video) (Definition: ClipBase.h:110)
openshot::Clip - This class represents a clip (used to arrange readers on the timeline) (Definition: Clip.h:109)
void Open() override - Open the internal reader. (Definition: Clip.cpp:302)
float End() const - Get end position (in seconds) of clip (trim end of video), which can be affected by the time curve. (Definition: Clip.cpp:338)
std::shared_ptr< openshot::Frame > GetFrame(int64_t frame_number) override - Get an openshot::Frame object for a specific frame number of this clip. The image size and number of ... (Definition: Clip.cpp:360)
void Reader(openshot::ReaderBase *new_reader) - Set the current reader. (Definition: Clip.cpp:279)
openshot - This namespace is the default namespace for all code in the openshot library. (Definition: Compressor.h:47)
const Json::Value stringToJson(const std::string value) (Definition: Json.cpp:34)
std::vector< int > objectIds
std::vector< cv::Rect_< float > > boxes
std::vector< int > classIds
std::vector< float > confidences