#include <google/protobuf/util/time_util.h>

using google::protobuf::util::TimeUtil;

CVObjectDetection::CVObjectDetection(std::string processInfoJson,
    ProcessingController &processingController)
: processingController(&processingController), processingDevice("CPU"){
    SetJson(processInfoJson);
    // Default detection and non-maximum-suppression thresholds
    confThreshold = 0.5;
    nmsThreshold = 0.1;
}

void CVObjectDetection::setProcessingDevice(){
    if(processingDevice == "GPU"){
        net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
        net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
    }
    else if(processingDevice == "CPU"){
        net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
        net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
    }
}

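// Note: DNN_BACKEND_CUDA only takes effect when OpenCV was built with CUDA
// support; otherwise the forward pass falls back to (or errors on) the default
// backend. A minimal guard before choosing "GPU", assuming an OpenCV build
// that ships <opencv2/core/cuda.hpp> (sketch only, not part of this class):
//
//     std::string device =
//         (cv::cuda::getCudaEnabledDeviceCount() > 0) ? "GPU" : "CPU";
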
void CVObjectDetection::detectObjectsClip(openshot::Clip &video, size_t _start, size_t _end, bool process_interval)
{
    start = _start; end = _end;

    video.Open();

    processingController->SetError(false, "");

    // Load the class names, one per line
    std::ifstream ifs(classesFile.c_str());
    std::string line;
    while (std::getline(ifs, line)) classNames.push_back(line);

    // Abort if any required model file is missing
    if(classesFile == "" || modelConfiguration == "" || modelWeights == "")
        return;
    net = cv::dnn::readNetFromDarknet(modelConfiguration, modelWeights);
    setProcessingDevice();

    size_t frame_number;
    if(!process_interval || end <= 1 || end-start == 0){
        // No valid interval given: convert the clip's start/end times (seconds) into frame numbers
        start = (int)(video.Start() * video.Reader()->info.fps.ToFloat());
        end = (int)(video.End() * video.Reader()->info.fps.ToFloat());
    }

    for (frame_number = start; frame_number <= end; frame_number++)
    {
        std::shared_ptr<openshot::Frame> f = video.GetFrame(frame_number);

        // Grab the frame as an OpenCV Mat image
        cv::Mat cvimage = f->GetImageCV();

        DetectObjects(cvimage, frame_number);

        // Update the progress indicator
        processingController->SetProgress(uint(100*(frame_number-start)/(end-start)));
    }
}

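// A minimal usage sketch (the file path and controller setup are assumptions
// for illustration; see SetJsonValue below for the accepted JSON keys):
//
//     ProcessingController controller;
//     CVObjectDetection detector(processInfoJson, controller);  // processInfoJson: JSON string
//     openshot::Clip video("input.mp4");
//     detector.detectObjectsClip(video);   // no interval given: process the whole clip
//     detector.SaveObjDetectedData();      // persist the detections to protobuf
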
void CVObjectDetection::DetectObjects(const cv::Mat &frame, size_t frameId){
    // Create a 4D blob from the frame (scaled to [0,1], resized to the network input size)
    cv::Mat blob;
    int inpWidth, inpHeight;
    inpWidth = inpHeight = 416;

    cv::dnn::blobFromImage(frame, blob, 1/255.0, cv::Size(inpWidth, inpHeight), cv::Scalar(0,0,0), true, false);

    // Set the input to the network
    net.setInput(blob);

    // Run the forward pass to get the output of the output layers
    std::vector<cv::Mat> outs;
    net.forward(outs, getOutputsNames(net));

    // Remove the bounding boxes with low confidence
    postprocess(frame.size(), outs, frameId);
}

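// Note on the 416x416 input: YOLO-style networks accept other square sizes
// that are multiples of 32, trading speed for accuracy. A hedged sketch, if
// the size were made configurable (it is hard-coded above):
//
//     cv::dnn::blobFromImage(frame, blob, 1/255.0, cv::Size(320, 320),
//                            cv::Scalar(0,0,0), true, false);  // faster, less accurate
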
void CVObjectDetection::postprocess(const cv::Size &frameDims, const std::vector<cv::Mat>& outs, size_t frameId)
{
    std::vector<int> classIds;
    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;
    std::vector<int> objectIds;

    // Scan all the bounding boxes output by the network and keep only those with
    // high confidence scores; label each box with the class of its highest score
    for (size_t i = 0; i < outs.size(); ++i)
    {
        float* data = (float*)outs[i].data;
        for (int j = 0; j < outs[i].rows; ++j, data += outs[i].cols)
        {
            cv::Mat scores = outs[i].row(j).colRange(5, outs[i].cols);
            cv::Point classIdPoint;
            double confidence;
            // Get the value and location of the maximum score
            cv::minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
            if (confidence > confThreshold)
            {
                // Decode the box center and size, given as fractions of the frame dimensions
                int centerX = (int)(data[0] * frameDims.width);
                int centerY = (int)(data[1] * frameDims.height);
                int width = (int)(data[2] * frameDims.width);
                int height = (int)(data[3] * frameDims.height);
                int left = centerX - width / 2;
                int top = centerY - height / 2;

                classIds.push_back(classIdPoint.x);
                confidences.push_back((float)confidence);
                boxes.push_back(cv::Rect(left, top, width, height));
            }
        }
    }

    // Perform non-maximum suppression to eliminate redundant overlapping boxes with lower confidences
    std::vector<int> indices;
    cv::dnn::NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices);

    // Pass the detections to the SORT tracker (the third argument is the frame diagonal)
    std::vector<cv::Rect> sortBoxes;
    for(auto box : boxes)
        sortBoxes.push_back(box);
    sort.update(sortBoxes, frameId, sqrt(pow(frameDims.width,2) + pow(frameDims.height, 2)), confidences, classIds);

    // Replace the raw detections with the SORT-predicted boxes for this frame
    boxes.clear(); confidences.clear(); classIds.clear(); objectIds.clear();
    for(auto TBox : sort.frameTrackingResult){
        if(TBox.frame == frameId){
            boxes.push_back(TBox.box);
            confidences.push_back(TBox.confidence);
            classIds.push_back(TBox.classId);
            objectIds.push_back(TBox.id);
        }
    }

    // Remove same-class boxes whose centroids nearly coincide, keeping the more confident one
    for(uint i = 0; i<boxes.size(); i++){
        for(uint j = i+1; j<boxes.size(); j++){
            int xc_1 = boxes[i].x + (int)(boxes[i].width/2), yc_1 = boxes[i].y + (int)(boxes[i].height/2);
            int xc_2 = boxes[j].x + (int)(boxes[j].width/2), yc_2 = boxes[j].y + (int)(boxes[j].height/2);

            if(fabs(xc_1 - xc_2) < 10 && fabs(yc_1 - yc_2) < 10){
                if(classIds[i] == classIds[j]){
                    if(confidences[i] >= confidences[j]){
                        boxes.erase(boxes.begin() + j);
                        classIds.erase(classIds.begin() + j);
                        confidences.erase(confidences.begin() + j);
                        objectIds.erase(objectIds.begin() + j);
                        j--;
                    }
                    else{
                        boxes.erase(boxes.begin() + i);
                        classIds.erase(classIds.begin() + i);
                        confidences.erase(confidences.begin() + i);
                        objectIds.erase(objectIds.begin() + i);
                        i--;
                        break;
                    }
                }
            }
        }
    }

    // Remove same-class boxes that overlap heavily (high IOU), keeping the more confident one
    for(uint i = 0; i<boxes.size(); i++){
        for(uint j = i+1; j<boxes.size(); j++){
            if( iou(boxes[i], boxes[j])){
                if(classIds[i] == classIds[j]){
                    if(confidences[i] >= confidences[j]){
                        boxes.erase(boxes.begin() + j);
                        classIds.erase(classIds.begin() + j);
                        confidences.erase(confidences.begin() + j);
                        objectIds.erase(objectIds.begin() + j);
                        j--;
                    }
                    else{
                        boxes.erase(boxes.begin() + i);
                        classIds.erase(classIds.begin() + i);
                        confidences.erase(confidences.begin() + i);
                        objectIds.erase(objectIds.begin() + i);
                        i--;
                        break;
                    }
                }
            }
        }
    }

    // Normalize the box coordinates to the [0,1] range
    std::vector<cv::Rect_<float>> normalized_boxes;
    for(auto box : boxes){
        cv::Rect_<float> normalized_box;
        normalized_box.x = (box.x)/(float)frameDims.width;
        normalized_box.y = (box.y)/(float)frameDims.height;
        normalized_box.width = (box.width)/(float)frameDims.width;
        normalized_box.height = (box.height)/(float)frameDims.height;
        normalized_boxes.push_back(normalized_box);
    }

    detectionsData[frameId] = CVDetectionData(classIds, confidences, normalized_boxes, frameId, objectIds);
}

// Compute the IOU (intersection over union) of two boxes
bool CVObjectDetection::iou(cv::Rect pred_box, cv::Rect sort_box){
    // Determine the (x, y)-coordinates of the intersection rectangle
    int xA = std::max(pred_box.x, sort_box.x);
    int yA = std::max(pred_box.y, sort_box.y);
    int xB = std::min(pred_box.x + pred_box.width, sort_box.x + sort_box.width);
    int yB = std::min(pred_box.y + pred_box.height, sort_box.y + sort_box.height);
    // Compute the area of the intersection rectangle
    int interArea = std::max(0, xB - xA + 1) * std::max(0, yB - yA + 1);
    // Compute the area of both boxes
    int boxAArea = (pred_box.width + 1) * (pred_box.height + 1);
    int boxBArea = (sort_box.width + 1) * (sort_box.height + 1);
    // Compute the intersection over union; above 0.5 the two boxes are most
    // likely variations of the same bounding box
    float iou = interArea / (float)(boxAArea + boxBArea - interArea);
    return iou > 0.5;
}

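// Worked example: pred_box = (x=0, y=0, w=10, h=10) and sort_box = (5, 5, 10, 10)
// give xA=5, yA=5, xB=10, yB=10, so interArea = 6*6 = 36 and each box area is
// 11*11 = 121; iou = 36 / (121 + 121 - 36) ~= 0.17, which is below 0.5, so the
// boxes are kept as distinct detections.
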
// Get the names of the network's output layers
std::vector<cv::String> CVObjectDetection::getOutputsNames(const cv::dnn::Net& net)
{
    static std::vector<cv::String> names;

    // Get the indices of the output layers, i.e. the layers with unconnected outputs
    std::vector<int> outLayers = net.getUnconnectedOutLayers();

    // Get the names of all the layers in the network
    std::vector<cv::String> layersNames = net.getLayerNames();

    // Map the (1-based) output indices to layer names
    names.resize(outLayers.size());
    for (size_t i = 0; i < outLayers.size(); ++i)
        names[i] = layersNames[outLayers[i] - 1];
    return names;
}

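// Note: recent OpenCV versions (4.x) provide the same lookup in a single call,
// which could replace the body above (sketch only):
//
//     std::vector<cv::String> names = net.getUnconnectedOutLayersNames();
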
bool CVObjectDetection::SaveObjDetectedData(){
    // Create the object-detection protobuf message
    pb_objdetect::ObjDetect objMessage;
    // Save the class names in the message
    for(int i = 0; i < classNames.size(); i++){
        std::string* className = objMessage.add_classnames();
        className->assign(classNames.at(i));
    }
    // Save every frame's detection data in the message
    for(auto it = detectionsData.begin(); it != detectionsData.end(); ++it){
        CVDetectionData dData = it->second;
        pb_objdetect::Frame* pbFrameData = objMessage.add_frame();
        AddFrameDataToProto(pbFrameData, dData);
    }
    // Add a last-updated timestamp
    *objMessage.mutable_last_updated() = TimeUtil::SecondsToTimestamp(time(NULL));
    // Write the message to disk
    std::fstream output(protobuf_data_path, ios::out | ios::trunc | ios::binary);
    if (!objMessage.SerializeToOstream(&output)) {
        cerr << "Failed to write protobuf message." << endl;
        return false;
    }
    // Delete all global objects allocated by libprotobuf
    google::protobuf::ShutdownProtobufLibrary();
    return true;
}

// Add the detection data of one frame to the protobuf message
void CVObjectDetection::AddFrameDataToProto(pb_objdetect::Frame *pbFrameData, CVDetectionData &dData) {
    // Save the frame number
    pbFrameData->set_id(dData.frameId);

    for(size_t i = 0; i < dData.boxes.size(); i++){
        pb_objdetect::Frame_Box* box = pbFrameData->add_bounding_box();

        // Save the bounding box data
        box->set_x(dData.boxes.at(i).x);
        box->set_y(dData.boxes.at(i).y);
        box->set_w(dData.boxes.at(i).width);
        box->set_h(dData.boxes.at(i).height);
        box->set_classid(dData.classIds.at(i));
        box->set_confidence(dData.confidences.at(i));
        box->set_objectid(dData.objectIds.at(i));
    }
}

// Load a JSON string into this object
void CVObjectDetection::SetJson(const std::string value) {
    try
    {
        // Parse the JSON string into a Json::Value and apply it
        const Json::Value root = openshot::stringToJson(value);
        SetJsonValue(root);
    }
    catch (const std::exception& e)
    {
        std::cout << "JSON is invalid (missing keys or invalid data types)" << std::endl;
    }
}

// Load a Json::Value into this object
void CVObjectDetection::SetJsonValue(const Json::Value root) {
    // Set the data that matches each expected key
    if (!root["protobuf_data_path"].isNull()){
        protobuf_data_path = (root["protobuf_data_path"].asString());
    }
    if (!root["processing-device"].isNull()){
        processingDevice = (root["processing-device"].asString());
    }
    if (!root["model-config"].isNull()){
        modelConfiguration = (root["model-config"].asString());
        std::ifstream infile(modelConfiguration);
        if(!infile.good()){
            processingController->SetError(true, "Incorrect path to model config file");
        }
    }
    if (!root["model-weights"].isNull()){
        modelWeights = (root["model-weights"].asString());
        std::ifstream infile(modelWeights);
        if(!infile.good()){
            processingController->SetError(true, "Incorrect path to model weight file");
        }
    }
    if (!root["class-names"].isNull()){
        classesFile = (root["class-names"].asString());
        std::ifstream infile(classesFile);
        if(!infile.good()){
            processingController->SetError(true, "Incorrect path to class name file");
        }
    }
}

// Load the protobuf data file
bool CVObjectDetection::_LoadObjDetectdData(){
    // Create the object-detection protobuf message
    pb_objdetect::ObjDetect objMessage;

    // Read the existing message from disk
    fstream input(protobuf_data_path, ios::in | ios::binary);
    if (!objMessage.ParseFromIstream(&input)) {
        cerr << "Failed to parse protobuf message." << endl;
        return false;
    }

    // Make sure classNames and detectionsData are empty
    classNames.clear(); detectionsData.clear();

    // Get all the class names
    for(int i = 0; i < objMessage.classnames_size(); i++){
        classNames.push_back(objMessage.classnames(i));
    }

    // Iterate over all frames of the saved message
    for (size_t i = 0; i < objMessage.frame_size(); i++) {
        const pb_objdetect::Frame& pbFrameData = objMessage.frame(i);

        // Get the frame id
        size_t id = pbFrameData.id();

        // Load the bounding box data
        const google::protobuf::RepeatedPtrField<pb_objdetect::Frame_Box> &pBox = pbFrameData.bounding_box();

        // Construct the data vectors of the detections in the current frame
        std::vector<int> classIds;
        std::vector<float> confidences;
        std::vector<cv::Rect_<float>> boxes;
        std::vector<int> objectIds;

        for(int j = 0; j < pbFrameData.bounding_box_size(); j++){
            // Get the bounding box coordinates
            float x = pBox.Get(j).x(); float y = pBox.Get(j).y();
            float w = pBox.Get(j).w(); float h = pBox.Get(j).h();
            // Create an OpenCV rectangle with the bounding box info
            cv::Rect_<float> box(x, y, w, h);

            // Get the class id (mapped to a class name), the prediction confidence and the object id
            int classId = pBox.Get(j).classid();
            float confidence = pBox.Get(j).confidence();
            int objectId = pBox.Get(j).objectid();

            // Push the data into the vectors
            boxes.push_back(box); classIds.push_back(classId);
            confidences.push_back(confidence); objectIds.push_back(objectId);
        }

        // Assign the data to the detections map
        detectionsData[id] = CVDetectionData(classIds, confidences, boxes, id, objectIds);
    }

    // Delete all global objects allocated by libprotobuf
    google::protobuf::ShutdownProtobufLibrary();

    return true;
}

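// Save/load round trip as exercised by the tests (sketch; assumes
// SaveObjDetectedData() was called earlier with the same protobuf_data_path):
//
//     detector.SaveObjDetectedData();       // write detectionsData to disk
//     detector._LoadObjDetectdData();       // read it back into detectionsData
//     CVDetectionData d = detector.GetDetectionData(20);  // detections for frame 20
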