1 #include "opencv2/core.hpp" 2 #include "opencv2/core/utility.hpp" 3 4 using cv::Size; 5 using cv::Mat; 6 using cv::Point; 7 using cv::FileStorage; 8 using cv::Rect; 9 using cv::Ptr; 10 using cv::FileNode; 11 using cv::Mat_; 12 using cv::Range; 13 using cv::FileNodeIterator; 14 using cv::ParallelLoopBody; 15 16 17 using cv::Size; 18 using cv::Mat; 19 using cv::Point; 20 using cv::FileStorage; 21 using cv::Rect; 22 using cv::Ptr; 23 using cv::FileNode; 24 using cv::Mat_; 25 using cv::Range; 26 using cv::FileNodeIterator; 27 using cv::ParallelLoopBody; 28 29 30 #include "boost.h" 31 #include "cascadeclassifier.h" 32 #include <queue> 33 #include "cxmisc.h" 34 35 #include "cvconfig.h" 36 #ifdef HAVE_TBB 37 # include "tbb/tbb_stddef.h" 38 # if TBB_VERSION_MAJOR*100 + TBB_VERSION_MINOR >= 202 39 # include "tbb/tbb.h" 40 # include "tbb/task.h" 41 # undef min 42 # undef max 43 # else 44 # undef HAVE_TBB 45 # endif 46 #endif 47 48 #ifdef HAVE_TBB 49 typedef tbb::blocked_range<int> BlockedRange; 50 51 template<typename Body> static inline 52 void parallel_for( const BlockedRange& range, const Body& body ) 53 { 54 tbb::parallel_for(range, body); 55 } 56 #else 57 class BlockedRange 58 { 59 public: 60 BlockedRange() : _begin(0), _end(0), _grainsize(0) {} 61 BlockedRange(int b, int e, int g=1) : _begin(b), _end(e), _grainsize(g) {} 62 int begin() const { return _begin; } 63 int end() const { return _end; } 64 int grainsize() const { return _grainsize; } 65 66 protected: 67 int _begin, _end, _grainsize; 68 }; 69 70 template<typename Body> static inline 71 void parallel_for( const BlockedRange& range, const Body& body ) 72 { 73 body(range); 74 } 75 #endif 76 77 using namespace std; 78 79 static inline double 80 logRatio( double val ) 81 { 82 const double eps = 1e-5; 83 84 val = max( val, eps ); 85 val = min( val, 1. - eps ); 86 return log( val/(1. - val) ); 87 } 88 89 template<typename T, typename Idx> 90 class LessThanIdx 91 { 92 public: 93 LessThanIdx( const T* _arr ) : arr(_arr) {} 94 bool operator()(Idx a, Idx b) const { return arr[a] < arr[b]; } 95 const T* arr; 96 }; 97 98 static inline int cvAlign( int size, int align ) 99 { 100 CV_DbgAssert( (align & (align-1)) == 0 && size < INT_MAX ); 101 return (size + align - 1) & -align; 102 } 103 104 #define CV_THRESHOLD_EPS (0.00001F) 105 106 static const int MinBlockSize = 1 << 16; 107 static const int BlockSizeDelta = 1 << 10; 108 109 // TODO remove this code duplication with ml/precomp.hpp 110 111 static int CV_CDECL icvCmpIntegers( const void* a, const void* b ) 112 { 113 return *(const int*)a - *(const int*)b; 114 } 115 116 static CvMat* cvPreprocessIndexArray( const CvMat* idx_arr, int data_arr_size, bool check_for_duplicates=false ) 117 { 118 CvMat* idx = 0; 119 120 CV_FUNCNAME( "cvPreprocessIndexArray" ); 121 122 __CV_BEGIN__; 123 124 int i, idx_total, idx_selected = 0, step, type, prev = INT_MIN, is_sorted = 1; 125 uchar* srcb = 0; 126 int* srci = 0; 127 int* dsti; 128 129 if( !CV_IS_MAT(idx_arr) ) 130 CV_ERROR( CV_StsBadArg, "Invalid index array" ); 131 132 if( idx_arr->rows != 1 && idx_arr->cols != 1 ) 133 CV_ERROR( CV_StsBadSize, "the index array must be 1-dimensional" ); 134 135 idx_total = idx_arr->rows + idx_arr->cols - 1; 136 srcb = idx_arr->data.ptr; 137 srci = idx_arr->data.i; 138 139 type = CV_MAT_TYPE(idx_arr->type); 140 step = CV_IS_MAT_CONT(idx_arr->type) ? 1 : idx_arr->step/CV_ELEM_SIZE(type); 141 142 switch( type ) 143 { 144 case CV_8UC1: 145 case CV_8SC1: 146 // idx_arr is array of 1's and 0's - 147 // i.e. it is a mask of the selected components 148 if( idx_total != data_arr_size ) 149 CV_ERROR( CV_StsUnmatchedSizes, 150 "Component mask should contain as many elements as the total number of input variables" ); 151 152 for( i = 0; i < idx_total; i++ ) 153 idx_selected += srcb[i*step] != 0; 154 155 if( idx_selected == 0 ) 156 CV_ERROR( CV_StsOutOfRange, "No components/input_variables is selected!" ); 157 158 break; 159 case CV_32SC1: 160 // idx_arr is array of integer indices of selected components 161 if( idx_total > data_arr_size ) 162 CV_ERROR( CV_StsOutOfRange, 163 "index array may not contain more elements than the total number of input variables" ); 164 idx_selected = idx_total; 165 // check if sorted already 166 for( i = 0; i < idx_total; i++ ) 167 { 168 int val = srci[i*step]; 169 if( val >= prev ) 170 { 171 is_sorted = 0; 172 break; 173 } 174 prev = val; 175 } 176 break; 177 default: 178 CV_ERROR( CV_StsUnsupportedFormat, "Unsupported index array data type " 179 "(it should be 8uC1, 8sC1 or 32sC1)" ); 180 } 181 182 CV_CALL( idx = cvCreateMat( 1, idx_selected, CV_32SC1 )); 183 dsti = idx->data.i; 184 185 if( type < CV_32SC1 ) 186 { 187 for( i = 0; i < idx_total; i++ ) 188 if( srcb[i*step] ) 189 *dsti++ = i; 190 } 191 else 192 { 193 for( i = 0; i < idx_total; i++ ) 194 dsti[i] = srci[i*step]; 195 196 if( !is_sorted ) 197 qsort( dsti, idx_total, sizeof(dsti[0]), icvCmpIntegers ); 198 199 if( dsti[0] < 0 || dsti[idx_total-1] >= data_arr_size ) 200 CV_ERROR( CV_StsOutOfRange, "the index array elements are out of range" ); 201 202 if( check_for_duplicates ) 203 { 204 for( i = 1; i < idx_total; i++ ) 205 if( dsti[i] <= dsti[i-1] ) 206 CV_ERROR( CV_StsBadArg, "There are duplicated index array elements" ); 207 } 208 } 209 210 __CV_END__; 211 212 if( cvGetErrStatus() < 0 ) 213 cvReleaseMat( &idx ); 214 215 return idx; 216 } 217 218 //----------------------------- CascadeBoostParams ------------------------------------------------- 219 220 CvCascadeBoostParams::CvCascadeBoostParams() : minHitRate( 0.995F), maxFalseAlarm( 0.5F ) 221 { 222 boost_type = CvBoost::GENTLE; 223 use_surrogates = use_1se_rule = truncate_pruned_tree = false; 224 } 225 226 CvCascadeBoostParams::CvCascadeBoostParams( int _boostType, 227 float _minHitRate, float _maxFalseAlarm, 228 double _weightTrimRate, int _maxDepth, int _maxWeakCount ) : 229 CvBoostParams( _boostType, _maxWeakCount, _weightTrimRate, _maxDepth, false, 0 ) 230 { 231 boost_type = CvBoost::GENTLE; 232 minHitRate = _minHitRate; 233 maxFalseAlarm = _maxFalseAlarm; 234 use_surrogates = use_1se_rule = truncate_pruned_tree = false; 235 } 236 237 void CvCascadeBoostParams::write( FileStorage &fs ) const 238 { 239 string boostTypeStr = boost_type == CvBoost::DISCRETE ? CC_DISCRETE_BOOST : 240 boost_type == CvBoost::REAL ? CC_REAL_BOOST : 241 boost_type == CvBoost::LOGIT ? CC_LOGIT_BOOST : 242 boost_type == CvBoost::GENTLE ? CC_GENTLE_BOOST : string(); 243 CV_Assert( !boostTypeStr.empty() ); 244 fs << CC_BOOST_TYPE << boostTypeStr; 245 fs << CC_MINHITRATE << minHitRate; 246 fs << CC_MAXFALSEALARM << maxFalseAlarm; 247 fs << CC_TRIM_RATE << weight_trim_rate; 248 fs << CC_MAX_DEPTH << max_depth; 249 fs << CC_WEAK_COUNT << weak_count; 250 } 251 252 bool CvCascadeBoostParams::read( const FileNode &node ) 253 { 254 string boostTypeStr; 255 FileNode rnode = node[CC_BOOST_TYPE]; 256 rnode >> boostTypeStr; 257 boost_type = !boostTypeStr.compare( CC_DISCRETE_BOOST ) ? CvBoost::DISCRETE : 258 !boostTypeStr.compare( CC_REAL_BOOST ) ? CvBoost::REAL : 259 !boostTypeStr.compare( CC_LOGIT_BOOST ) ? CvBoost::LOGIT : 260 !boostTypeStr.compare( CC_GENTLE_BOOST ) ? CvBoost::GENTLE : -1; 261 if (boost_type == -1) 262 CV_Error( CV_StsBadArg, "unsupported Boost type" ); 263 node[CC_MINHITRATE] >> minHitRate; 264 node[CC_MAXFALSEALARM] >> maxFalseAlarm; 265 node[CC_TRIM_RATE] >> weight_trim_rate ; 266 node[CC_MAX_DEPTH] >> max_depth ; 267 node[CC_WEAK_COUNT] >> weak_count ; 268 if ( minHitRate <= 0 || minHitRate > 1 || 269 maxFalseAlarm <= 0 || maxFalseAlarm > 1 || 270 weight_trim_rate <= 0 || weight_trim_rate > 1 || 271 max_depth <= 0 || weak_count <= 0 ) 272 CV_Error( CV_StsBadArg, "bad parameters range"); 273 return true; 274 } 275 276 void CvCascadeBoostParams::printDefaults() const 277 { 278 cout << "--boostParams--" << endl; 279 cout << " [-bt <{" << CC_DISCRETE_BOOST << ", " 280 << CC_REAL_BOOST << ", " 281 << CC_LOGIT_BOOST ", " 282 << CC_GENTLE_BOOST << "(default)}>]" << endl; 283 cout << " [-minHitRate <min_hit_rate> = " << minHitRate << ">]" << endl; 284 cout << " [-maxFalseAlarmRate <max_false_alarm_rate = " << maxFalseAlarm << ">]" << endl; 285 cout << " [-weightTrimRate <weight_trim_rate = " << weight_trim_rate << ">]" << endl; 286 cout << " [-maxDepth <max_depth_of_weak_tree = " << max_depth << ">]" << endl; 287 cout << " [-maxWeakCount <max_weak_tree_count = " << weak_count << ">]" << endl; 288 } 289 290 void CvCascadeBoostParams::printAttrs() const 291 { 292 string boostTypeStr = boost_type == CvBoost::DISCRETE ? CC_DISCRETE_BOOST : 293 boost_type == CvBoost::REAL ? CC_REAL_BOOST : 294 boost_type == CvBoost::LOGIT ? CC_LOGIT_BOOST : 295 boost_type == CvBoost::GENTLE ? CC_GENTLE_BOOST : string(); 296 CV_Assert( !boostTypeStr.empty() ); 297 cout << "boostType: " << boostTypeStr << endl; 298 cout << "minHitRate: " << minHitRate << endl; 299 cout << "maxFalseAlarmRate: " << maxFalseAlarm << endl; 300 cout << "weightTrimRate: " << weight_trim_rate << endl; 301 cout << "maxDepth: " << max_depth << endl; 302 cout << "maxWeakCount: " << weak_count << endl; 303 } 304 305 bool CvCascadeBoostParams::scanAttr( const string prmName, const string val) 306 { 307 bool res = true; 308 309 if( !prmName.compare( "-bt" ) ) 310 { 311 boost_type = !val.compare( CC_DISCRETE_BOOST ) ? CvBoost::DISCRETE : 312 !val.compare( CC_REAL_BOOST ) ? CvBoost::REAL : 313 !val.compare( CC_LOGIT_BOOST ) ? CvBoost::LOGIT : 314 !val.compare( CC_GENTLE_BOOST ) ? CvBoost::GENTLE : -1; 315 if (boost_type == -1) 316 res = false; 317 } 318 else if( !prmName.compare( "-minHitRate" ) ) 319 { 320 minHitRate = (float) atof( val.c_str() ); 321 } 322 else if( !prmName.compare( "-maxFalseAlarmRate" ) ) 323 { 324 maxFalseAlarm = (float) atof( val.c_str() ); 325 } 326 else if( !prmName.compare( "-weightTrimRate" ) ) 327 { 328 weight_trim_rate = (float) atof( val.c_str() ); 329 } 330 else if( !prmName.compare( "-maxDepth" ) ) 331 { 332 max_depth = atoi( val.c_str() ); 333 } 334 else if( !prmName.compare( "-maxWeakCount" ) ) 335 { 336 weak_count = atoi( val.c_str() ); 337 } 338 else 339 res = false; 340 341 return res; 342 } 343 344 CvDTreeNode* CvCascadeBoostTrainData::subsample_data( const CvMat* _subsample_idx ) 345 { 346 CvDTreeNode* root = 0; 347 CvMat* isubsample_idx = 0; 348 CvMat* subsample_co = 0; 349 350 bool isMakeRootCopy = true; 351 352 if( !data_root ) 353 CV_Error( CV_StsError, "No training data has been set" ); 354 355 if( _subsample_idx ) 356 { 357 CV_Assert( (isubsample_idx = cvPreprocessIndexArray( _subsample_idx, sample_count )) != 0 ); 358 359 if( isubsample_idx->cols + isubsample_idx->rows - 1 == sample_count ) 360 { 361 const int* sidx = isubsample_idx->data.i; 362 for( int i = 0; i < sample_count; i++ ) 363 { 364 if( sidx[i] != i ) 365 { 366 isMakeRootCopy = false; 367 break; 368 } 369 } 370 } 371 else 372 isMakeRootCopy = false; 373 } 374 375 if( isMakeRootCopy ) 376 { 377 // make a copy of the root node 378 CvDTreeNode temp; 379 int i; 380 root = new_node( 0, 1, 0, 0 ); 381 temp = *root; 382 *root = *data_root; 383 root->num_valid = temp.num_valid; 384 if( root->num_valid ) 385 { 386 for( i = 0; i < var_count; i++ ) 387 root->num_valid[i] = data_root->num_valid[i]; 388 } 389 root->cv_Tn = temp.cv_Tn; 390 root->cv_node_risk = temp.cv_node_risk; 391 root->cv_node_error = temp.cv_node_error; 392 } 393 else 394 { 395 int* sidx = isubsample_idx->data.i; 396 // co - array of count/offset pairs (to handle duplicated values in _subsample_idx) 397 int* co, cur_ofs = 0; 398 int workVarCount = get_work_var_count(); 399 int count = isubsample_idx->rows + isubsample_idx->cols - 1; 400 401 root = new_node( 0, count, 1, 0 ); 402 403 CV_Assert( (subsample_co = cvCreateMat( 1, sample_count*2, CV_32SC1 )) != 0); 404 cvZero( subsample_co ); 405 co = subsample_co->data.i; 406 for( int i = 0; i < count; i++ ) 407 co[sidx[i]*2]++; 408 for( int i = 0; i < sample_count; i++ ) 409 { 410 if( co[i*2] ) 411 { 412 co[i*2+1] = cur_ofs; 413 cur_ofs += co[i*2]; 414 } 415 else 416 co[i*2+1] = -1; 417 } 418 419 cv::AutoBuffer<uchar> inn_buf(sample_count*(2*sizeof(int) + sizeof(float))); 420 // subsample ordered variables 421 for( int vi = 0; vi < numPrecalcIdx; vi++ ) 422 { 423 int ci = get_var_type(vi); 424 CV_Assert( ci < 0 ); 425 426 int *src_idx_buf = (int*)(uchar*)inn_buf; 427 float *src_val_buf = (float*)(src_idx_buf + sample_count); 428 int* sample_indices_buf = (int*)(src_val_buf + sample_count); 429 const int* src_idx = 0; 430 const float* src_val = 0; 431 get_ord_var_data( data_root, vi, src_val_buf, src_idx_buf, &src_val, &src_idx, sample_indices_buf ); 432 433 int j = 0, idx, count_i; 434 int num_valid = data_root->get_num_valid(vi); 435 CV_Assert( num_valid == sample_count ); 436 437 if (is_buf_16u) 438 { 439 unsigned short* udst_idx = (unsigned short*)(buf->data.s + root->buf_idx*get_length_subbuf() + 440 (size_t)vi*sample_count + data_root->offset); 441 for( int i = 0; i < num_valid; i++ ) 442 { 443 idx = src_idx[i]; 444 count_i = co[idx*2]; 445 if( count_i ) 446 for( cur_ofs = co[idx*2+1]; count_i > 0; count_i--, j++, cur_ofs++ ) 447 udst_idx[j] = (unsigned short)cur_ofs; 448 } 449 } 450 else 451 { 452 int* idst_idx = buf->data.i + root->buf_idx*get_length_subbuf() + 453 (size_t)vi*sample_count + root->offset; 454 for( int i = 0; i < num_valid; i++ ) 455 { 456 idx = src_idx[i]; 457 count_i = co[idx*2]; 458 if( count_i ) 459 for( cur_ofs = co[idx*2+1]; count_i > 0; count_i--, j++, cur_ofs++ ) 460 idst_idx[j] = cur_ofs; 461 } 462 } 463 } 464 465 // subsample cv_lables 466 const int* src_lbls = get_cv_labels(data_root, (int*)(uchar*)inn_buf); 467 if (is_buf_16u) 468 { 469 unsigned short* udst = (unsigned short*)(buf->data.s + root->buf_idx*get_length_subbuf() + 470 (size_t)(workVarCount-1)*sample_count + root->offset); 471 for( int i = 0; i < count; i++ ) 472 udst[i] = (unsigned short)src_lbls[sidx[i]]; 473 } 474 else 475 { 476 int* idst = buf->data.i + root->buf_idx*get_length_subbuf() + 477 (size_t)(workVarCount-1)*sample_count + root->offset; 478 for( int i = 0; i < count; i++ ) 479 idst[i] = src_lbls[sidx[i]]; 480 } 481 482 // subsample sample_indices 483 const int* sample_idx_src = get_sample_indices(data_root, (int*)(uchar*)inn_buf); 484 if (is_buf_16u) 485 { 486 unsigned short* sample_idx_dst = (unsigned short*)(buf->data.s + root->buf_idx*get_length_subbuf() + 487 (size_t)workVarCount*sample_count + root->offset); 488 for( int i = 0; i < count; i++ ) 489 sample_idx_dst[i] = (unsigned short)sample_idx_src[sidx[i]]; 490 } 491 else 492 { 493 int* sample_idx_dst = buf->data.i + root->buf_idx*get_length_subbuf() + 494 (size_t)workVarCount*sample_count + root->offset; 495 for( int i = 0; i < count; i++ ) 496 sample_idx_dst[i] = sample_idx_src[sidx[i]]; 497 } 498 499 for( int vi = 0; vi < var_count; vi++ ) 500 root->set_num_valid(vi, count); 501 } 502 503 cvReleaseMat( &isubsample_idx ); 504 cvReleaseMat( &subsample_co ); 505 506 return root; 507 } 508 509 //---------------------------- CascadeBoostTrainData ----------------------------- 510 511 CvCascadeBoostTrainData::CvCascadeBoostTrainData( const CvFeatureEvaluator* _featureEvaluator, 512 const CvDTreeParams& _params ) 513 { 514 is_classifier = true; 515 var_all = var_count = (int)_featureEvaluator->getNumFeatures(); 516 517 featureEvaluator = _featureEvaluator; 518 shared = true; 519 set_params( _params ); 520 max_c_count = MAX( 2, featureEvaluator->getMaxCatCount() ); 521 var_type = cvCreateMat( 1, var_count + 2, CV_32SC1 ); 522 if ( featureEvaluator->getMaxCatCount() > 0 ) 523 { 524 numPrecalcIdx = 0; 525 cat_var_count = var_count; 526 ord_var_count = 0; 527 for( int vi = 0; vi < var_count; vi++ ) 528 { 529 var_type->data.i[vi] = vi; 530 } 531 } 532 else 533 { 534 cat_var_count = 0; 535 ord_var_count = var_count; 536 for( int vi = 1; vi <= var_count; vi++ ) 537 { 538 var_type->data.i[vi-1] = -vi; 539 } 540 } 541 var_type->data.i[var_count] = cat_var_count; 542 var_type->data.i[var_count+1] = cat_var_count+1; 543 544 int maxSplitSize = cvAlign(sizeof(CvDTreeSplit) + (MAX(0,max_c_count - 33)/32)*sizeof(int),sizeof(void*)); 545 int treeBlockSize = MAX((int)sizeof(CvDTreeNode)*8, maxSplitSize); 546 treeBlockSize = MAX(treeBlockSize + BlockSizeDelta, MinBlockSize); 547 tree_storage = cvCreateMemStorage( treeBlockSize ); 548 node_heap = cvCreateSet( 0, sizeof(node_heap[0]), sizeof(CvDTreeNode), tree_storage ); 549 split_heap = cvCreateSet( 0, sizeof(split_heap[0]), maxSplitSize, tree_storage ); 550 } 551 552 CvCascadeBoostTrainData::CvCascadeBoostTrainData( const CvFeatureEvaluator* _featureEvaluator, 553 int _numSamples, 554 int _precalcValBufSize, int _precalcIdxBufSize, 555 const CvDTreeParams& _params ) 556 { 557 setData( _featureEvaluator, _numSamples, _precalcValBufSize, _precalcIdxBufSize, _params ); 558 } 559 560 void CvCascadeBoostTrainData::setData( const CvFeatureEvaluator* _featureEvaluator, 561 int _numSamples, 562 int _precalcValBufSize, int _precalcIdxBufSize, 563 const CvDTreeParams& _params ) 564 { 565 int* idst = 0; 566 unsigned short* udst = 0; 567 568 uint64 effective_buf_size = 0; 569 int effective_buf_height = 0, effective_buf_width = 0; 570 571 572 clear(); 573 shared = true; 574 have_labels = true; 575 have_priors = false; 576 is_classifier = true; 577 578 rng = &cv::theRNG(); 579 580 set_params( _params ); 581 582 CV_Assert( _featureEvaluator ); 583 featureEvaluator = _featureEvaluator; 584 585 max_c_count = MAX( 2, featureEvaluator->getMaxCatCount() ); 586 _resp = featureEvaluator->getCls(); 587 responses = &_resp; 588 // TODO: check responses: elements must be 0 or 1 589 590 if( _precalcValBufSize < 0 || _precalcIdxBufSize < 0) 591 CV_Error( CV_StsOutOfRange, "_numPrecalcVal and _numPrecalcIdx must be positive or 0" ); 592 593 var_count = var_all = featureEvaluator->getNumFeatures() * featureEvaluator->getFeatureSize(); 594 sample_count = _numSamples; 595 596 is_buf_16u = false; 597 if (sample_count < 65536) 598 is_buf_16u = true; 599 600 numPrecalcVal = min( cvRound((double)_precalcValBufSize*1048576. / (sizeof(float)*sample_count)), var_count ); 601 numPrecalcIdx = min( cvRound((double)_precalcIdxBufSize*1048576. / 602 ((is_buf_16u ? sizeof(unsigned short) : sizeof (int))*sample_count)), var_count ); 603 604 assert( numPrecalcIdx >= 0 && numPrecalcVal >= 0 ); 605 606 valCache.create( numPrecalcVal, sample_count, CV_32FC1 ); 607 var_type = cvCreateMat( 1, var_count + 2, CV_32SC1 ); 608 609 if ( featureEvaluator->getMaxCatCount() > 0 ) 610 { 611 numPrecalcIdx = 0; 612 cat_var_count = var_count; 613 ord_var_count = 0; 614 for( int vi = 0; vi < var_count; vi++ ) 615 { 616 var_type->data.i[vi] = vi; 617 } 618 } 619 else 620 { 621 cat_var_count = 0; 622 ord_var_count = var_count; 623 for( int vi = 1; vi <= var_count; vi++ ) 624 { 625 var_type->data.i[vi-1] = -vi; 626 } 627 } 628 var_type->data.i[var_count] = cat_var_count; 629 var_type->data.i[var_count+1] = cat_var_count+1; 630 work_var_count = ( cat_var_count ? 0 : numPrecalcIdx ) + 1/*cv_lables*/; 631 buf_count = 2; 632 633 buf_size = -1; // the member buf_size is obsolete 634 635 effective_buf_size = (uint64)(work_var_count + 1)*(uint64)sample_count * buf_count; // this is the total size of "CvMat buf" to be allocated 636 effective_buf_width = sample_count; 637 effective_buf_height = work_var_count+1; 638 639 if (effective_buf_width >= effective_buf_height) 640 effective_buf_height *= buf_count; 641 else 642 effective_buf_width *= buf_count; 643 644 if ((uint64)effective_buf_width * (uint64)effective_buf_height != effective_buf_size) 645 { 646 CV_Error(CV_StsBadArg, "The memory buffer cannot be allocated since its size exceeds integer fields limit"); 647 } 648 649 if ( is_buf_16u ) 650 buf = cvCreateMat( effective_buf_height, effective_buf_width, CV_16UC1 ); 651 else 652 buf = cvCreateMat( effective_buf_height, effective_buf_width, CV_32SC1 ); 653 654 cat_count = cvCreateMat( 1, cat_var_count + 1, CV_32SC1 ); 655 656 // precalculate valCache and set indices in buf 657 precalculate(); 658 659 // now calculate the maximum size of split, 660 // create memory storage that will keep nodes and splits of the decision tree 661 // allocate root node and the buffer for the whole training data 662 int maxSplitSize = cvAlign(sizeof(CvDTreeSplit) + 663 (MAX(0,sample_count - 33)/32)*sizeof(int),sizeof(void*)); 664 int treeBlockSize = MAX((int)sizeof(CvDTreeNode)*8, maxSplitSize); 665 treeBlockSize = MAX(treeBlockSize + BlockSizeDelta, MinBlockSize); 666 tree_storage = cvCreateMemStorage( treeBlockSize ); 667 node_heap = cvCreateSet( 0, sizeof(*node_heap), sizeof(CvDTreeNode), tree_storage ); 668 669 int nvSize = var_count*sizeof(int); 670 nvSize = cvAlign(MAX( nvSize, (int)sizeof(CvSetElem) ), sizeof(void*)); 671 int tempBlockSize = nvSize; 672 tempBlockSize = MAX( tempBlockSize + BlockSizeDelta, MinBlockSize ); 673 temp_storage = cvCreateMemStorage( tempBlockSize ); 674 nv_heap = cvCreateSet( 0, sizeof(*nv_heap), nvSize, temp_storage ); 675 676 data_root = new_node( 0, sample_count, 0, 0 ); 677 678 // set sample labels 679 if (is_buf_16u) 680 udst = (unsigned short*)(buf->data.s + (size_t)work_var_count*sample_count); 681 else 682 idst = buf->data.i + (size_t)work_var_count*sample_count; 683 684 for (int si = 0; si < sample_count; si++) 685 { 686 if (udst) 687 udst[si] = (unsigned short)si; 688 else 689 idst[si] = si; 690 } 691 for( int vi = 0; vi < var_count; vi++ ) 692 data_root->set_num_valid(vi, sample_count); 693 for( int vi = 0; vi < cat_var_count; vi++ ) 694 cat_count->data.i[vi] = max_c_count; 695 696 cat_count->data.i[cat_var_count] = 2; 697 698 maxSplitSize = cvAlign(sizeof(CvDTreeSplit) + 699 (MAX(0,max_c_count - 33)/32)*sizeof(int),sizeof(void*)); 700 split_heap = cvCreateSet( 0, sizeof(*split_heap), maxSplitSize, tree_storage ); 701 702 priors = cvCreateMat( 1, get_num_classes(), CV_64F ); 703 cvSet(priors, cvScalar(1)); 704 priors_mult = cvCloneMat( priors ); 705 counts = cvCreateMat( 1, get_num_classes(), CV_32SC1 ); 706 direction = cvCreateMat( 1, sample_count, CV_8UC1 ); 707 split_buf = cvCreateMat( 1, sample_count, CV_32SC1 );//TODO: make a pointer 708 } 709 710 void CvCascadeBoostTrainData::free_train_data() 711 { 712 CvDTreeTrainData::free_train_data(); 713 valCache.release(); 714 } 715 716 const int* CvCascadeBoostTrainData::get_class_labels( CvDTreeNode* n, int* labelsBuf) 717 { 718 int nodeSampleCount = n->sample_count; 719 int rStep = CV_IS_MAT_CONT( responses->type ) ? 1 : responses->step / CV_ELEM_SIZE( responses->type ); 720 721 int* sampleIndicesBuf = labelsBuf; // 722 const int* sampleIndices = get_sample_indices(n, sampleIndicesBuf); 723 for( int si = 0; si < nodeSampleCount; si++ ) 724 { 725 int sidx = sampleIndices[si]; 726 labelsBuf[si] = (int)responses->data.fl[sidx*rStep]; 727 } 728 return labelsBuf; 729 } 730 731 const int* CvCascadeBoostTrainData::get_sample_indices( CvDTreeNode* n, int* indicesBuf ) 732 { 733 return CvDTreeTrainData::get_cat_var_data( n, get_work_var_count(), indicesBuf ); 734 } 735 736 const int* CvCascadeBoostTrainData::get_cv_labels( CvDTreeNode* n, int* labels_buf ) 737 { 738 return CvDTreeTrainData::get_cat_var_data( n, get_work_var_count() - 1, labels_buf ); 739 } 740 741 void CvCascadeBoostTrainData::get_ord_var_data( CvDTreeNode* n, int vi, float* ordValuesBuf, int* sortedIndicesBuf, 742 const float** ordValues, const int** sortedIndices, int* sampleIndicesBuf ) 743 { 744 int nodeSampleCount = n->sample_count; 745 const int* sampleIndices = get_sample_indices(n, sampleIndicesBuf); 746 747 if ( vi < numPrecalcIdx ) 748 { 749 if( !is_buf_16u ) 750 *sortedIndices = buf->data.i + n->buf_idx*get_length_subbuf() + (size_t)vi*sample_count + n->offset; 751 else 752 { 753 const unsigned short* shortIndices = (const unsigned short*)(buf->data.s + n->buf_idx*get_length_subbuf() + 754 (size_t)vi*sample_count + n->offset ); 755 for( int i = 0; i < nodeSampleCount; i++ ) 756 sortedIndicesBuf[i] = shortIndices[i]; 757 758 *sortedIndices = sortedIndicesBuf; 759 } 760 761 if( vi < numPrecalcVal ) 762 { 763 for( int i = 0; i < nodeSampleCount; i++ ) 764 { 765 int idx = (*sortedIndices)[i]; 766 idx = sampleIndices[idx]; 767 ordValuesBuf[i] = valCache.at<float>( vi, idx); 768 } 769 } 770 else 771 { 772 for( int i = 0; i < nodeSampleCount; i++ ) 773 { 774 int idx = (*sortedIndices)[i]; 775 idx = sampleIndices[idx]; 776 ordValuesBuf[i] = (*featureEvaluator)( vi, idx); 777 } 778 } 779 } 780 else // vi >= numPrecalcIdx 781 { 782 cv::AutoBuffer<float> abuf(nodeSampleCount); 783 float* sampleValues = &abuf[0]; 784 785 if ( vi < numPrecalcVal ) 786 { 787 for( int i = 0; i < nodeSampleCount; i++ ) 788 { 789 sortedIndicesBuf[i] = i; 790 sampleValues[i] = valCache.at<float>( vi, sampleIndices[i] ); 791 } 792 } 793 else 794 { 795 for( int i = 0; i < nodeSampleCount; i++ ) 796 { 797 sortedIndicesBuf[i] = i; 798 sampleValues[i] = (*featureEvaluator)( vi, sampleIndices[i]); 799 } 800 } 801 std::sort(sortedIndicesBuf, sortedIndicesBuf + nodeSampleCount, LessThanIdx<float, int>(&sampleValues[0]) ); 802 for( int i = 0; i < nodeSampleCount; i++ ) 803 ordValuesBuf[i] = (&sampleValues[0])[sortedIndicesBuf[i]]; 804 *sortedIndices = sortedIndicesBuf; 805 } 806 807 *ordValues = ordValuesBuf; 808 } 809 810 const int* CvCascadeBoostTrainData::get_cat_var_data( CvDTreeNode* n, int vi, int* catValuesBuf ) 811 { 812 int nodeSampleCount = n->sample_count; 813 int* sampleIndicesBuf = catValuesBuf; // 814 const int* sampleIndices = get_sample_indices(n, sampleIndicesBuf); 815 816 if ( vi < numPrecalcVal ) 817 { 818 for( int i = 0; i < nodeSampleCount; i++ ) 819 catValuesBuf[i] = (int) valCache.at<float>( vi, sampleIndices[i]); 820 } 821 else 822 { 823 if( vi >= numPrecalcVal && vi < var_count ) 824 { 825 for( int i = 0; i < nodeSampleCount; i++ ) 826 catValuesBuf[i] = (int)(*featureEvaluator)( vi, sampleIndices[i] ); 827 } 828 else 829 { 830 get_cv_labels( n, catValuesBuf ); 831 } 832 } 833 834 return catValuesBuf; 835 } 836 837 float CvCascadeBoostTrainData::getVarValue( int vi, int si ) 838 { 839 if ( vi < numPrecalcVal && !valCache.empty() ) 840 return valCache.at<float>( vi, si ); 841 return (*featureEvaluator)( vi, si ); 842 } 843 844 845 struct FeatureIdxOnlyPrecalc : ParallelLoopBody 846 { 847 FeatureIdxOnlyPrecalc( const CvFeatureEvaluator* _featureEvaluator, CvMat* _buf, int _sample_count, bool _is_buf_16u ) 848 { 849 featureEvaluator = _featureEvaluator; 850 sample_count = _sample_count; 851 udst = (unsigned short*)_buf->data.s; 852 idst = _buf->data.i; 853 is_buf_16u = _is_buf_16u; 854 } 855 void operator()( const Range& range ) const 856 { 857 cv::AutoBuffer<float> valCache(sample_count); 858 float* valCachePtr = (float*)valCache; 859 for ( int fi = range.start; fi < range.end; fi++) 860 { 861 for( int si = 0; si < sample_count; si++ ) 862 { 863 valCachePtr[si] = (*featureEvaluator)( fi, si ); 864 if ( is_buf_16u ) 865 *(udst + (size_t)fi*sample_count + si) = (unsigned short)si; 866 else 867 *(idst + (size_t)fi*sample_count + si) = si; 868 } 869 if ( is_buf_16u ) 870 std::sort(udst + (size_t)fi*sample_count, udst + (size_t)(fi + 1)*sample_count, LessThanIdx<float, unsigned short>(valCachePtr) ); 871 else 872 std::sort(idst + (size_t)fi*sample_count, idst + (size_t)(fi + 1)*sample_count, LessThanIdx<float, int>(valCachePtr) ); 873 } 874 } 875 const CvFeatureEvaluator* featureEvaluator; 876 int sample_count; 877 int* idst; 878 unsigned short* udst; 879 bool is_buf_16u; 880 }; 881 882 struct FeatureValAndIdxPrecalc : ParallelLoopBody 883 { 884 FeatureValAndIdxPrecalc( const CvFeatureEvaluator* _featureEvaluator, CvMat* _buf, Mat* _valCache, int _sample_count, bool _is_buf_16u ) 885 { 886 featureEvaluator = _featureEvaluator; 887 valCache = _valCache; 888 sample_count = _sample_count; 889 udst = (unsigned short*)_buf->data.s; 890 idst = _buf->data.i; 891 is_buf_16u = _is_buf_16u; 892 } 893 void operator()( const Range& range ) const 894 { 895 for ( int fi = range.start; fi < range.end; fi++) 896 { 897 for( int si = 0; si < sample_count; si++ ) 898 { 899 valCache->at<float>(fi,si) = (*featureEvaluator)( fi, si ); 900 if ( is_buf_16u ) 901 *(udst + (size_t)fi*sample_count + si) = (unsigned short)si; 902 else 903 *(idst + (size_t)fi*sample_count + si) = si; 904 } 905 if ( is_buf_16u ) 906 std::sort(udst + (size_t)fi*sample_count, udst + (size_t)(fi + 1)*sample_count, LessThanIdx<float, unsigned short>(valCache->ptr<float>(fi)) ); 907 else 908 std::sort(idst + (size_t)fi*sample_count, idst + (size_t)(fi + 1)*sample_count, LessThanIdx<float, int>(valCache->ptr<float>(fi)) ); 909 } 910 } 911 const CvFeatureEvaluator* featureEvaluator; 912 Mat* valCache; 913 int sample_count; 914 int* idst; 915 unsigned short* udst; 916 bool is_buf_16u; 917 }; 918 919 struct FeatureValOnlyPrecalc : ParallelLoopBody 920 { 921 FeatureValOnlyPrecalc( const CvFeatureEvaluator* _featureEvaluator, Mat* _valCache, int _sample_count ) 922 { 923 featureEvaluator = _featureEvaluator; 924 valCache = _valCache; 925 sample_count = _sample_count; 926 } 927 void operator()( const Range& range ) const 928 { 929 for ( int fi = range.start; fi < range.end; fi++) 930 for( int si = 0; si < sample_count; si++ ) 931 valCache->at<float>(fi,si) = (*featureEvaluator)( fi, si ); 932 } 933 const CvFeatureEvaluator* featureEvaluator; 934 Mat* valCache; 935 int sample_count; 936 }; 937 938 void CvCascadeBoostTrainData::precalculate() 939 { 940 int minNum = MIN( numPrecalcVal, numPrecalcIdx); 941 942 double proctime = -TIME( 0 ); 943 parallel_for_( Range(numPrecalcVal, numPrecalcIdx), 944 FeatureIdxOnlyPrecalc(featureEvaluator, buf, sample_count, is_buf_16u!=0) ); 945 parallel_for_( Range(0, minNum), 946 FeatureValAndIdxPrecalc(featureEvaluator, buf, &valCache, sample_count, is_buf_16u!=0) ); 947 parallel_for_( Range(minNum, numPrecalcVal), 948 FeatureValOnlyPrecalc(featureEvaluator, &valCache, sample_count) ); 949 cout << "Precalculation time: " << (proctime + TIME( 0 )) << endl; 950 } 951 952 //-------------------------------- CascadeBoostTree ---------------------------------------- 953 954 CvDTreeNode* CvCascadeBoostTree::predict( int sampleIdx ) const 955 { 956 CvDTreeNode* node = root; 957 if( !node ) 958 CV_Error( CV_StsError, "The tree has not been trained yet" ); 959 960 if ( ((CvCascadeBoostTrainData*)data)->featureEvaluator->getMaxCatCount() == 0 ) // ordered 961 { 962 while( node->left ) 963 { 964 CvDTreeSplit* split = node->split; 965 float val = ((CvCascadeBoostTrainData*)data)->getVarValue( split->var_idx, sampleIdx ); 966 node = val <= split->ord.c ? node->left : node->right; 967 } 968 } 969 else // categorical 970 { 971 while( node->left ) 972 { 973 CvDTreeSplit* split = node->split; 974 int c = (int)((CvCascadeBoostTrainData*)data)->getVarValue( split->var_idx, sampleIdx ); 975 node = CV_DTREE_CAT_DIR(c, split->subset) < 0 ? node->left : node->right; 976 } 977 } 978 return node; 979 } 980 981 void CvCascadeBoostTree::write( FileStorage &fs, const Mat& featureMap ) 982 { 983 int maxCatCount = ((CvCascadeBoostTrainData*)data)->featureEvaluator->getMaxCatCount(); 984 int subsetN = (maxCatCount + 31)/32; 985 queue<CvDTreeNode*> internalNodesQueue; 986 int size = (int)pow( 2.f, (float)ensemble->get_params().max_depth); 987 std::vector<float> leafVals(size); 988 int leafValIdx = 0; 989 int internalNodeIdx = 1; 990 CvDTreeNode* tempNode; 991 992 CV_DbgAssert( root ); 993 internalNodesQueue.push( root ); 994 995 fs << "{"; 996 fs << CC_INTERNAL_NODES << "[:"; 997 while (!internalNodesQueue.empty()) 998 { 999 tempNode = internalNodesQueue.front(); 1000 CV_Assert( tempNode->left ); 1001 if ( !tempNode->left->left && !tempNode->left->right) // left node is leaf 1002 { 1003 leafVals[-leafValIdx] = (float)tempNode->left->value; 1004 fs << leafValIdx-- ; 1005 } 1006 else 1007 { 1008 internalNodesQueue.push( tempNode->left ); 1009 fs << internalNodeIdx++; 1010 } 1011 CV_Assert( tempNode->right ); 1012 if ( !tempNode->right->left && !tempNode->right->right) // right node is leaf 1013 { 1014 leafVals[-leafValIdx] = (float)tempNode->right->value; 1015 fs << leafValIdx--; 1016 } 1017 else 1018 { 1019 internalNodesQueue.push( tempNode->right ); 1020 fs << internalNodeIdx++; 1021 } 1022 int fidx = tempNode->split->var_idx; 1023 fidx = featureMap.empty() ? fidx : featureMap.at<int>(0, fidx); 1024 fs << fidx; 1025 if ( !maxCatCount ) 1026 fs << tempNode->split->ord.c; 1027 else 1028 for( int i = 0; i < subsetN; i++ ) 1029 fs << tempNode->split->subset[i]; 1030 internalNodesQueue.pop(); 1031 } 1032 fs << "]"; // CC_INTERNAL_NODES 1033 1034 fs << CC_LEAF_VALUES << "[:"; 1035 for (int ni = 0; ni < -leafValIdx; ni++) 1036 fs << leafVals[ni]; 1037 fs << "]"; // CC_LEAF_VALUES 1038 fs << "}"; 1039 } 1040 1041 void CvCascadeBoostTree::read( const FileNode &node, CvBoost* _ensemble, 1042 CvDTreeTrainData* _data ) 1043 { 1044 int maxCatCount = ((CvCascadeBoostTrainData*)_data)->featureEvaluator->getMaxCatCount(); 1045 int subsetN = (maxCatCount + 31)/32; 1046 int step = 3 + ( maxCatCount>0 ? subsetN : 1 ); 1047 1048 queue<CvDTreeNode*> internalNodesQueue; 1049 FileNodeIterator internalNodesIt, leafValsuesIt; 1050 CvDTreeNode* prntNode, *cldNode; 1051 1052 clear(); 1053 data = _data; 1054 ensemble = _ensemble; 1055 pruned_tree_idx = 0; 1056 1057 // read tree nodes 1058 FileNode rnode = node[CC_INTERNAL_NODES]; 1059 internalNodesIt = rnode.end(); 1060 leafValsuesIt = node[CC_LEAF_VALUES].end(); 1061 internalNodesIt--; leafValsuesIt--; 1062 for( size_t i = 0; i < rnode.size()/step; i++ ) 1063 { 1064 prntNode = data->new_node( 0, 0, 0, 0 ); 1065 if ( maxCatCount > 0 ) 1066 { 1067 prntNode->split = data->new_split_cat( 0, 0 ); 1068 for( int j = subsetN-1; j>=0; j--) 1069 { 1070 *internalNodesIt >> prntNode->split->subset[j]; internalNodesIt--; 1071 } 1072 } 1073 else 1074 { 1075 float split_value; 1076 *internalNodesIt >> split_value; internalNodesIt--; 1077 prntNode->split = data->new_split_ord( 0, split_value, 0, 0, 0); 1078 } 1079 *internalNodesIt >> prntNode->split->var_idx; internalNodesIt--; 1080 int ridx, lidx; 1081 *internalNodesIt >> ridx; internalNodesIt--; 1082 *internalNodesIt >> lidx;internalNodesIt--; 1083 if ( ridx <= 0) 1084 { 1085 prntNode->right = cldNode = data->new_node( 0, 0, 0, 0 ); 1086 *leafValsuesIt >> cldNode->value; leafValsuesIt--; 1087 cldNode->parent = prntNode; 1088 } 1089 else 1090 { 1091 prntNode->right = internalNodesQueue.front(); 1092 prntNode->right->parent = prntNode; 1093 internalNodesQueue.pop(); 1094 } 1095 1096 if ( lidx <= 0) 1097 { 1098 prntNode->left = cldNode = data->new_node( 0, 0, 0, 0 ); 1099 *leafValsuesIt >> cldNode->value; leafValsuesIt--; 1100 cldNode->parent = prntNode; 1101 } 1102 else 1103 { 1104 prntNode->left = internalNodesQueue.front(); 1105 prntNode->left->parent = prntNode; 1106 internalNodesQueue.pop(); 1107 } 1108 1109 internalNodesQueue.push( prntNode ); 1110 } 1111 1112 root = internalNodesQueue.front(); 1113 internalNodesQueue.pop(); 1114 } 1115 1116 void CvCascadeBoostTree::split_node_data( CvDTreeNode* node ) 1117 { 1118 int n = node->sample_count, nl, nr, scount = data->sample_count; 1119 char* dir = (char*)data->direction->data.ptr; 1120 CvDTreeNode *left = 0, *right = 0; 1121 int* newIdx = data->split_buf->data.i; 1122 int newBufIdx = data->get_child_buf_idx( node ); 1123 int workVarCount = data->get_work_var_count(); 1124 CvMat* buf = data->buf; 1125 size_t length_buf_row = data->get_length_subbuf(); 1126 cv::AutoBuffer<uchar> inn_buf(n*(3*sizeof(int)+sizeof(float))); 1127 int* tempBuf = (int*)(uchar*)inn_buf; 1128 bool splitInputData; 1129 1130 complete_node_dir(node); 1131 1132 for( int i = nl = nr = 0; i < n; i++ ) 1133 { 1134 int d = dir[i]; 1135 // initialize new indices for splitting ordered variables 1136 newIdx[i] = (nl & (d-1)) | (nr & -d); // d ? ri : li 1137 nr += d; 1138 nl += d^1; 1139 } 1140 1141 node->left = left = data->new_node( node, nl, newBufIdx, node->offset ); 1142 node->right = right = data->new_node( node, nr, newBufIdx, node->offset + nl ); 1143 1144 splitInputData = node->depth + 1 < data->params.max_depth && 1145 (node->left->sample_count > data->params.min_sample_count || 1146 node->right->sample_count > data->params.min_sample_count); 1147 1148 // split ordered variables, keep both halves sorted. 1149 for( int vi = 0; vi < ((CvCascadeBoostTrainData*)data)->numPrecalcIdx; vi++ ) 1150 { 1151 int ci = data->get_var_type(vi); 1152 if( ci >= 0 || !splitInputData ) 1153 continue; 1154 1155 int n1 = node->get_num_valid(vi); 1156 float *src_val_buf = (float*)(tempBuf + n); 1157 int *src_sorted_idx_buf = (int*)(src_val_buf + n); 1158 int *src_sample_idx_buf = src_sorted_idx_buf + n; 1159 const int* src_sorted_idx = 0; 1160 const float* src_val = 0; 1161 data->get_ord_var_data(node, vi, src_val_buf, src_sorted_idx_buf, &src_val, &src_sorted_idx, src_sample_idx_buf); 1162 1163 for(int i = 0; i < n; i++) 1164 tempBuf[i] = src_sorted_idx[i]; 1165 1166 if (data->is_buf_16u) 1167 { 1168 ushort *ldst, *rdst; 1169 ldst = (ushort*)(buf->data.s + left->buf_idx*length_buf_row + 1170 vi*scount + left->offset); 1171 rdst = (ushort*)(ldst + nl); 1172 1173 // split sorted 1174 for( int i = 0; i < n1; i++ ) 1175 { 1176 int idx = tempBuf[i]; 1177 int d = dir[idx]; 1178 idx = newIdx[idx]; 1179 if (d) 1180 { 1181 *rdst = (ushort)idx; 1182 rdst++; 1183 } 1184 else 1185 { 1186 *ldst = (ushort)idx; 1187 ldst++; 1188 } 1189 } 1190 CV_Assert( n1 == n ); 1191 } 1192 else 1193 { 1194 int *ldst, *rdst; 1195 ldst = buf->data.i + left->buf_idx*length_buf_row + 1196 vi*scount + left->offset; 1197 rdst = buf->data.i + right->buf_idx*length_buf_row + 1198 vi*scount + right->offset; 1199 1200 // split sorted 1201 for( int i = 0; i < n1; i++ ) 1202 { 1203 int idx = tempBuf[i]; 1204 int d = dir[idx]; 1205 idx = newIdx[idx]; 1206 if (d) 1207 { 1208 *rdst = idx; 1209 rdst++; 1210 } 1211 else 1212 { 1213 *ldst = idx; 1214 ldst++; 1215 } 1216 } 1217 CV_Assert( n1 == n ); 1218 } 1219 } 1220 1221 // split cv_labels using newIdx relocation table 1222 int *src_lbls_buf = tempBuf + n; 1223 const int* src_lbls = data->get_cv_labels(node, src_lbls_buf); 1224 1225 for(int i = 0; i < n; i++) 1226 tempBuf[i] = src_lbls[i]; 1227 1228 if (data->is_buf_16u) 1229 { 1230 unsigned short *ldst = (unsigned short *)(buf->data.s + left->buf_idx*length_buf_row + 1231 (size_t)(workVarCount-1)*scount + left->offset); 1232 unsigned short *rdst = (unsigned short *)(buf->data.s + right->buf_idx*length_buf_row + 1233 (size_t)(workVarCount-1)*scount + right->offset); 1234 1235 for( int i = 0; i < n; i++ ) 1236 { 1237 int idx = tempBuf[i]; 1238 if (dir[i]) 1239 { 1240 *rdst = (unsigned short)idx; 1241 rdst++; 1242 } 1243 else 1244 { 1245 *ldst = (unsigned short)idx; 1246 ldst++; 1247 } 1248 } 1249 1250 } 1251 else 1252 { 1253 int *ldst = buf->data.i + left->buf_idx*length_buf_row + 1254 (size_t)(workVarCount-1)*scount + left->offset; 1255 int *rdst = buf->data.i + right->buf_idx*length_buf_row + 1256 (size_t)(workVarCount-1)*scount + right->offset; 1257 1258 for( int i = 0; i < n; i++ ) 1259 { 1260 int idx = tempBuf[i]; 1261 if (dir[i]) 1262 { 1263 *rdst = idx; 1264 rdst++; 1265 } 1266 else 1267 { 1268 *ldst = idx; 1269 ldst++; 1270 } 1271 } 1272 } 1273 1274 // split sample indices 1275 int *sampleIdx_src_buf = tempBuf + n; 1276 const int* sampleIdx_src = data->get_sample_indices(node, sampleIdx_src_buf); 1277 1278 for(int i = 0; i < n; i++) 1279 tempBuf[i] = sampleIdx_src[i]; 1280 1281 if (data->is_buf_16u) 1282 { 1283 unsigned short* ldst = (unsigned short*)(buf->data.s + left->buf_idx*length_buf_row + 1284 (size_t)workVarCount*scount + left->offset); 1285 unsigned short* rdst = (unsigned short*)(buf->data.s + right->buf_idx*length_buf_row + 1286 (size_t)workVarCount*scount + right->offset); 1287 for (int i = 0; i < n; i++) 1288 { 1289 unsigned short idx = (unsigned short)tempBuf[i]; 1290 if (dir[i]) 1291 { 1292 *rdst = idx; 1293 rdst++; 1294 } 1295 else 1296 { 1297 *ldst = idx; 1298 ldst++; 1299 } 1300 } 1301 } 1302 else 1303 { 1304 int* ldst = buf->data.i + left->buf_idx*length_buf_row + 1305 (size_t)workVarCount*scount + left->offset; 1306 int* rdst = buf->data.i + right->buf_idx*length_buf_row + 1307 (size_t)workVarCount*scount + right->offset; 1308 for (int i = 0; i < n; i++) 1309 { 1310 int idx = tempBuf[i]; 1311 if (dir[i]) 1312 { 1313 *rdst = idx; 1314 rdst++; 1315 } 1316 else 1317 { 1318 *ldst = idx; 1319 ldst++; 1320 } 1321 } 1322 } 1323 1324 for( int vi = 0; vi < data->var_count; vi++ ) 1325 { 1326 left->set_num_valid(vi, (int)(nl)); 1327 right->set_num_valid(vi, (int)(nr)); 1328 } 1329 1330 // deallocate the parent node data that is not needed anymore 1331 data->free_node_data(node); 1332 } 1333 1334 static void auxMarkFeaturesInMap( const CvDTreeNode* node, Mat& featureMap) 1335 { 1336 if ( node && node->split ) 1337 { 1338 featureMap.ptr<int>(0)[node->split->var_idx] = 1; 1339 auxMarkFeaturesInMap( node->left, featureMap ); 1340 auxMarkFeaturesInMap( node->right, featureMap ); 1341 } 1342 } 1343 1344 void CvCascadeBoostTree::markFeaturesInMap( Mat& featureMap ) 1345 { 1346 auxMarkFeaturesInMap( root, featureMap ); 1347 } 1348 1349 //----------------------------------- CascadeBoost -------------------------------------- 1350 1351 bool CvCascadeBoost::train( const CvFeatureEvaluator* _featureEvaluator, 1352 int _numSamples, 1353 int _precalcValBufSize, int _precalcIdxBufSize, 1354 const CvCascadeBoostParams& _params ) 1355 { 1356 bool isTrained = false; 1357 CV_Assert( !data ); 1358 clear(); 1359 data = new CvCascadeBoostTrainData( _featureEvaluator, _numSamples, 1360 _precalcValBufSize, _precalcIdxBufSize, _params ); 1361 CvMemStorage *storage = cvCreateMemStorage(); 1362 weak = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvBoostTree*), storage ); 1363 storage = 0; 1364 1365 set_params( _params ); 1366 if ( (_params.boost_type == LOGIT) || (_params.boost_type == GENTLE) ) 1367 data->do_responses_copy(); 1368 1369 update_weights( 0 ); 1370 1371 cout << "+----+---------+---------+" << endl; 1372 cout << "| N | HR | FA |" << endl; 1373 cout << "+----+---------+---------+" << endl; 1374 1375 do 1376 { 1377 CvCascadeBoostTree* tree = new CvCascadeBoostTree; 1378 if( !tree->train( data, subsample_mask, this ) ) 1379 { 1380 delete tree; 1381 break; 1382 } 1383 cvSeqPush( weak, &tree ); 1384 update_weights( tree ); 1385 trim_weights(); 1386 if( cvCountNonZero(subsample_mask) == 0 ) 1387 break; 1388 } 1389 while( !isErrDesired() && (weak->total < params.weak_count) ); 1390 1391 if(weak->total > 0) 1392 { 1393 data->is_classifier = true; 1394 data->free_train_data(); 1395 isTrained = true; 1396 } 1397 else 1398 clear(); 1399 1400 return isTrained; 1401 } 1402 1403 float CvCascadeBoost::predict( int sampleIdx, bool returnSum ) const 1404 { 1405 CV_Assert( weak ); 1406 double sum = 0; 1407 CvSeqReader reader; 1408 cvStartReadSeq( weak, &reader ); 1409 cvSetSeqReaderPos( &reader, 0 ); 1410 for( int i = 0; i < weak->total; i++ ) 1411 { 1412 CvBoostTree* wtree; 1413 CV_READ_SEQ_ELEM( wtree, reader ); 1414 sum += ((CvCascadeBoostTree*)wtree)->predict(sampleIdx)->value; 1415 } 1416 if( !returnSum ) 1417 sum = sum < threshold - CV_THRESHOLD_EPS ? 0.0 : 1.0; 1418 return (float)sum; 1419 } 1420 1421 bool CvCascadeBoost::set_params( const CvBoostParams& _params ) 1422 { 1423 minHitRate = ((CvCascadeBoostParams&)_params).minHitRate; 1424 maxFalseAlarm = ((CvCascadeBoostParams&)_params).maxFalseAlarm; 1425 return ( ( minHitRate > 0 ) && ( minHitRate < 1) && 1426 ( maxFalseAlarm > 0 ) && ( maxFalseAlarm < 1) && 1427 CvBoost::set_params( _params )); 1428 } 1429 1430 void CvCascadeBoost::update_weights( CvBoostTree* tree ) 1431 { 1432 int n = data->sample_count; 1433 double sumW = 0.; 1434 int step = 0; 1435 float* fdata = 0; 1436 int *sampleIdxBuf; 1437 const int* sampleIdx = 0; 1438 int inn_buf_size = ((params.boost_type == LOGIT) || (params.boost_type == GENTLE) ? n*sizeof(int) : 0) + 1439 ( !tree ? n*sizeof(int) : 0 ); 1440 cv::AutoBuffer<uchar> inn_buf(inn_buf_size); 1441 uchar* cur_inn_buf_pos = (uchar*)inn_buf; 1442 if ( (params.boost_type == LOGIT) || (params.boost_type == GENTLE) ) 1443 { 1444 step = CV_IS_MAT_CONT(data->responses_copy->type) ? 1445 1 : data->responses_copy->step / CV_ELEM_SIZE(data->responses_copy->type); 1446 fdata = data->responses_copy->data.fl; 1447 sampleIdxBuf = (int*)cur_inn_buf_pos; cur_inn_buf_pos = (uchar*)(sampleIdxBuf + n); 1448 sampleIdx = data->get_sample_indices( data->data_root, sampleIdxBuf ); 1449 } 1450 CvMat* buf = data->buf; 1451 size_t length_buf_row = data->get_length_subbuf(); 1452 if( !tree ) // before training the first tree, initialize weights and other parameters 1453 { 1454 int* classLabelsBuf = (int*)cur_inn_buf_pos; cur_inn_buf_pos = (uchar*)(classLabelsBuf + n); 1455 const int* classLabels = data->get_class_labels(data->data_root, classLabelsBuf); 1456 // in case of logitboost and gentle adaboost each weak tree is a regression tree, 1457 // so we need to convert class labels to floating-point values 1458 double w0 = 1./n; 1459 double p[2] = { 1, 1 }; 1460 1461 cvReleaseMat( &orig_response ); 1462 cvReleaseMat( &sum_response ); 1463 cvReleaseMat( &weak_eval ); 1464 cvReleaseMat( &subsample_mask ); 1465 cvReleaseMat( &weights ); 1466 1467 orig_response = cvCreateMat( 1, n, CV_32S ); 1468 weak_eval = cvCreateMat( 1, n, CV_64F ); 1469 subsample_mask = cvCreateMat( 1, n, CV_8U ); 1470 weights = cvCreateMat( 1, n, CV_64F ); 1471 subtree_weights = cvCreateMat( 1, n + 2, CV_64F ); 1472 1473 if (data->is_buf_16u) 1474 { 1475 unsigned short* labels = (unsigned short*)(buf->data.s + data->data_root->buf_idx*length_buf_row + 1476 data->data_root->offset + (size_t)(data->work_var_count-1)*data->sample_count); 1477 for( int i = 0; i < n; i++ ) 1478 { 1479 // save original categorical responses {0,1}, convert them to {-1,1} 1480 orig_response->data.i[i] = classLabels[i]*2 - 1; 1481 // make all the samples active at start. 1482 // later, in trim_weights() deactivate/reactive again some, if need 1483 subsample_mask->data.ptr[i] = (uchar)1; 1484 // make all the initial weights the same. 1485 weights->data.db[i] = w0*p[classLabels[i]]; 1486 // set the labels to find (from within weak tree learning proc) 1487 // the particular sample weight, and where to store the response. 1488 labels[i] = (unsigned short)i; 1489 } 1490 } 1491 else 1492 { 1493 int* labels = buf->data.i + data->data_root->buf_idx*length_buf_row + 1494 data->data_root->offset + (size_t)(data->work_var_count-1)*data->sample_count; 1495 1496 for( int i = 0; i < n; i++ ) 1497 { 1498 // save original categorical responses {0,1}, convert them to {-1,1} 1499 orig_response->data.i[i] = classLabels[i]*2 - 1; 1500 subsample_mask->data.ptr[i] = (uchar)1; 1501 weights->data.db[i] = w0*p[classLabels[i]]; 1502 labels[i] = i; 1503 } 1504 } 1505 1506 if( params.boost_type == LOGIT ) 1507 { 1508 sum_response = cvCreateMat( 1, n, CV_64F ); 1509 1510 for( int i = 0; i < n; i++ ) 1511 { 1512 sum_response->data.db[i] = 0; 1513 fdata[sampleIdx[i]*step] = orig_response->data.i[i] > 0 ? 2.f : -2.f; 1514 } 1515 1516 // in case of logitboost each weak tree is a regression tree. 1517 // the target function values are recalculated for each of the trees 1518 data->is_classifier = false; 1519 } 1520 else if( params.boost_type == GENTLE ) 1521 { 1522 for( int i = 0; i < n; i++ ) 1523 fdata[sampleIdx[i]*step] = (float)orig_response->data.i[i]; 1524 1525 data->is_classifier = false; 1526 } 1527 } 1528 else 1529 { 1530 // at this moment, for all the samples that participated in the training of the most 1531 // recent weak classifier we know the responses. For other samples we need to compute them 1532 if( have_subsample ) 1533 { 1534 // invert the subsample mask 1535 cvXorS( subsample_mask, cvScalar(1.), subsample_mask ); 1536 1537 // run tree through all the non-processed samples 1538 for( int i = 0; i < n; i++ ) 1539 if( subsample_mask->data.ptr[i] ) 1540 { 1541 weak_eval->data.db[i] = ((CvCascadeBoostTree*)tree)->predict( i )->value; 1542 } 1543 } 1544 1545 // now update weights and other parameters for each type of boosting 1546 if( params.boost_type == DISCRETE ) 1547 { 1548 // Discrete AdaBoost: 1549 // weak_eval[i] (=f(x_i)) is in {-1,1} 1550 // err = sum(w_i*(f(x_i) != y_i))/sum(w_i) 1551 // C = log((1-err)/err) 1552 // w_i *= exp(C*(f(x_i) != y_i)) 1553 1554 double C, err = 0.; 1555 double scale[] = { 1., 0. }; 1556 1557 for( int i = 0; i < n; i++ ) 1558 { 1559 double w = weights->data.db[i]; 1560 sumW += w; 1561 err += w*(weak_eval->data.db[i] != orig_response->data.i[i]); 1562 } 1563 1564 if( sumW != 0 ) 1565 err /= sumW; 1566 C = err = -logRatio( err ); 1567 scale[1] = exp(err); 1568 1569 sumW = 0; 1570 for( int i = 0; i < n; i++ ) 1571 { 1572 double w = weights->data.db[i]* 1573 scale[weak_eval->data.db[i] != orig_response->data.i[i]]; 1574 sumW += w; 1575 weights->data.db[i] = w; 1576 } 1577 1578 tree->scale( C ); 1579 } 1580 else if( params.boost_type == REAL ) 1581 { 1582 // Real AdaBoost: 1583 // weak_eval[i] = f(x_i) = 0.5*log(p(x_i)/(1-p(x_i))), p(x_i)=P(y=1|x_i) 1584 // w_i *= exp(-y_i*f(x_i)) 1585 1586 for( int i = 0; i < n; i++ ) 1587 weak_eval->data.db[i] *= -orig_response->data.i[i]; 1588 1589 cvExp( weak_eval, weak_eval ); 1590 1591 for( int i = 0; i < n; i++ ) 1592 { 1593 double w = weights->data.db[i]*weak_eval->data.db[i]; 1594 sumW += w; 1595 weights->data.db[i] = w; 1596 } 1597 } 1598 else if( params.boost_type == LOGIT ) 1599 { 1600 // LogitBoost: 1601 // weak_eval[i] = f(x_i) in [-z_max,z_max] 1602 // sum_response = F(x_i). 1603 // F(x_i) += 0.5*f(x_i) 1604 // p(x_i) = exp(F(x_i))/(exp(F(x_i)) + exp(-F(x_i))=1/(1+exp(-2*F(x_i))) 1605 // reuse weak_eval: weak_eval[i] <- p(x_i) 1606 // w_i = p(x_i)*1(1 - p(x_i)) 1607 // z_i = ((y_i+1)/2 - p(x_i))/(p(x_i)*(1 - p(x_i))) 1608 // store z_i to the data->data_root as the new target responses 1609 1610 const double lbWeightThresh = FLT_EPSILON; 1611 const double lbZMax = 10.; 1612 1613 for( int i = 0; i < n; i++ ) 1614 { 1615 double s = sum_response->data.db[i] + 0.5*weak_eval->data.db[i]; 1616 sum_response->data.db[i] = s; 1617 weak_eval->data.db[i] = -2*s; 1618 } 1619 1620 cvExp( weak_eval, weak_eval ); 1621 1622 for( int i = 0; i < n; i++ ) 1623 { 1624 double p = 1./(1. + weak_eval->data.db[i]); 1625 double w = p*(1 - p), z; 1626 w = MAX( w, lbWeightThresh ); 1627 weights->data.db[i] = w; 1628 sumW += w; 1629 if( orig_response->data.i[i] > 0 ) 1630 { 1631 z = 1./p; 1632 fdata[sampleIdx[i]*step] = (float)min(z, lbZMax); 1633 } 1634 else 1635 { 1636 z = 1./(1-p); 1637 fdata[sampleIdx[i]*step] = (float)-min(z, lbZMax); 1638 } 1639 } 1640 } 1641 else 1642 { 1643 // Gentle AdaBoost: 1644 // weak_eval[i] = f(x_i) in [-1,1] 1645 // w_i *= exp(-y_i*f(x_i)) 1646 assert( params.boost_type == GENTLE ); 1647 1648 for( int i = 0; i < n; i++ ) 1649 weak_eval->data.db[i] *= -orig_response->data.i[i]; 1650 1651 cvExp( weak_eval, weak_eval ); 1652 1653 for( int i = 0; i < n; i++ ) 1654 { 1655 double w = weights->data.db[i] * weak_eval->data.db[i]; 1656 weights->data.db[i] = w; 1657 sumW += w; 1658 } 1659 } 1660 } 1661 1662 // renormalize weights 1663 if( sumW > FLT_EPSILON ) 1664 { 1665 sumW = 1./sumW; 1666 for( int i = 0; i < n; ++i ) 1667 weights->data.db[i] *= sumW; 1668 } 1669 } 1670 1671 bool CvCascadeBoost::isErrDesired() 1672 { 1673 int sCount = data->sample_count, 1674 numPos = 0, numNeg = 0, numFalse = 0, numPosTrue = 0; 1675 vector<float> eval(sCount); 1676 1677 for( int i = 0; i < sCount; i++ ) 1678 if( ((CvCascadeBoostTrainData*)data)->featureEvaluator->getCls( i ) == 1.0F ) 1679 eval[numPos++] = predict( i, true ); 1680 1681 std::sort(&eval[0], &eval[0] + numPos); 1682 1683 int thresholdIdx = (int)((1.0F - minHitRate) * numPos); 1684 1685 threshold = eval[ thresholdIdx ]; 1686 numPosTrue = numPos - thresholdIdx; 1687 for( int i = thresholdIdx - 1; i >= 0; i--) 1688 if ( abs( eval[i] - threshold) < FLT_EPSILON ) 1689 numPosTrue++; 1690 float hitRate = ((float) numPosTrue) / ((float) numPos); 1691 1692 for( int i = 0; i < sCount; i++ ) 1693 { 1694 if( ((CvCascadeBoostTrainData*)data)->featureEvaluator->getCls( i ) == 0.0F ) 1695 { 1696 numNeg++; 1697 if( predict( i ) ) 1698 numFalse++; 1699 } 1700 } 1701 float falseAlarm = ((float) numFalse) / ((float) numNeg); 1702 1703 cout << "|"; cout.width(4); cout << right << weak->total; 1704 cout << "|"; cout.width(9); cout << right << hitRate; 1705 cout << "|"; cout.width(9); cout << right << falseAlarm; 1706 cout << "|" << endl; 1707 cout << "+----+---------+---------+" << endl; 1708 1709 return falseAlarm <= maxFalseAlarm; 1710 } 1711 1712 void CvCascadeBoost::write( FileStorage &fs, const Mat& featureMap ) const 1713 { 1714 // char cmnt[30]; 1715 CvCascadeBoostTree* weakTree; 1716 fs << CC_WEAK_COUNT << weak->total; 1717 fs << CC_STAGE_THRESHOLD << threshold; 1718 fs << CC_WEAK_CLASSIFIERS << "["; 1719 for( int wi = 0; wi < weak->total; wi++) 1720 { 1721 /*sprintf( cmnt, "tree %i", wi ); 1722 cvWriteComment( fs, cmnt, 0 );*/ 1723 weakTree = *((CvCascadeBoostTree**) cvGetSeqElem( weak, wi )); 1724 weakTree->write( fs, featureMap ); 1725 } 1726 fs << "]"; 1727 } 1728 1729 bool CvCascadeBoost::read( const FileNode &node, 1730 const CvFeatureEvaluator* _featureEvaluator, 1731 const CvCascadeBoostParams& _params ) 1732 { 1733 CvMemStorage* storage; 1734 clear(); 1735 data = new CvCascadeBoostTrainData( _featureEvaluator, _params ); 1736 set_params( _params ); 1737 1738 node[CC_STAGE_THRESHOLD] >> threshold; 1739 FileNode rnode = node[CC_WEAK_CLASSIFIERS]; 1740 1741 storage = cvCreateMemStorage(); 1742 weak = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvBoostTree*), storage ); 1743 for( FileNodeIterator it = rnode.begin(); it != rnode.end(); it++ ) 1744 { 1745 CvCascadeBoostTree* tree = new CvCascadeBoostTree(); 1746 tree->read( *it, this, data ); 1747 cvSeqPush( weak, &tree ); 1748 } 1749 return true; 1750 } 1751 1752 void CvCascadeBoost::markUsedFeaturesInMap( Mat& featureMap ) 1753 { 1754 for( int wi = 0; wi < weak->total; wi++ ) 1755 { 1756 CvCascadeBoostTree* weakTree = *((CvCascadeBoostTree**) cvGetSeqElem( weak, wi )); 1757 weakTree->markFeaturesInMap( featureMap ); 1758 } 1759 } 1760