1 /* 2 3 *************************************************************************************************** 4 ** 5 ** profile.cpp 6 ** 7 ** Real-Time Hierarchical Profiling for Game Programming Gems 3 8 ** 9 ** by Greg Hjelstrom & Byon Garrabrant 10 ** 11 ***************************************************************************************************/ 12 13 // Credits: The Clock class was inspired by the Timer classes in 14 // Ogre (www.ogre3d.org). 15 16 #include "btQuickprof.h" 17 18 #ifndef BT_NO_PROFILE 19 20 21 static btClock gProfileClock; 22 23 24 #ifdef __CELLOS_LV2__ 25 #include <sys/sys_time.h> 26 #include <sys/time_util.h> 27 #include <stdio.h> 28 #endif 29 30 #if defined (SUNOS) || defined (__SUNOS__) 31 #include <stdio.h> 32 #endif 33 34 #if defined(WIN32) || defined(_WIN32) 35 36 #define BT_USE_WINDOWS_TIMERS 37 #define WIN32_LEAN_AND_MEAN 38 #define NOWINRES 39 #define NOMCX 40 #define NOIME 41 42 #ifdef _XBOX 43 #include <Xtl.h> 44 #else //_XBOX 45 #include <windows.h> 46 47 #if WINVER <0x0602 48 #define GetTickCount64 GetTickCount 49 #endif 50 51 #endif //_XBOX 52 53 #include <time.h> 54 55 56 #else //_WIN32 57 #include <sys/time.h> 58 #endif //_WIN32 59 60 #define mymin(a,b) (a > b ? a : b) 61 62 struct btClockData 63 { 64 65 #ifdef BT_USE_WINDOWS_TIMERS 66 LARGE_INTEGER mClockFrequency; 67 LONGLONG mStartTick; 68 LONGLONG mPrevElapsedTime; 69 LARGE_INTEGER mStartTime; 70 #else 71 #ifdef __CELLOS_LV2__ 72 uint64_t mStartTime; 73 #else 74 struct timeval mStartTime; 75 #endif 76 #endif //__CELLOS_LV2__ 77 78 }; 79 80 ///The btClock is a portable basic clock that measures accurate time in seconds, use for profiling. 81 btClock::btClock() 82 { 83 m_data = new btClockData; 84 #ifdef BT_USE_WINDOWS_TIMERS 85 QueryPerformanceFrequency(&m_data->mClockFrequency); 86 #endif 87 reset(); 88 } 89 90 btClock::~btClock() 91 { 92 delete m_data; 93 } 94 95 btClock::btClock(const btClock& other) 96 { 97 m_data = new btClockData; 98 *m_data = *other.m_data; 99 } 100 101 btClock& btClock::operator=(const btClock& other) 102 { 103 *m_data = *other.m_data; 104 return *this; 105 } 106 107 108 /// Resets the initial reference time. 109 void btClock::reset() 110 { 111 #ifdef BT_USE_WINDOWS_TIMERS 112 QueryPerformanceCounter(&m_data->mStartTime); 113 m_data->mStartTick = GetTickCount64(); 114 m_data->mPrevElapsedTime = 0; 115 #else 116 #ifdef __CELLOS_LV2__ 117 118 typedef uint64_t ClockSize; 119 ClockSize newTime; 120 //__asm __volatile__( "mftb %0" : "=r" (newTime) : : "memory"); 121 SYS_TIMEBASE_GET( newTime ); 122 m_data->mStartTime = newTime; 123 #else 124 gettimeofday(&m_data->mStartTime, 0); 125 #endif 126 #endif 127 } 128 129 /// Returns the time in ms since the last call to reset or since 130 /// the btClock was created. 131 unsigned long int btClock::getTimeMilliseconds() 132 { 133 #ifdef BT_USE_WINDOWS_TIMERS 134 LARGE_INTEGER currentTime; 135 QueryPerformanceCounter(¤tTime); 136 LONGLONG elapsedTime = currentTime.QuadPart - 137 m_data->mStartTime.QuadPart; 138 // Compute the number of millisecond ticks elapsed. 139 unsigned long msecTicks = (unsigned long)(1000 * elapsedTime / 140 m_data->mClockFrequency.QuadPart); 141 // Check for unexpected leaps in the Win32 performance counter. 142 // (This is caused by unexpected data across the PCI to ISA 143 // bridge, aka south bridge. See Microsoft KB274323.) 144 unsigned long elapsedTicks = (unsigned long)(GetTickCount64() - m_data->mStartTick); 145 signed long msecOff = (signed long)(msecTicks - elapsedTicks); 146 if (msecOff < -100 || msecOff > 100) 147 { 148 // Adjust the starting time forwards. 149 LONGLONG msecAdjustment = mymin(msecOff * 150 m_data->mClockFrequency.QuadPart / 1000, elapsedTime - 151 m_data->mPrevElapsedTime); 152 m_data->mStartTime.QuadPart += msecAdjustment; 153 elapsedTime -= msecAdjustment; 154 155 // Recompute the number of millisecond ticks elapsed. 156 msecTicks = (unsigned long)(1000 * elapsedTime / 157 m_data->mClockFrequency.QuadPart); 158 } 159 160 // Store the current elapsed time for adjustments next time. 161 m_data->mPrevElapsedTime = elapsedTime; 162 163 return msecTicks; 164 #else 165 166 #ifdef __CELLOS_LV2__ 167 uint64_t freq=sys_time_get_timebase_frequency(); 168 double dFreq=((double) freq) / 1000.0; 169 typedef uint64_t ClockSize; 170 ClockSize newTime; 171 SYS_TIMEBASE_GET( newTime ); 172 //__asm __volatile__( "mftb %0" : "=r" (newTime) : : "memory"); 173 174 return (unsigned long int)((double(newTime-m_data->mStartTime)) / dFreq); 175 #else 176 177 struct timeval currentTime; 178 gettimeofday(¤tTime, 0); 179 return (currentTime.tv_sec - m_data->mStartTime.tv_sec) * 1000 + 180 (currentTime.tv_usec - m_data->mStartTime.tv_usec) / 1000; 181 #endif //__CELLOS_LV2__ 182 #endif 183 } 184 185 /// Returns the time in us since the last call to reset or since 186 /// the Clock was created. 187 unsigned long int btClock::getTimeMicroseconds() 188 { 189 #ifdef BT_USE_WINDOWS_TIMERS 190 LARGE_INTEGER currentTime; 191 QueryPerformanceCounter(¤tTime); 192 LONGLONG elapsedTime = currentTime.QuadPart - 193 m_data->mStartTime.QuadPart; 194 195 // Compute the number of millisecond ticks elapsed. 196 unsigned long msecTicks = (unsigned long)(1000 * elapsedTime / 197 m_data->mClockFrequency.QuadPart); 198 199 // Check for unexpected leaps in the Win32 performance counter. 200 // (This is caused by unexpected data across the PCI to ISA 201 // bridge, aka south bridge. See Microsoft KB274323.) 202 unsigned long elapsedTicks = (unsigned long)(GetTickCount64() - m_data->mStartTick); 203 signed long msecOff = (signed long)(msecTicks - elapsedTicks); 204 if (msecOff < -100 || msecOff > 100) 205 { 206 // Adjust the starting time forwards. 207 LONGLONG msecAdjustment = mymin(msecOff * 208 m_data->mClockFrequency.QuadPart / 1000, elapsedTime - 209 m_data->mPrevElapsedTime); 210 m_data->mStartTime.QuadPart += msecAdjustment; 211 elapsedTime -= msecAdjustment; 212 } 213 214 // Store the current elapsed time for adjustments next time. 215 m_data->mPrevElapsedTime = elapsedTime; 216 217 // Convert to microseconds. 218 unsigned long usecTicks = (unsigned long)(1000000 * elapsedTime / 219 m_data->mClockFrequency.QuadPart); 220 221 return usecTicks; 222 #else 223 224 #ifdef __CELLOS_LV2__ 225 uint64_t freq=sys_time_get_timebase_frequency(); 226 double dFreq=((double) freq)/ 1000000.0; 227 typedef uint64_t ClockSize; 228 ClockSize newTime; 229 //__asm __volatile__( "mftb %0" : "=r" (newTime) : : "memory"); 230 SYS_TIMEBASE_GET( newTime ); 231 232 return (unsigned long int)((double(newTime-m_data->mStartTime)) / dFreq); 233 #else 234 235 struct timeval currentTime; 236 gettimeofday(¤tTime, 0); 237 return (currentTime.tv_sec - m_data->mStartTime.tv_sec) * 1000000 + 238 (currentTime.tv_usec - m_data->mStartTime.tv_usec); 239 #endif//__CELLOS_LV2__ 240 #endif 241 } 242 243 244 245 /// Returns the time in s since the last call to reset or since 246 /// the Clock was created. 247 btScalar btClock::getTimeSeconds() 248 { 249 static const btScalar microseconds_to_seconds = btScalar(0.000001); 250 return btScalar(getTimeMicroseconds()) * microseconds_to_seconds; 251 } 252 253 254 255 inline void Profile_Get_Ticks(unsigned long int * ticks) 256 { 257 *ticks = gProfileClock.getTimeMicroseconds(); 258 } 259 260 inline float Profile_Get_Tick_Rate(void) 261 { 262 // return 1000000.f; 263 return 1000.f; 264 265 } 266 267 268 269 /*************************************************************************************************** 270 ** 271 ** CProfileNode 272 ** 273 ***************************************************************************************************/ 274 275 /*********************************************************************************************** 276 * INPUT: * 277 * name - pointer to a static string which is the name of this profile node * 278 * parent - parent pointer * 279 * * 280 * WARNINGS: * 281 * The name is assumed to be a static pointer, only the pointer is stored and compared for * 282 * efficiency reasons. * 283 *=============================================================================================*/ 284 CProfileNode::CProfileNode( const char * name, CProfileNode * parent ) : 285 Name( name ), 286 TotalCalls( 0 ), 287 TotalTime( 0 ), 288 StartTime( 0 ), 289 RecursionCounter( 0 ), 290 Parent( parent ), 291 Child( NULL ), 292 Sibling( NULL ), 293 m_userPtr(0) 294 { 295 Reset(); 296 } 297 298 299 void CProfileNode::CleanupMemory() 300 { 301 delete ( Child); 302 Child = NULL; 303 delete ( Sibling); 304 Sibling = NULL; 305 } 306 307 CProfileNode::~CProfileNode( void ) 308 { 309 CleanupMemory(); 310 } 311 312 313 /*********************************************************************************************** 314 * INPUT: * 315 * name - static string pointer to the name of the node we are searching for * 316 * * 317 * WARNINGS: * 318 * All profile names are assumed to be static strings so this function uses pointer compares * 319 * to find the named node. * 320 *=============================================================================================*/ 321 CProfileNode * CProfileNode::Get_Sub_Node( const char * name ) 322 { 323 // Try to find this sub node 324 CProfileNode * child = Child; 325 while ( child ) { 326 if ( child->Name == name ) { 327 return child; 328 } 329 child = child->Sibling; 330 } 331 332 // We didn't find it, so add it 333 334 CProfileNode * node = new CProfileNode( name, this ); 335 node->Sibling = Child; 336 Child = node; 337 return node; 338 } 339 340 341 void CProfileNode::Reset( void ) 342 { 343 TotalCalls = 0; 344 TotalTime = 0.0f; 345 346 347 if ( Child ) { 348 Child->Reset(); 349 } 350 if ( Sibling ) { 351 Sibling->Reset(); 352 } 353 } 354 355 356 void CProfileNode::Call( void ) 357 { 358 TotalCalls++; 359 if (RecursionCounter++ == 0) { 360 Profile_Get_Ticks(&StartTime); 361 } 362 } 363 364 365 bool CProfileNode::Return( void ) 366 { 367 if ( --RecursionCounter == 0 && TotalCalls != 0 ) { 368 unsigned long int time; 369 Profile_Get_Ticks(&time); 370 time-=StartTime; 371 TotalTime += (float)time / Profile_Get_Tick_Rate(); 372 } 373 return ( RecursionCounter == 0 ); 374 } 375 376 377 /*************************************************************************************************** 378 ** 379 ** CProfileIterator 380 ** 381 ***************************************************************************************************/ 382 CProfileIterator::CProfileIterator( CProfileNode * start ) 383 { 384 CurrentParent = start; 385 CurrentChild = CurrentParent->Get_Child(); 386 } 387 388 389 void CProfileIterator::First(void) 390 { 391 CurrentChild = CurrentParent->Get_Child(); 392 } 393 394 395 void CProfileIterator::Next(void) 396 { 397 CurrentChild = CurrentChild->Get_Sibling(); 398 } 399 400 401 bool CProfileIterator::Is_Done(void) 402 { 403 return CurrentChild == NULL; 404 } 405 406 407 void CProfileIterator::Enter_Child( int index ) 408 { 409 CurrentChild = CurrentParent->Get_Child(); 410 while ( (CurrentChild != NULL) && (index != 0) ) { 411 index--; 412 CurrentChild = CurrentChild->Get_Sibling(); 413 } 414 415 if ( CurrentChild != NULL ) { 416 CurrentParent = CurrentChild; 417 CurrentChild = CurrentParent->Get_Child(); 418 } 419 } 420 421 422 void CProfileIterator::Enter_Parent( void ) 423 { 424 if ( CurrentParent->Get_Parent() != NULL ) { 425 CurrentParent = CurrentParent->Get_Parent(); 426 } 427 CurrentChild = CurrentParent->Get_Child(); 428 } 429 430 431 /*************************************************************************************************** 432 ** 433 ** CProfileManager 434 ** 435 ***************************************************************************************************/ 436 437 CProfileNode CProfileManager::Root( "Root", NULL ); 438 CProfileNode * CProfileManager::CurrentNode = &CProfileManager::Root; 439 int CProfileManager::FrameCounter = 0; 440 unsigned long int CProfileManager::ResetTime = 0; 441 442 443 /*********************************************************************************************** 444 * CProfileManager::Start_Profile -- Begin a named profile * 445 * * 446 * Steps one level deeper into the tree, if a child already exists with the specified name * 447 * then it accumulates the profiling; otherwise a new child node is added to the profile tree. * 448 * * 449 * INPUT: * 450 * name - name of this profiling record * 451 * * 452 * WARNINGS: * 453 * The string used is assumed to be a static string; pointer compares are used throughout * 454 * the profiling code for efficiency. * 455 *=============================================================================================*/ 456 void CProfileManager::Start_Profile( const char * name ) 457 { 458 if (name != CurrentNode->Get_Name()) { 459 CurrentNode = CurrentNode->Get_Sub_Node( name ); 460 } 461 462 CurrentNode->Call(); 463 } 464 465 466 /*********************************************************************************************** 467 * CProfileManager::Stop_Profile -- Stop timing and record the results. * 468 *=============================================================================================*/ 469 void CProfileManager::Stop_Profile( void ) 470 { 471 // Return will indicate whether we should back up to our parent (we may 472 // be profiling a recursive function) 473 if (CurrentNode->Return()) { 474 CurrentNode = CurrentNode->Get_Parent(); 475 } 476 } 477 478 479 /*********************************************************************************************** 480 * CProfileManager::Reset -- Reset the contents of the profiling system * 481 * * 482 * This resets everything except for the tree structure. All of the timing data is reset. * 483 *=============================================================================================*/ 484 void CProfileManager::Reset( void ) 485 { 486 gProfileClock.reset(); 487 Root.Reset(); 488 Root.Call(); 489 FrameCounter = 0; 490 Profile_Get_Ticks(&ResetTime); 491 } 492 493 494 /*********************************************************************************************** 495 * CProfileManager::Increment_Frame_Counter -- Increment the frame counter * 496 *=============================================================================================*/ 497 void CProfileManager::Increment_Frame_Counter( void ) 498 { 499 FrameCounter++; 500 } 501 502 503 /*********************************************************************************************** 504 * CProfileManager::Get_Time_Since_Reset -- returns the elapsed time since last reset * 505 *=============================================================================================*/ 506 float CProfileManager::Get_Time_Since_Reset( void ) 507 { 508 unsigned long int time; 509 Profile_Get_Ticks(&time); 510 time -= ResetTime; 511 return (float)time / Profile_Get_Tick_Rate(); 512 } 513 514 #include <stdio.h> 515 516 void CProfileManager::dumpRecursive(CProfileIterator* profileIterator, int spacing) 517 { 518 profileIterator->First(); 519 if (profileIterator->Is_Done()) 520 return; 521 522 float accumulated_time=0,parent_time = profileIterator->Is_Root() ? CProfileManager::Get_Time_Since_Reset() : profileIterator->Get_Current_Parent_Total_Time(); 523 int i; 524 int frames_since_reset = CProfileManager::Get_Frame_Count_Since_Reset(); 525 for (i=0;i<spacing;i++) printf("."); 526 printf("----------------------------------\n"); 527 for (i=0;i<spacing;i++) printf("."); 528 printf("Profiling: %s (total running time: %.3f ms) ---\n", profileIterator->Get_Current_Parent_Name(), parent_time ); 529 float totalTime = 0.f; 530 531 532 int numChildren = 0; 533 534 for (i = 0; !profileIterator->Is_Done(); i++,profileIterator->Next()) 535 { 536 numChildren++; 537 float current_total_time = profileIterator->Get_Current_Total_Time(); 538 accumulated_time += current_total_time; 539 float fraction = parent_time > SIMD_EPSILON ? (current_total_time / parent_time) * 100 : 0.f; 540 { 541 int i; for (i=0;i<spacing;i++) printf("."); 542 } 543 printf("%d -- %s (%.2f %%) :: %.3f ms / frame (%d calls)\n",i, profileIterator->Get_Current_Name(), fraction,(current_total_time / (double)frames_since_reset),profileIterator->Get_Current_Total_Calls()); 544 totalTime += current_total_time; 545 //recurse into children 546 } 547 548 if (parent_time < accumulated_time) 549 { 550 //printf("what's wrong\n"); 551 } 552 for (i=0;i<spacing;i++) printf("."); 553 printf("%s (%.3f %%) :: %.3f ms\n", "Unaccounted:",parent_time > SIMD_EPSILON ? ((parent_time - accumulated_time) / parent_time) * 100 : 0.f, parent_time - accumulated_time); 554 555 for (i=0;i<numChildren;i++) 556 { 557 profileIterator->Enter_Child(i); 558 dumpRecursive(profileIterator,spacing+3); 559 profileIterator->Enter_Parent(); 560 } 561 } 562 563 564 565 void CProfileManager::dumpAll() 566 { 567 CProfileIterator* profileIterator = 0; 568 profileIterator = CProfileManager::Get_Iterator(); 569 570 dumpRecursive(profileIterator,0); 571 572 CProfileManager::Release_Iterator(profileIterator); 573 } 574 575 576 577 578 #endif //BT_NO_PROFILE 579