/*--------------------------------------------------------------------*/
/*--- Callgrind                                                    ---*/
/*---                                                       main.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Callgrind, a Valgrind tool for call graph
   profiling programs.

   Copyright (C) 2002-2011, Josef Weidendorfer (Josef.Weidendorfer@gmx.de)

   This tool is derived from and contains code from Cachegrind
   Copyright (C) 2002-2011 Nicholas Nethercote (njn@valgrind.org)

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "config.h"
#include "callgrind.h"
#include "global.h"

#include "pub_tool_threadstate.h"
#include "pub_tool_gdbserver.h"

#include "cg_branchpred.c"
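/* Note: cg_branchpred.c (taken from Cachegrind) is included here as a
   source file; it provides the branch predictor models
   do_cond_branch_predict() and do_ind_branch_predict() used by the
   logging callbacks below. */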

/*------------------------------------------------------------*/
/*--- Global variables                                     ---*/
/*------------------------------------------------------------*/

/* for all threads */
CommandLineOptions CLG_(clo);
Statistics CLG_(stat);
Bool CLG_(instrument_state) = True; /* instrumentation on? */

/* thread and signal handler specific */
exec_state CLG_(current_state);


/*------------------------------------------------------------*/
/*--- Statistics                                           ---*/
/*------------------------------------------------------------*/

static void CLG_(init_statistics)(Statistics* s)
{
   s->call_counter       = 0;
   s->jcnd_counter       = 0;
   s->jump_counter       = 0;
   s->rec_call_counter   = 0;
   s->ret_counter        = 0;
   s->bb_executions      = 0;

   s->context_counter    = 0;
   s->bb_retranslations  = 0;

   s->distinct_objs      = 0;
   s->distinct_files     = 0;
   s->distinct_fns       = 0;
   s->distinct_contexts  = 0;
   s->distinct_bbs       = 0;
   s->distinct_bbccs     = 0;
   s->distinct_instrs    = 0;
   s->distinct_skips     = 0;

   s->bb_hash_resizes     = 0;
   s->bbcc_hash_resizes   = 0;
   s->jcc_hash_resizes    = 0;
   s->cxt_hash_resizes    = 0;
   s->fn_array_resizes    = 0;
   s->call_stack_resizes  = 0;
   s->fn_stack_resizes    = 0;

   s->full_debug_BBs      = 0;
   s->file_line_debug_BBs = 0;
   s->fn_name_debug_BBs   = 0;
   s->no_debug_BBs        = 0;
   s->bbcc_lru_misses     = 0;
   s->jcc_lru_misses      = 0;
   s->cxt_lru_misses      = 0;
   s->bbcc_clones         = 0;
}


/*------------------------------------------------------------*/
/*--- Simple callbacks (not cache simulator)               ---*/
/*------------------------------------------------------------*/

VG_REGPARM(1)
static void log_global_event(InstrInfo* ii)
{
   ULong* cost_Bus;

   CLG_DEBUG(6, "log_global_event:  Ir %#lx/%u\n",
             CLG_(bb_base) + ii->instr_offset, ii->instr_size);

   if (!CLG_(current_state).collect) return;

   CLG_ASSERT( (ii->eventset->mask & (1u<<EG_BUS))>0 );

   CLG_(current_state).cost[ fullOffset(EG_BUS) ]++;

   if (CLG_(current_state).nonskipped)
      cost_Bus = CLG_(current_state).nonskipped->skipped + fullOffset(EG_BUS);
   else
      cost_Bus = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_BUS];
   cost_Bus[0]++;
}


/* For branches, we consult two different predictors, one which
   predicts taken/untaken for conditional branches, and the other
   which predicts the branch target address for indirect branches
   (jump-to-register style ones). */
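/* Cost layout sketch (illustrative; it follows from the logging code
   below): each branch event group occupies two adjacent slots in a
   cost array, indexed relative to the group's fullOffset():

      cost[ fullOffset(EG_BC)   ]   conditional branches executed
      cost[ fullOffset(EG_BC)+1 ]   ... of those, mispredicted
      cost[ fullOffset(EG_BI)   ]   indirect branches executed
      cost[ fullOffset(EG_BI)+1 ]   ... of those, mispredicted

   Both the per-thread cost array and the per-instruction cost arrays
   (cost_Bc/cost_Bi below) use this layout. */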

static VG_REGPARM(2)
void log_cond_branch(InstrInfo* ii, Word taken)
{
   Bool miss;
   Int fullOffset_Bc;
   ULong* cost_Bc;

   CLG_DEBUG(6, "log_cond_branch:  Ir %#lx, taken %lu\n",
             CLG_(bb_base) + ii->instr_offset, taken);

   miss = 1 & do_cond_branch_predict(CLG_(bb_base) + ii->instr_offset, taken);

   if (!CLG_(current_state).collect) return;

   CLG_ASSERT( (ii->eventset->mask & (1u<<EG_BC))>0 );

   if (CLG_(current_state).nonskipped)
      cost_Bc = CLG_(current_state).nonskipped->skipped + fullOffset(EG_BC);
   else
      cost_Bc = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_BC];

   fullOffset_Bc = fullOffset(EG_BC);
   CLG_(current_state).cost[ fullOffset_Bc ]++;
   cost_Bc[0]++;
   if (miss) {
      CLG_(current_state).cost[ fullOffset_Bc+1 ]++;
      cost_Bc[1]++;
   }
}

static VG_REGPARM(2)
void log_ind_branch(InstrInfo* ii, UWord actual_dst)
{
   Bool miss;
   Int fullOffset_Bi;
   ULong* cost_Bi;

   CLG_DEBUG(6, "log_ind_branch:  Ir %#lx, dst %#lx\n",
             CLG_(bb_base) + ii->instr_offset, actual_dst);

   miss = 1 & do_ind_branch_predict(CLG_(bb_base) + ii->instr_offset, actual_dst);

   if (!CLG_(current_state).collect) return;

   CLG_ASSERT( (ii->eventset->mask & (1u<<EG_BI))>0 );

   if (CLG_(current_state).nonskipped)
      cost_Bi = CLG_(current_state).nonskipped->skipped + fullOffset(EG_BI);
   else
      cost_Bi = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_BI];

   fullOffset_Bi = fullOffset(EG_BI);
   CLG_(current_state).cost[ fullOffset_Bi ]++;
   cost_Bi[0]++;
   if (miss) {
      CLG_(current_state).cost[ fullOffset_Bi+1 ]++;
      cost_Bi[1]++;
   }
}

/*------------------------------------------------------------*/
/*--- Instrumentation structures and event queue handling  ---*/
/*------------------------------------------------------------*/

/* Maintain an ordered list of memory events which are outstanding, in
   the sense that no IR has yet been generated to do the relevant
   helper calls.  The BB is scanned top to bottom and memory events
   are added to the end of the list, merging with the most recent
   notified event where possible (Dw immediately following Dr and
   having the same size and EA can be merged).

   This merging is done so that for architectures which have
   load-op-store instructions (x86, amd64), the insn is treated as if
   it makes just one memory reference (a modify), rather than two (a
   read followed by a write at the same address).

   At various points the list will need to be flushed, that is, IR
   generated from it.  That must happen before any possible exit from
   the block (the end, or an IRStmt_Exit).  Flushing also takes place
   when there is no space to add a new event.

   If we require the simulation statistics to be up to date with
   respect to possible memory exceptions, then the list would have to
   be flushed before each memory reference.  That would however lose
   performance by inhibiting event-merging during flushing.

   Flushing the list consists of walking it start to end and emitting
   instrumentation IR for each event, in the order in which they
   appear.  It may be possible to emit a single call for two adjacent
   events in order to reduce the number of helper function calls made.
   For example, it could well be profitable to handle two adjacent Ir
   events with a single helper call. */
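/* Merging example (illustrative): an x86 "incl (%eax)" yields a load
   and a store of the same 4 bytes.  While scanning the BB, the load
   first queues

      { tag = Ev_Dr, inode = ii, szB = 4, ea = t1 }

   and when the store with identical inode, size and EA is seen,
   addEvent_Dw() rewrites that queue entry in place to

      { tag = Ev_Dm, inode = ii, szB = 4, ea = t1 }

   so the instruction is costed as a single "modify" reference. */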

typedef
   IRExpr
   IRAtom;

typedef
   enum {
      Ev_Ir,  // Instruction read
      Ev_Dr,  // Data read
      Ev_Dw,  // Data write
      Ev_Dm,  // Data modify (read then write)
      Ev_Bc,  // branch conditional
      Ev_Bi,  // branch indirect (to unknown destination)
      Ev_G    // Global bus event
   }
   EventTag;

typedef
   struct {
      EventTag   tag;
      InstrInfo* inode;
      union {
         struct {
         } Ir;
         struct {
            IRAtom* ea;
            Int     szB;
         } Dr;
         struct {
            IRAtom* ea;
            Int     szB;
         } Dw;
         struct {
            IRAtom* ea;
            Int     szB;
         } Dm;
         struct {
            IRAtom* taken; /* :: Ity_I1 */
         } Bc;
         struct {
            IRAtom* dst;
         } Bi;
         struct {
         } G;
      } Ev;
   }
   Event;

static void init_Event ( Event* ev ) {
   VG_(memset)(ev, 0, sizeof(Event));
}

static IRAtom* get_Event_dea ( Event* ev ) {
   switch (ev->tag) {
      case Ev_Dr: return ev->Ev.Dr.ea;
      case Ev_Dw: return ev->Ev.Dw.ea;
      case Ev_Dm: return ev->Ev.Dm.ea;
      default:    tl_assert(0);
   }
}

static Int get_Event_dszB ( Event* ev ) {
   switch (ev->tag) {
      case Ev_Dr: return ev->Ev.Dr.szB;
      case Ev_Dw: return ev->Ev.Dw.szB;
      case Ev_Dm: return ev->Ev.Dm.szB;
      default:    tl_assert(0);
   }
}


/* Up to this many unnotified events are allowed.  Number is
   arbitrary.  Larger numbers allow more event merging to occur, but
   potentially induce more spilling due to extending live ranges of
   address temporaries. */
#define N_EVENTS 16


/* A struct which holds all the running state during instrumentation.
   Mostly to avoid passing loads of parameters everywhere. */
typedef struct {
   /* The current outstanding-memory-event list. */
   Event events[N_EVENTS];
   Int   events_used;

   /* The array of InstrInfo's is part of BB struct. */
   BB* bb;

   /* BB seen before (ie. re-instrumentation) */
   Bool seen_before;

   /* Number InstrInfo bins 'used' so far. */
   UInt ii_index;

   // current offset of guest instructions from BB start
   UInt instr_offset;

   /* The output SB being constructed. */
   IRSB* sbOut;
} ClgState;


static void showEvent ( Event* ev )
{
   switch (ev->tag) {
      case Ev_Ir:
         VG_(printf)("Ir (InstrInfo %p) at +%d\n",
                     ev->inode, ev->inode->instr_offset);
         break;
      case Ev_Dr:
         VG_(printf)("Dr (InstrInfo %p) at +%d %d EA=",
                     ev->inode, ev->inode->instr_offset, ev->Ev.Dr.szB);
         ppIRExpr(ev->Ev.Dr.ea);
         VG_(printf)("\n");
         break;
      case Ev_Dw:
         VG_(printf)("Dw (InstrInfo %p) at +%d %d EA=",
                     ev->inode, ev->inode->instr_offset, ev->Ev.Dw.szB);
         ppIRExpr(ev->Ev.Dw.ea);
         VG_(printf)("\n");
         break;
      case Ev_Dm:
         VG_(printf)("Dm (InstrInfo %p) at +%d %d EA=",
                     ev->inode, ev->inode->instr_offset, ev->Ev.Dm.szB);
         ppIRExpr(ev->Ev.Dm.ea);
         VG_(printf)("\n");
         break;
      case Ev_Bc:
         VG_(printf)("Bc %p   GA=", ev->inode);
         ppIRExpr(ev->Ev.Bc.taken);
         VG_(printf)("\n");
         break;
      case Ev_Bi:
         VG_(printf)("Bi %p  DST=", ev->inode);
         ppIRExpr(ev->Ev.Bi.dst);
         VG_(printf)("\n");
         break;
      case Ev_G:
         VG_(printf)("G  %p\n", ev->inode);
         break;
      default:
         tl_assert(0);
         break;
   }
}

/* Generate code for all outstanding memory events, and mark the queue
   empty.  Code is generated into cgs->sbOut, and this activity
   'consumes' slots in cgs->bb. */
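/* Flush example (illustrative): a queue of

      Ir(ii1)  Dr(ii1,4,t2)  Ir(ii2)  Ir(ii3)  Ir(ii4)

   is emitted as two helper calls by the merging rules below, assuming
   the cache simulator's helpers are in use:

      log_1I1Dr(ii1, t2, 4)       // Ir merged with its following Dr
      log_3I0D(ii2, ii3, ii4)     // three instruction fetches at once

   Fewer, fatter helper calls keep the instrumentation overhead per
   guest instruction down. */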

static void flushEvents ( ClgState* clgs )
{
   Int        i, regparms, inew;
   Char*      helperName;
   void*      helperAddr;
   IRExpr**   argv;
   IRExpr*    i_node_expr;
   IRDirty*   di;
   Event*     ev;
   Event*     ev2;
   Event*     ev3;

   if (!clgs->seen_before) {
      // extend event sets as needed
      // available sets: D0 Dr
      for(i=0; i<clgs->events_used; i++) {
         ev = &clgs->events[i];
         switch(ev->tag) {
            case Ev_Ir:
               // Ir event always is first for a guest instruction
               CLG_ASSERT(ev->inode->eventset == 0);
               ev->inode->eventset = CLG_(sets).base;
               break;
            case Ev_Dr:
               // extend event set by Dr counters
               ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
                                                           EG_DR);
               break;
            case Ev_Dw:
            case Ev_Dm:
               // extend event set by Dw counters
               ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
                                                           EG_DW);
               break;
            case Ev_Bc:
               // extend event set by Bc counters
               ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
                                                           EG_BC);
               break;
            case Ev_Bi:
               // extend event set by Bi counters
               ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
                                                           EG_BI);
               break;
            case Ev_G:
               // extend event set by Bus counter
               ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
                                                           EG_BUS);
               break;
            default:
               tl_assert(0);
         }
      }
   }

   for(i = 0; i < clgs->events_used; i = inew) {

      helperName = NULL;
      helperAddr = NULL;
      argv       = NULL;
      regparms   = 0;

      /* generate IR to notify event i and possibly the ones
         immediately following it. */
      tl_assert(i >= 0 && i < clgs->events_used);

      ev  = &clgs->events[i];
      ev2 = ( i < clgs->events_used-1 ? &clgs->events[i+1] : NULL );
      ev3 = ( i < clgs->events_used-2 ? &clgs->events[i+2] : NULL );

      CLG_DEBUGIF(5) {
         VG_(printf)("   flush ");
         showEvent( ev );
      }

      i_node_expr = mkIRExpr_HWord( (HWord)ev->inode );

      /* Decide on helper fn to call and args to pass it, and advance
         i appropriately.
         Dm events have same effect as Dw events */
      switch (ev->tag) {
         case Ev_Ir:
            /* Merge an Ir with a following Dr. */
            if (ev2 && ev2->tag == Ev_Dr) {
               /* Why is this true?  It's because we're merging an Ir
                  with a following Dr.  The Ir derives from the
                  instruction's IMark and the Dr from data
                  references which follow it.  In short it holds
                  because each insn starts with an IMark, hence an
                  Ev_Ir, and so these Dr must pertain to the
                  immediately preceding Ir.  Same applies to analogous
                  assertions in the subsequent cases. */
               tl_assert(ev2->inode == ev->inode);
               helperName = CLG_(cachesim).log_1I1Dr_name;
               helperAddr = CLG_(cachesim).log_1I1Dr;
               argv = mkIRExprVec_3( i_node_expr,
                                     get_Event_dea(ev2),
                                     mkIRExpr_HWord( get_Event_dszB(ev2) ) );
               regparms = 3;
               inew = i+2;
            }
            /* Merge an Ir with a following Dw/Dm. */
            else
            if (ev2 && (ev2->tag == Ev_Dw || ev2->tag == Ev_Dm)) {
               tl_assert(ev2->inode == ev->inode);
               helperName = CLG_(cachesim).log_1I1Dw_name;
               helperAddr = CLG_(cachesim).log_1I1Dw;
               argv = mkIRExprVec_3( i_node_expr,
                                     get_Event_dea(ev2),
                                     mkIRExpr_HWord( get_Event_dszB(ev2) ) );
               regparms = 3;
               inew = i+2;
            }
            /* Merge an Ir with two following Irs. */
            else
            if (ev2 && ev3 && ev2->tag == Ev_Ir && ev3->tag == Ev_Ir) {
               helperName = CLG_(cachesim).log_3I0D_name;
               helperAddr = CLG_(cachesim).log_3I0D;
               argv = mkIRExprVec_3( i_node_expr,
                                     mkIRExpr_HWord( (HWord)ev2->inode ),
                                     mkIRExpr_HWord( (HWord)ev3->inode ) );
               regparms = 3;
               inew = i+3;
            }
            /* Merge an Ir with one following Ir. */
            else
            if (ev2 && ev2->tag == Ev_Ir) {
               helperName = CLG_(cachesim).log_2I0D_name;
               helperAddr = CLG_(cachesim).log_2I0D;
               argv = mkIRExprVec_2( i_node_expr,
                                     mkIRExpr_HWord( (HWord)ev2->inode ) );
               regparms = 2;
               inew = i+2;
            }
            /* No merging possible; emit as-is. */
            else {
               helperName = CLG_(cachesim).log_1I0D_name;
               helperAddr = CLG_(cachesim).log_1I0D;
               argv = mkIRExprVec_1( i_node_expr );
               regparms = 1;
               inew = i+1;
            }
            break;
         case Ev_Dr:
            /* Data read or modify */
            helperName = CLG_(cachesim).log_0I1Dr_name;
            helperAddr = CLG_(cachesim).log_0I1Dr;
            argv = mkIRExprVec_3( i_node_expr,
                                  get_Event_dea(ev),
                                  mkIRExpr_HWord( get_Event_dszB(ev) ) );
            regparms = 3;
            inew = i+1;
            break;
         case Ev_Dw:
         case Ev_Dm:
            /* Data write */
            helperName = CLG_(cachesim).log_0I1Dw_name;
            helperAddr = CLG_(cachesim).log_0I1Dw;
            argv = mkIRExprVec_3( i_node_expr,
                                  get_Event_dea(ev),
                                  mkIRExpr_HWord( get_Event_dszB(ev) ) );
            regparms = 3;
            inew = i+1;
            break;
         case Ev_Bc:
            /* Conditional branch */
            helperName = "log_cond_branch";
            helperAddr = &log_cond_branch;
            argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bc.taken );
            regparms = 2;
            inew = i+1;
            break;
         case Ev_Bi:
            /* Branch to an unknown destination */
            helperName = "log_ind_branch";
            helperAddr = &log_ind_branch;
            argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bi.dst );
            regparms = 2;
            inew = i+1;
            break;
         case Ev_G:
            /* Global bus event (CAS, LOCK-prefix, LL-SC, etc) */
            helperName = "log_global_event";
            helperAddr = &log_global_event;
            argv = mkIRExprVec_1( i_node_expr );
            regparms = 1;
            inew = i+1;
            break;
         default:
            tl_assert(0);
      }

      CLG_DEBUGIF(5) {
         if (inew > i+1) {
            VG_(printf)("   merge ");
            showEvent( ev2 );
         }
         if (inew > i+2) {
            VG_(printf)("   merge ");
            showEvent( ev3 );
         }
         if (helperAddr)
            VG_(printf)("   call  %s (%p)\n",
                        helperName, helperAddr);
      }

      /* helper could be unset depending on the simulator used */
      if (helperAddr == 0) continue;

      /* Add the helper. */
      tl_assert(helperName);
      tl_assert(helperAddr);
      tl_assert(argv);
      di = unsafeIRDirty_0_N( regparms,
                              helperName, VG_(fnptr_to_fnentry)( helperAddr ),
                              argv );
      addStmtToIRSB( clgs->sbOut, IRStmt_Dirty(di) );
   }

   clgs->events_used = 0;
}

static void addEvent_Ir ( ClgState* clgs, InstrInfo* inode )
{
   Event* evt;
   tl_assert(clgs->seen_before || (inode->eventset == 0));
   if (!CLG_(clo).simulate_cache) return;

   if (clgs->events_used == N_EVENTS)
      flushEvents(clgs);
   tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
   evt = &clgs->events[clgs->events_used];
   init_Event(evt);
   evt->tag   = Ev_Ir;
   evt->inode = inode;
   clgs->events_used++;
}

static
void addEvent_Dr ( ClgState* clgs, InstrInfo* inode, Int datasize, IRAtom* ea )
{
   Event* evt;
   tl_assert(isIRAtom(ea));
   tl_assert(datasize >= 1 && datasize <= MIN_LINE_SIZE);
   if (!CLG_(clo).simulate_cache) return;

   if (clgs->events_used == N_EVENTS)
      flushEvents(clgs);
   tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
   evt = &clgs->events[clgs->events_used];
   init_Event(evt);
   evt->tag       = Ev_Dr;
   evt->inode     = inode;
   evt->Ev.Dr.szB = datasize;
   evt->Ev.Dr.ea  = ea;
   clgs->events_used++;
}

static
void addEvent_Dw ( ClgState* clgs, InstrInfo* inode, Int datasize, IRAtom* ea )
{
   Event* lastEvt;
   Event* evt;
   tl_assert(isIRAtom(ea));
   tl_assert(datasize >= 1 && datasize <= MIN_LINE_SIZE);
   if (!CLG_(clo).simulate_cache) return;

   /* Is it possible to merge this write with the preceding read? */
   if (clgs->events_used > 0) {
      lastEvt = &clgs->events[clgs->events_used-1];
      if (lastEvt->tag == Ev_Dr
          && lastEvt->Ev.Dr.szB == datasize
          && lastEvt->inode == inode
          && eqIRAtom(lastEvt->Ev.Dr.ea, ea))
      {
         lastEvt->tag = Ev_Dm;
         return;
      }
   }

   /* No.  Add as normal. */
   if (clgs->events_used == N_EVENTS)
      flushEvents(clgs);
   tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
   evt = &clgs->events[clgs->events_used];
   init_Event(evt);
   evt->tag       = Ev_Dw;
   evt->inode     = inode;
   evt->Ev.Dw.szB = datasize;
   evt->Ev.Dw.ea  = ea;
   clgs->events_used++;
}

static
void addEvent_Bc ( ClgState* clgs, InstrInfo* inode, IRAtom* guard )
{
   Event* evt;
   tl_assert(isIRAtom(guard));
   tl_assert(typeOfIRExpr(clgs->sbOut->tyenv, guard)
             == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64));
   if (!CLG_(clo).simulate_branch) return;

   if (clgs->events_used == N_EVENTS)
      flushEvents(clgs);
   tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
   evt = &clgs->events[clgs->events_used];
   init_Event(evt);
   evt->tag         = Ev_Bc;
   evt->inode       = inode;
   evt->Ev.Bc.taken = guard;
   clgs->events_used++;
}

static
void addEvent_Bi ( ClgState* clgs, InstrInfo* inode, IRAtom* whereTo )
{
   Event* evt;
   tl_assert(isIRAtom(whereTo));
   tl_assert(typeOfIRExpr(clgs->sbOut->tyenv, whereTo)
             == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64));
   if (!CLG_(clo).simulate_branch) return;

   if (clgs->events_used == N_EVENTS)
      flushEvents(clgs);
   tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
   evt = &clgs->events[clgs->events_used];
   init_Event(evt);
   evt->tag       = Ev_Bi;
   evt->inode     = inode;
   evt->Ev.Bi.dst = whereTo;
   clgs->events_used++;
}

static
void addEvent_G ( ClgState* clgs, InstrInfo* inode )
{
   Event* evt;
   if (!CLG_(clo).collect_bus) return;

   if (clgs->events_used == N_EVENTS)
      flushEvents(clgs);
   tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
   evt = &clgs->events[clgs->events_used];
   init_Event(evt);
   evt->tag   = Ev_G;
   evt->inode = inode;
   clgs->events_used++;
}

/* Initialise or check (if already seen before) an InstrInfo for next insn.
   We only can set instr_offset/instr_size here.  The required event set and
   resulting cost offset depend on events (Ir/Dr/Dw/Dm) in guest
   instructions.  The event set is extended as required on flush of the event
   queue (when Dm events were determined), cost offsets are determined at
   end of BB instrumentation. */
static
InstrInfo* next_InstrInfo ( ClgState* clgs, UInt instr_size )
{
   InstrInfo* ii;
   tl_assert(clgs->ii_index >= 0);
   tl_assert(clgs->ii_index < clgs->bb->instr_count);
   ii = &clgs->bb->instr[ clgs->ii_index ];

   if (clgs->seen_before) {
      CLG_ASSERT(ii->instr_offset == clgs->instr_offset);
      CLG_ASSERT(ii->instr_size == instr_size);
   }
   else {
      ii->instr_offset = clgs->instr_offset;
      ii->instr_size   = instr_size;
      ii->cost_offset  = 0;
      ii->eventset     = 0;
   }

   clgs->ii_index++;
   clgs->instr_offset += instr_size;
   CLG_(stat).distinct_instrs++;

   return ii;
}

// return total number of cost values needed for this BB
static
UInt update_cost_offsets( ClgState* clgs )
{
   Int i;
   InstrInfo* ii;
   UInt cost_offset = 0;

   CLG_ASSERT(clgs->bb->instr_count == clgs->ii_index);
   for(i=0; i<clgs->ii_index; i++) {
      ii = &clgs->bb->instr[i];
      if (clgs->seen_before) {
         CLG_ASSERT(ii->cost_offset == cost_offset);
      } else
         ii->cost_offset = cost_offset;
      cost_offset += ii->eventset ? ii->eventset->size : 0;
   }

   return cost_offset;
}
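/* Worked example (illustrative): a BB with three instructions whose
   event sets have sizes 1, 3 and 0 (say) gets

      instr[0].cost_offset = 0
      instr[1].cost_offset = 1
      instr[2].cost_offset = 1+3 = 4

   and update_cost_offsets() returns 4, the number of cost slots to
   allocate for one BBCC of this BB. */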

/*------------------------------------------------------------*/
/*--- Instrumentation                                      ---*/
/*------------------------------------------------------------*/

#if defined(VG_BIGENDIAN)
# define CLGEndness Iend_BE
#elif defined(VG_LITTLEENDIAN)
# define CLGEndness Iend_LE
#else
# error "Unknown endianness"
#endif

static
Addr IRConst2Addr(IRConst* con)
{
   Addr addr;

   if (sizeof(Addr) == 4) {
      CLG_ASSERT( con->tag == Ico_U32 );
      addr = con->Ico.U32;
   }
   else if (sizeof(Addr) == 8) {
      CLG_ASSERT( con->tag == Ico_U64 );
      addr = con->Ico.U64;
   }
   else
      VG_(tool_panic)("Callgrind: invalid Addr type");

   return addr;
}

/* First pass over a BB to instrument, counting instructions and jumps
 * This is needed for the size of the BB struct to allocate
 *
 * Called from CLG_(get_bb)
 */
void CLG_(collectBlockInfo)(IRSB* sbIn,
                            /*INOUT*/ UInt* instrs,
                            /*INOUT*/ UInt* cjmps,
                            /*INOUT*/ Bool* cjmp_inverted)
{
   Int i;
   IRStmt* st;
   Addr instrAddr = 0, jumpDst;
   UInt instrLen = 0;
   Bool toNextInstr = False;

   // Ist_Exit has to be ignored in preamble code, before first IMark:
   // preamble code is added by VEX for self modifying code, and has
   // nothing to do with client code
   Bool inPreamble = True;

   if (!sbIn) return;

   for (i = 0; i < sbIn->stmts_used; i++) {
      st = sbIn->stmts[i];
      if (Ist_IMark == st->tag) {
         inPreamble = False;

         instrAddr = (Addr)ULong_to_Ptr(st->Ist.IMark.addr);
         instrLen  = st->Ist.IMark.len;

         (*instrs)++;
         toNextInstr = False;
      }
      if (inPreamble) continue;
      if (Ist_Exit == st->tag) {
         jumpDst = IRConst2Addr(st->Ist.Exit.dst);
         toNextInstr = (jumpDst == instrAddr + instrLen);

         (*cjmps)++;
      }
   }

   /* if the last instruction of the BB conditionally jumps to the next
    * instruction (= first instruction of the next BB in memory), this is
    * a jump which was inverted by VEX.
    */
   *cjmp_inverted = toNextInstr;
}
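/* Inversion example (illustrative; addresses are made up): for guest
   code

      0x1000:  cmp  eax, 0
      0x1003:  je   0x2000        ; taken branch leaves the BB

   VEX may generate the exit with inverted sense, i.e. an IRStmt_Exit
   jumping to 0x1005 (the fall-through address) under the negated
   condition.  Since 0x1005 == instrAddr + instrLen of the last
   instruction, toNextInstr ends up True and cjmp_inverted is set;
   CLG_(instrument) later uses this to undo the inversion when
   counting conditional jumps. */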

static
void addConstMemStoreStmt( IRSB* bbOut, UWord addr, UInt val, IRType hWordTy)
{
   addStmtToIRSB( bbOut,
                  IRStmt_Store(CLGEndness,
                               IRExpr_Const(hWordTy == Ity_I32 ?
                                            IRConst_U32( addr ) :
                                            IRConst_U64( addr )),
                               IRExpr_Const(IRConst_U32(val)) ));
}


/* add helper call to setup_bbcc, with pointer to BB struct as argument
 *
 * precondition for setup_bbcc:
 * - jmps_passed has number of cond.jumps passed in last executed BB
 * - current_bbcc has a pointer to the BBCC of the last executed BB
 *   Thus, if bbcc_jmpkind is != -1 (JmpNone),
 *     current_bbcc->bb->jmp_addr
 *   gives the address of the jump source.
 *
 * the setup does 2 things:
 * - trace call:
 *   * Unwind own call stack, i.e. sync our ESP with the real ESP
 *     This is for ESP manipulation (longjmps, C++ exception handling) and RET
 *   * For CALLs or JMPs crossing objects, record the call arguments and
 *     push a frame on our own call stack
 *
 * - prepare for cache log functions:
 *   set current_bbcc to BBCC that gets the costs for this BB execution
 *   attached
 */
static
void addBBSetupCall(ClgState* clgs)
{
   IRDirty* di;
   IRExpr  *arg1, **argv;

   arg1 = mkIRExpr_HWord( (HWord)clgs->bb );
   argv = mkIRExprVec_1(arg1);
   di = unsafeIRDirty_0_N( 1, "setup_bbcc",
                           VG_(fnptr_to_fnentry)( & CLG_(setup_bbcc) ),
                           argv);
   addStmtToIRSB( clgs->sbOut, IRStmt_Dirty(di) );
}


static
IRSB* CLG_(instrument)( VgCallbackClosure* closure,
                        IRSB* sbIn,
                        VexGuestLayout* layout,
                        VexGuestExtents* vge,
                        IRType gWordTy, IRType hWordTy )
{
   Int        i, isize;
   IRStmt*    st;
   Addr       origAddr;
   Addr64     cia; /* address of current insn */
   InstrInfo* curr_inode = NULL;
   ClgState   clgs;
   UInt       cJumps = 0;


   if (gWordTy != hWordTy) {
      /* We don't currently support this case. */
      VG_(tool_panic)("host/guest word size mismatch");
   }

   // No instrumentation if it is switched off
   if (! CLG_(instrument_state)) {
      CLG_DEBUG(5, "instrument(BB %#lx) [Instrumentation OFF]\n",
                (Addr)closure->readdr);
      return sbIn;
   }

   CLG_DEBUG(3, "+ instrument(BB %#lx)\n", (Addr)closure->readdr);

   /* Set up SB for instrumented IR */
   clgs.sbOut = deepCopyIRSBExceptStmts(sbIn);

   // Copy verbatim any IR preamble preceding the first IMark
   i = 0;
   while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
      addStmtToIRSB( clgs.sbOut, sbIn->stmts[i] );
      i++;
   }

   // Get the first statement, and origAddr from it
   CLG_ASSERT(sbIn->stmts_used > 0);
   CLG_ASSERT(i < sbIn->stmts_used);
   st = sbIn->stmts[i];
   CLG_ASSERT(Ist_IMark == st->tag);

   origAddr = (Addr)st->Ist.IMark.addr;
   cia   = st->Ist.IMark.addr;
   isize = st->Ist.IMark.len;
   CLG_ASSERT(origAddr == st->Ist.IMark.addr);  // XXX: check no overflow

   /* Get BB struct (creating if necessary).
    * JS: The hash table is keyed with orig_addr_noredir -- important!
    * JW: Why? If it is because of different chasing of the redirection,
    *     this is not needed, as chasing is switched off in callgrind
    */
   clgs.bb = CLG_(get_bb)(origAddr, sbIn, &(clgs.seen_before));

   addBBSetupCall(&clgs);

   // Set up running state
   clgs.events_used  = 0;
   clgs.ii_index     = 0;
   clgs.instr_offset = 0;

   for (/*use current i*/; i < sbIn->stmts_used; i++) {

      st = sbIn->stmts[i];
      CLG_ASSERT(isFlatIRStmt(st));

      switch (st->tag) {
         case Ist_NoOp:
         case Ist_AbiHint:
         case Ist_Put:
         case Ist_PutI:
         case Ist_MBE:
            break;

         case Ist_IMark: {
            cia   = st->Ist.IMark.addr;
            isize = st->Ist.IMark.len;
            CLG_ASSERT(clgs.instr_offset == (Addr)cia - origAddr);
            // If Vex fails to decode an instruction, the size will be zero.
            // Pretend otherwise.
            if (isize == 0) isize = VG_MIN_INSTR_SZB;

            // Sanity-check size.
            tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB)
                       || VG_CLREQ_SZB == isize );

            // Init the inode, record it as the current one.
            // Subsequent Dr/Dw/Dm events from the same instruction will
            // also use it.
            curr_inode = next_InstrInfo (&clgs, isize);

            addEvent_Ir( &clgs, curr_inode );
            break;
         }

         case Ist_WrTmp: {
            IRExpr* data = st->Ist.WrTmp.data;
            if (data->tag == Iex_Load) {
               IRExpr* aexpr = data->Iex.Load.addr;
               // Note also, endianness info is ignored.  I guess
               // that's not interesting.
               addEvent_Dr( &clgs, curr_inode,
                            sizeofIRType(data->Iex.Load.ty), aexpr );
            }
            break;
         }

         case Ist_Store: {
            IRExpr* data  = st->Ist.Store.data;
            IRExpr* aexpr = st->Ist.Store.addr;
            addEvent_Dw( &clgs, curr_inode,
                         sizeofIRType(typeOfIRExpr(sbIn->tyenv, data)), aexpr );
            break;
         }

         case Ist_Dirty: {
            Int      dataSize;
            IRDirty* d = st->Ist.Dirty.details;
            if (d->mFx != Ifx_None) {
               /* This dirty helper accesses memory.  Collect the details. */
               tl_assert(d->mAddr != NULL);
               tl_assert(d->mSize != 0);
               dataSize = d->mSize;
               // Large (eg. 28B, 108B, 512B on x86) data-sized
               // instructions will be done inaccurately, but they're
               // very rare and this avoids errors from hitting more
               // than two cache lines in the simulation.
               if (dataSize > MIN_LINE_SIZE)
                  dataSize = MIN_LINE_SIZE;
               if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
                  addEvent_Dr( &clgs, curr_inode, dataSize, d->mAddr );
               if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
                  addEvent_Dw( &clgs, curr_inode, dataSize, d->mAddr );
            } else {
               tl_assert(d->mAddr == NULL);
               tl_assert(d->mSize == 0);
            }
            break;
         }

         case Ist_CAS: {
            /* We treat it as a read and a write of the location.  I
               think that is the same behaviour as it was before IRCAS
               was introduced, since prior to that point, the Vex
               front ends would translate a lock-prefixed instruction
               into a (normal) read followed by a (normal) write. */
            Int    dataSize;
            IRCAS* cas = st->Ist.CAS.details;
            CLG_ASSERT(cas->addr && isIRAtom(cas->addr));
            CLG_ASSERT(cas->dataLo);
            dataSize = sizeofIRType(typeOfIRExpr(sbIn->tyenv, cas->dataLo));
            if (cas->dataHi != NULL)
               dataSize *= 2; /* since this is a doubleword-cas */
            addEvent_Dr( &clgs, curr_inode, dataSize, cas->addr );
            addEvent_Dw( &clgs, curr_inode, dataSize, cas->addr );
            addEvent_G( &clgs, curr_inode );
            break;
         }

         case Ist_LLSC: {
            IRType dataTy;
            if (st->Ist.LLSC.storedata == NULL) {
               /* LL */
               dataTy = typeOfIRTemp(sbIn->tyenv, st->Ist.LLSC.result);
               addEvent_Dr( &clgs, curr_inode,
                            sizeofIRType(dataTy), st->Ist.LLSC.addr );
            } else {
               /* SC */
               dataTy = typeOfIRExpr(sbIn->tyenv, st->Ist.LLSC.storedata);
               addEvent_Dw( &clgs, curr_inode,
                            sizeofIRType(dataTy), st->Ist.LLSC.addr );
               /* I don't know whether the global-bus-lock cost should
                  be attributed to the LL or the SC, but it doesn't
                  really matter since they always have to be used in
                  pairs anyway.  Hence put it (quite arbitrarily) on
                  the SC. */
               addEvent_G( &clgs, curr_inode );
            }
            break;
         }

         case Ist_Exit: {
            Bool guest_exit, inverted;

            /* VEX code generation sometimes inverts conditional branches.
             * As Callgrind counts (conditional) jumps, it has to correct
             * inversions. The heuristic is the following:
             * (1) Callgrind switches off SB chasing and unrolling, and
             *     therefore it assumes that the only candidate for inversion
             *     is the last conditional branch in an SB.
             * (2) inversion is assumed if the branch jumps to the address of
             *     the next guest instruction in memory.
             * This heuristic is precalculated in CLG_(collectBlockInfo)().
             *
             * Branching behavior is also used for branch prediction. Note that
             * above heuristic is different from what Cachegrind does.
             * Cachegrind uses (2) for all branches.
             */
            if (cJumps+1 == clgs.bb->cjmp_count)
               inverted = clgs.bb->cjmp_inverted;
            else
               inverted = False;

            // call branch predictor only if this is a branch in guest code
            guest_exit = (st->Ist.Exit.jk == Ijk_Boring) ||
                         (st->Ist.Exit.jk == Ijk_Call) ||
                         (st->Ist.Exit.jk == Ijk_Ret);

            if (guest_exit) {
               /* Stuff to widen the guard expression to a host word, so
                  we can pass it to the branch predictor simulation
                  functions easily. */
               IRType   tyW    = hWordTy;
               IROp     widen  = tyW==Ity_I32  ? Iop_1Uto32  : Iop_1Uto64;
               IROp     opXOR  = tyW==Ity_I32  ? Iop_Xor32   : Iop_Xor64;
               IRTemp   guard1 = newIRTemp(clgs.sbOut->tyenv, Ity_I1);
               IRTemp   guardW = newIRTemp(clgs.sbOut->tyenv, tyW);
               IRTemp   guard  = newIRTemp(clgs.sbOut->tyenv, tyW);
               IRExpr*  one    = tyW==Ity_I32 ? IRExpr_Const(IRConst_U32(1))
                                              : IRExpr_Const(IRConst_U64(1));

               /* Widen the guard expression. */
               addStmtToIRSB( clgs.sbOut,
                              IRStmt_WrTmp( guard1, st->Ist.Exit.guard ));
               addStmtToIRSB( clgs.sbOut,
                              IRStmt_WrTmp( guardW,
                                            IRExpr_Unop(widen,
                                                        IRExpr_RdTmp(guard1))) );
               /* If the exit is inverted, invert the sense of the guard. */
               addStmtToIRSB(
                  clgs.sbOut,
                  IRStmt_WrTmp(
                     guard,
                     inverted ? IRExpr_Binop(opXOR, IRExpr_RdTmp(guardW), one)
                              : IRExpr_RdTmp(guardW)
                  ));
               /* And post the event. */
               addEvent_Bc( &clgs, curr_inode, IRExpr_RdTmp(guard) );
            }

            /* We may never reach the next statement, so need to flush
               all outstanding transactions now. */
            flushEvents( &clgs );

            CLG_ASSERT(clgs.ii_index>0);
            if (!clgs.seen_before) {
               clgs.bb->jmp[cJumps].instr = clgs.ii_index-1;
               clgs.bb->jmp[cJumps].skip  = False;
            }

            /* Update global variable jmps_passed before the jump
             * A correction is needed if VEX inverted the last jump condition
             */
            addConstMemStoreStmt( clgs.sbOut,
                                  (UWord) &CLG_(current_state).jmps_passed,
                                  inverted ? cJumps+1 : cJumps, hWordTy);
            cJumps++;

            break;
         }

         default:
            tl_assert(0);
            break;
      }

      /* Copy the original statement */
      addStmtToIRSB( clgs.sbOut, st );

      CLG_DEBUGIF(5) {
         VG_(printf)("   pass  ");
         ppIRStmt(st);
         VG_(printf)("\n");
      }
   }

   /* Deal with branches to unknown destinations.  Except ignore ones
      which are function returns as we assume the return stack
      predictor never mispredicts. */
   if ((sbIn->jumpkind == Ijk_Boring) || (sbIn->jumpkind == Ijk_Call)) {
      if (0) { ppIRExpr( sbIn->next ); VG_(printf)("\n"); }
      switch (sbIn->next->tag) {
         case Iex_Const:
            break; /* boring - branch to known address */
         case Iex_RdTmp:
            /* looks like an indirect branch (branch to unknown) */
            addEvent_Bi( &clgs, curr_inode, sbIn->next );
            break;
         default:
            /* shouldn't happen - if the incoming IR is properly
               flattened, should only have tmp and const cases to
               consider. */
            tl_assert(0);
      }
   }

   /* At the end of the bb.  Flush outstandings. */
   flushEvents( &clgs );

   /* Always update global variable jmps_passed at end of bb.
    * A correction is needed if VEX inverted the last jump condition
    */
   {
      UInt jmps_passed = cJumps;
      if (clgs.bb->cjmp_inverted) jmps_passed--;
      addConstMemStoreStmt( clgs.sbOut,
                            (UWord) &CLG_(current_state).jmps_passed,
                            jmps_passed, hWordTy);
   }
   CLG_ASSERT(clgs.bb->cjmp_count == cJumps);
   CLG_ASSERT(clgs.bb->instr_count == clgs.ii_index);

   /* This stores the instr of the call/ret at BB end */
   clgs.bb->jmp[cJumps].instr = clgs.ii_index-1;

   if (clgs.seen_before) {
      CLG_ASSERT(clgs.bb->cost_count == update_cost_offsets(&clgs));
      CLG_ASSERT(clgs.bb->instr_len == clgs.instr_offset);
      CLG_ASSERT(clgs.bb->jmpkind == sbIn->jumpkind);
   }
   else {
      clgs.bb->cost_count = update_cost_offsets(&clgs);
      clgs.bb->instr_len = clgs.instr_offset;
      clgs.bb->jmpkind = sbIn->jumpkind;
   }

   CLG_DEBUG(3, "- instrument(BB %#lx): byteLen %u, CJumps %u, CostLen %u\n",
             origAddr, clgs.bb->instr_len,
             clgs.bb->cjmp_count, clgs.bb->cost_count);
   if (cJumps>0) {
      CLG_DEBUG(3, "   [ ");
      for (i=0;i<cJumps;i++)
         CLG_DEBUG(3, "%d ", clgs.bb->jmp[i].instr);
      CLG_DEBUG(3, "], last inverted: %s \n",
                clgs.bb->cjmp_inverted ? "yes":"no");
   }

   return clgs.sbOut;
}
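/* Shape of the emitted SB (illustrative and heavily simplified, for a
   BB with one conditional exit and the cache simulator enabled; the
   addresses, temporaries and helper choices are made up):

      dirty "setup_bbcc" { &bb }            <- addBBSetupCall()
      ------ IMark(0x4005a0, 3) ------
      t1 = ...                              <- original statements, copied
      dirty "log_1I1Dr" { ii0, t2, 4 }      <- flushed event queue
      STle(&jmps_passed) = 0                <- before the conditional exit
      if (t9) goto {Boring} 0x4005c0
      ...
      dirty "log_1I0D" { ii1 }
      STle(&jmps_passed) = 1                <- at BB end
      goto {Boring} next

   The real output depends on the guest code and active simulators. */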

/*--------------------------------------------------------------------*/
/*--- Discarding BB info                                           ---*/
/*--------------------------------------------------------------------*/

// Called when a translation is removed from the translation cache for
// any reason at all: to free up space, because the guest code was
// unmapped or modified, or for any arbitrary reason.
static
void clg_discard_superblock_info ( Addr64 orig_addr64, VexGuestExtents vge )
{
   Addr orig_addr = (Addr)orig_addr64;

   tl_assert(vge.n_used > 0);

   if (0)
      VG_(printf)( "discard_superblock_info: %p, %p, %llu\n",
                   (void*)(Addr)orig_addr,
                   (void*)(Addr)vge.base[0], (ULong)vge.len[0]);

   // Get BB info, remove from table, free BB info.  Simple!  Note that we
   // use orig_addr, not the first instruction address in vge.
   CLG_(delete_bb)(orig_addr);
}


/*------------------------------------------------------------*/
/*--- CLG_(fini)() and related function                    ---*/
/*------------------------------------------------------------*/



static void zero_thread_cost(thread_info* t)
{
   Int i;

   for(i = 0; i < CLG_(current_call_stack).sp; i++) {
      if (!CLG_(current_call_stack).entry[i].jcc) continue;

      /* reset call counters to current for active calls */
      CLG_(copy_cost)( CLG_(sets).full,
                       CLG_(current_call_stack).entry[i].enter_cost,
                       CLG_(current_state).cost );
      CLG_(current_call_stack).entry[i].jcc->call_counter = 0;
   }

   CLG_(forall_bbccs)(CLG_(zero_bbcc));

   /* set counter for last dump */
   CLG_(copy_cost)( CLG_(sets).full,
                    t->lastdump_cost, CLG_(current_state).cost );
}

void CLG_(zero_all_cost)(Bool only_current_thread)
{
   if (VG_(clo_verbosity) > 1)
      VG_(message)(Vg_DebugMsg, "  Zeroing costs...\n");

   if (only_current_thread)
      zero_thread_cost(CLG_(get_current_thread)());
   else
      CLG_(forall_threads)(zero_thread_cost);

   if (VG_(clo_verbosity) > 1)
      VG_(message)(Vg_DebugMsg, "  ...done\n");
}

static
void unwind_thread(thread_info* t)
{
   /* unwind signal handlers */
   while(CLG_(current_state).sig !=0)
      CLG_(post_signal)(CLG_(current_tid),CLG_(current_state).sig);

   /* unwind regular call stack */
   while(CLG_(current_call_stack).sp>0)
      CLG_(pop_call_stack)();

   /* reset context and function stack for context generation */
   CLG_(init_exec_state)( &CLG_(current_state) );
   CLG_(current_fn_stack).top = CLG_(current_fn_stack).bottom;
}

static
void zero_state_cost(thread_info* t)
{
   CLG_(zero_cost)( CLG_(sets).full, CLG_(current_state).cost );
}

/* Oops, this can go very wrong... */
extern void VG_(discard_translations) ( Addr64 start, ULong range, HChar* who );

void CLG_(set_instrument_state)(Char* reason, Bool state)
{
   if (CLG_(instrument_state) == state) {
      CLG_DEBUG(2, "%s: instrumentation already %s\n",
                reason, state ? "ON" : "OFF");
      return;
   }
   CLG_(instrument_state) = state;
   CLG_DEBUG(2, "%s: Switching instrumentation %s ...\n",
             reason, state ? "ON" : "OFF");

   VG_(discard_translations)( (Addr64)0x1000, (ULong) ~0xfffl, "callgrind");

   /* reset internal state: call stacks, simulator */
   CLG_(forall_threads)(unwind_thread);
   CLG_(forall_threads)(zero_state_cost);
   (*CLG_(cachesim).clear)();

   if (VG_(clo_verbosity) > 1)
      VG_(message)(Vg_DebugMsg, "%s: instrumentation switched %s\n",
                   reason, state ? "ON" : "OFF");
}
"ON" : "OFF"); 1357 } 1358 1359 /* helper for dump_state_togdb */ 1360 static void dump_state_of_thread_togdb(thread_info* ti) 1361 { 1362 static Char buf[512]; 1363 static FullCost sum = 0, tmp = 0; 1364 Int t, p, i; 1365 BBCC *from, *to; 1366 call_entry* ce; 1367 1368 t = CLG_(current_tid); 1369 CLG_(init_cost_lz)( CLG_(sets).full, &sum ); 1370 CLG_(copy_cost_lz)( CLG_(sets).full, &tmp, ti->lastdump_cost ); 1371 CLG_(add_diff_cost)( CLG_(sets).full, sum, ti->lastdump_cost, 1372 ti->states.entry[0]->cost); 1373 CLG_(copy_cost)( CLG_(sets).full, ti->lastdump_cost, tmp ); 1374 CLG_(sprint_mappingcost)(buf, CLG_(dumpmap), sum); 1375 VG_(gdb_printf)("events-%d: %s\n", t, buf); 1376 VG_(gdb_printf)("frames-%d: %d\n", t, CLG_(current_call_stack).sp); 1377 1378 ce = 0; 1379 for(i = 0; i < CLG_(current_call_stack).sp; i++) { 1380 ce = CLG_(get_call_entry)(i); 1381 /* if this frame is skipped, we don't have counters */ 1382 if (!ce->jcc) continue; 1383 1384 from = ce->jcc->from; 1385 VG_(gdb_printf)("function-%d-%d: %s\n",t, i, from->cxt->fn[0]->name); 1386 VG_(gdb_printf)("calls-%d-%d: %llu\n",t, i, ce->jcc->call_counter); 1387 1388 /* FIXME: EventSets! */ 1389 CLG_(copy_cost)( CLG_(sets).full, sum, ce->jcc->cost ); 1390 CLG_(copy_cost)( CLG_(sets).full, tmp, ce->enter_cost ); 1391 CLG_(add_diff_cost)( CLG_(sets).full, sum, 1392 ce->enter_cost, CLG_(current_state).cost ); 1393 CLG_(copy_cost)( CLG_(sets).full, ce->enter_cost, tmp ); 1394 1395 p = VG_(sprintf)(buf, "events-%d-%d: ",t, i); 1396 CLG_(sprint_mappingcost)(buf + p, CLG_(dumpmap), sum ); 1397 VG_(gdb_printf)("%s\n", buf); 1398 } 1399 if (ce && ce->jcc) { 1400 to = ce->jcc->to; 1401 VG_(gdb_printf)("function-%d-%d: %s\n",t, i, to->cxt->fn[0]->name ); 1402 } 1403 } 1404 1405 /* Dump current state */ 1406 static void dump_state_togdb(void) 1407 { 1408 static Char buf[512]; 1409 thread_info** th; 1410 int t, p; 1411 Int orig_tid = CLG_(current_tid); 1412 1413 VG_(gdb_printf)("instrumentation: %s\n", 1414 CLG_(instrument_state) ? "on":"off"); 1415 if (!CLG_(instrument_state)) return; 1416 1417 VG_(gdb_printf)("executed-bbs: %llu\n", CLG_(stat).bb_executions); 1418 VG_(gdb_printf)("executed-calls: %llu\n", CLG_(stat).call_counter); 1419 VG_(gdb_printf)("distinct-bbs: %d\n", CLG_(stat).distinct_bbs); 1420 VG_(gdb_printf)("distinct-calls: %d\n", CLG_(stat).distinct_jccs); 1421 VG_(gdb_printf)("distinct-functions: %d\n", CLG_(stat).distinct_fns); 1422 VG_(gdb_printf)("distinct-contexts: %d\n", CLG_(stat).distinct_contexts); 1423 1424 /* "events:" line. Given here because it will be dynamic in the future */ 1425 p = VG_(sprintf)(buf, "events: "); 1426 CLG_(sprint_eventmapping)(buf+p, CLG_(dumpmap)); 1427 VG_(gdb_printf)("%s\n", buf); 1428 /* "part:" line (number of last part. 
   VG_(gdb_printf)("part: %d\n", CLG_(get_dump_counter)());

   /* threads */
   th = CLG_(get_threads)();
   p = VG_(sprintf)(buf, "threads:");
   for(t=1;t<VG_N_THREADS;t++) {
      if (!th[t]) continue;
      p += VG_(sprintf)(buf+p, " %d", t);
   }
   VG_(gdb_printf)("%s\n", buf);
   VG_(gdb_printf)("current-tid: %d\n", orig_tid);
   CLG_(forall_threads)(dump_state_of_thread_togdb);
}


static void print_monitor_help ( void )
{
   VG_(gdb_printf) ("\n");
   VG_(gdb_printf) ("callgrind monitor commands:\n");
   VG_(gdb_printf) ("  dump [<dump_hint>]\n");
   VG_(gdb_printf) ("        dump counters\n");
   VG_(gdb_printf) ("  zero\n");
   VG_(gdb_printf) ("        zero counters\n");
   VG_(gdb_printf) ("  status\n");
   VG_(gdb_printf) ("        print status\n");
   VG_(gdb_printf) ("  instrumentation [on|off]\n");
   VG_(gdb_printf) ("        get/set (if on/off given) instrumentation state\n");
   VG_(gdb_printf) ("\n");
}
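/* Example session (illustrative), driven from gdb via the Valgrind
   gdbserver, using the command keywords dispatched below:

      (gdb) monitor status
      1 thread(s) running.
      (gdb) monitor zero
      (gdb) monitor dump reached checkpoint
      (gdb) monitor instrumentation off

   "dump" forwards the whole request string as the trigger hint to
   CLG_(dump_profile)(); the other commands map 1:1 onto the cases in
   handle_gdb_monitor_command(). */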
"on":"off"); 1513 } 1514 else 1515 CLG_(set_instrument_state)("Command", VG_(strcmp)(arg,"off")!=0); 1516 return True; 1517 } 1518 1519 default: 1520 tl_assert(0); 1521 return False; 1522 } 1523 } 1524 1525 static 1526 Bool CLG_(handle_client_request)(ThreadId tid, UWord *args, UWord *ret) 1527 { 1528 if (!VG_IS_TOOL_USERREQ('C','T',args[0]) 1529 && VG_USERREQ__GDB_MONITOR_COMMAND != args[0]) 1530 return False; 1531 1532 switch(args[0]) { 1533 case VG_USERREQ__DUMP_STATS: 1534 CLG_(dump_profile)("Client Request", True); 1535 *ret = 0; /* meaningless */ 1536 break; 1537 1538 case VG_USERREQ__DUMP_STATS_AT: 1539 { 1540 Char buf[512]; 1541 VG_(sprintf)(buf,"Client Request: %s", (Char*)args[1]); 1542 CLG_(dump_profile)(buf, True); 1543 *ret = 0; /* meaningless */ 1544 } 1545 break; 1546 1547 case VG_USERREQ__ZERO_STATS: 1548 CLG_(zero_all_cost)(True); 1549 *ret = 0; /* meaningless */ 1550 break; 1551 1552 case VG_USERREQ__TOGGLE_COLLECT: 1553 CLG_(current_state).collect = !CLG_(current_state).collect; 1554 CLG_DEBUG(2, "Client Request: toggled collection state to %s\n", 1555 CLG_(current_state).collect ? "ON" : "OFF"); 1556 *ret = 0; /* meaningless */ 1557 break; 1558 1559 case VG_USERREQ__START_INSTRUMENTATION: 1560 CLG_(set_instrument_state)("Client Request", True); 1561 *ret = 0; /* meaningless */ 1562 break; 1563 1564 case VG_USERREQ__STOP_INSTRUMENTATION: 1565 CLG_(set_instrument_state)("Client Request", False); 1566 *ret = 0; /* meaningless */ 1567 break; 1568 1569 case VG_USERREQ__GDB_MONITOR_COMMAND: { 1570 Bool handled = handle_gdb_monitor_command (tid, (Char*)args[1]); 1571 if (handled) 1572 *ret = 1; 1573 else 1574 *ret = 0; 1575 return handled; 1576 } 1577 default: 1578 return False; 1579 } 1580 1581 return True; 1582 } 1583 1584 1585 /* Syscall Timing */ 1586 1587 /* struct timeval syscalltime[VG_N_THREADS]; */ 1588 #if CLG_MICROSYSTIME 1589 #include <sys/time.h> 1590 #include <sys/syscall.h> 1591 extern Int VG_(do_syscall) ( UInt, ... 


/* Syscall Timing */

/* struct timeval syscalltime[VG_N_THREADS]; */
#if CLG_MICROSYSTIME
#include <sys/time.h>
#include <sys/syscall.h>
extern Int VG_(do_syscall) ( UInt, ... );

ULong syscalltime[VG_N_THREADS];
#else
UInt syscalltime[VG_N_THREADS];
#endif

static
void CLG_(pre_syscalltime)(ThreadId tid, UInt syscallno,
                           UWord* args, UInt nArgs)
{
   if (CLG_(clo).collect_systime) {
#if CLG_MICROSYSTIME
      struct vki_timeval tv_now;
      VG_(do_syscall)(__NR_gettimeofday, (UInt)&tv_now, (UInt)NULL);
      syscalltime[tid] = tv_now.tv_sec * 1000000ULL + tv_now.tv_usec;
#else
      syscalltime[tid] = VG_(read_millisecond_timer)();
#endif
   }
}

static
void CLG_(post_syscalltime)(ThreadId tid, UInt syscallno,
                            UWord* args, UInt nArgs, SysRes res)
{
   if (CLG_(clo).collect_systime &&
       CLG_(current_state).bbcc) {
      Int o;
#if CLG_MICROSYSTIME
      struct vki_timeval tv_now;
      ULong diff;

      VG_(do_syscall)(__NR_gettimeofday, (UInt)&tv_now, (UInt)NULL);
      diff = (tv_now.tv_sec * 1000000ULL + tv_now.tv_usec) - syscalltime[tid];
#else
      UInt diff = VG_(read_millisecond_timer)() - syscalltime[tid];
#endif

      /* offset o is for "SysCount", o+1 for "SysTime" */
      o = fullOffset(EG_SYS);
      CLG_ASSERT(o>=0);
      CLG_DEBUG(0,"   Time (Off %d) for Syscall %d: %llu\n", o, syscallno,
                (ULong)diff);

      CLG_(current_state).cost[o] ++;
      CLG_(current_state).cost[o+1] += diff;
      if (!CLG_(current_state).bbcc->skipped)
         CLG_(init_cost_lz)(CLG_(sets).full,
                            &(CLG_(current_state).bbcc->skipped));
      CLG_(current_state).bbcc->skipped[o] ++;
      CLG_(current_state).bbcc->skipped[o+1] += diff;
   }
}

static UInt ULong_width(ULong n)
{
   UInt w = 0;
   while (n > 0) {
      n = n / 10;
      w++;
   }
   if (w == 0) w = 1;
   return w + (w-1)/3;   // add space for commas
}
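/* Example: ULong_width(1234567) computes w = 7 digits and returns
   7 + (7-1)/3 = 9, the printed width of "1,234,567" with the comma
   formatting ("%,llu") used by the statistics output below. */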

static
void branchsim_printstat(int l1, int l2, int l3)
{
   static Char buf1[128], buf2[128], buf3[128], fmt[128];
   FullCost total;
   ULong Bc_total_b, Bc_total_mp, Bi_total_b, Bi_total_mp;
   ULong B_total_b, B_total_mp;

   total = CLG_(total_cost);
   Bc_total_b  = total[ fullOffset(EG_BC)   ];
   Bc_total_mp = total[ fullOffset(EG_BC)+1 ];
   Bi_total_b  = total[ fullOffset(EG_BI)   ];
   Bi_total_mp = total[ fullOffset(EG_BI)+1 ];

   /* Make format string, getting width right for numbers */
   VG_(sprintf)(fmt, "%%s %%,%dllu (%%,%dllu cond + %%,%dllu ind)\n",
                l1, l2, l3);

   if (0 == Bc_total_b) Bc_total_b = 1;
   if (0 == Bi_total_b) Bi_total_b = 1;
   B_total_b  = Bc_total_b  + Bi_total_b;
   B_total_mp = Bc_total_mp + Bi_total_mp;

   VG_(umsg)("\n");
   VG_(umsg)(fmt, "Branches:     ",
             B_total_b, Bc_total_b, Bi_total_b);

   VG_(umsg)(fmt, "Mispredicts:  ",
             B_total_mp, Bc_total_mp, Bi_total_mp);

   VG_(percentify)(B_total_mp,  B_total_b,  1, l1+1, buf1);
   VG_(percentify)(Bc_total_mp, Bc_total_b, 1, l2+1, buf2);
   VG_(percentify)(Bi_total_mp, Bi_total_b, 1, l3+1, buf3);

   VG_(umsg)("Mispred rate:  %s (%s + %s )\n", buf1, buf2, buf3);
}


static
void finish(void)
{
   Char buf[32+COSTS_LEN], fmt[128];
   Int l1, l2, l3;
   FullCost total;

   CLG_DEBUG(0, "finish()\n");

   (*CLG_(cachesim).finish)();

   /* pop all remaining items from CallStack for correct sum
    */
   CLG_(forall_threads)(unwind_thread);

   CLG_(dump_profile)(0, False);

   CLG_(finish_command)();

   if (VG_(clo_verbosity) == 0) return;

   /* Hash table stats */
   if (VG_(clo_stats)) {
      int BB_lookups =
         CLG_(stat).full_debug_BBs +
         CLG_(stat).fn_name_debug_BBs +
         CLG_(stat).file_line_debug_BBs +
         CLG_(stat).no_debug_BBs;

      VG_(message)(Vg_DebugMsg, "\n");
      VG_(message)(Vg_DebugMsg, "Distinct objects: %d\n",
                   CLG_(stat).distinct_objs);
      VG_(message)(Vg_DebugMsg, "Distinct files:   %d\n",
                   CLG_(stat).distinct_files);
      VG_(message)(Vg_DebugMsg, "Distinct fns:     %d\n",
                   CLG_(stat).distinct_fns);
      VG_(message)(Vg_DebugMsg, "Distinct contexts:%d\n",
                   CLG_(stat).distinct_contexts);
      VG_(message)(Vg_DebugMsg, "Distinct BBs:     %d\n",
                   CLG_(stat).distinct_bbs);
      VG_(message)(Vg_DebugMsg, "Cost entries:     %d (Chunks %d)\n",
                   CLG_(costarray_entries), CLG_(costarray_chunks));
      VG_(message)(Vg_DebugMsg, "Distinct BBCCs:   %d\n",
                   CLG_(stat).distinct_bbccs);
      VG_(message)(Vg_DebugMsg, "Distinct JCCs:    %d\n",
                   CLG_(stat).distinct_jccs);
      VG_(message)(Vg_DebugMsg, "Distinct skips:   %d\n",
                   CLG_(stat).distinct_skips);
      VG_(message)(Vg_DebugMsg, "BB lookups:       %d\n",
                   BB_lookups);
      if (BB_lookups>0) {
         VG_(message)(Vg_DebugMsg, "With full      debug info:%3d%% (%d)\n",
                      CLG_(stat).full_debug_BBs * 100 / BB_lookups,
                      CLG_(stat).full_debug_BBs);
         VG_(message)(Vg_DebugMsg, "With file/line debug info:%3d%% (%d)\n",
                      CLG_(stat).file_line_debug_BBs * 100 / BB_lookups,
                      CLG_(stat).file_line_debug_BBs);
         VG_(message)(Vg_DebugMsg, "With fn name   debug info:%3d%% (%d)\n",
                      CLG_(stat).fn_name_debug_BBs * 100 / BB_lookups,
                      CLG_(stat).fn_name_debug_BBs);
         VG_(message)(Vg_DebugMsg, "With no        debug info:%3d%% (%d)\n",
                      CLG_(stat).no_debug_BBs * 100 / BB_lookups,
                      CLG_(stat).no_debug_BBs);
      }
      VG_(message)(Vg_DebugMsg, "BBCC Clones:       %d\n",
                   CLG_(stat).bbcc_clones);
      VG_(message)(Vg_DebugMsg, "BBs Retranslated:  %d\n",
                   CLG_(stat).bb_retranslations);
      VG_(message)(Vg_DebugMsg, "Distinct instrs:   %d\n",
                   CLG_(stat).distinct_instrs);
      VG_(message)(Vg_DebugMsg, "");

      VG_(message)(Vg_DebugMsg, "LRU Contxt Misses: %d\n",
                   CLG_(stat).cxt_lru_misses);
      VG_(message)(Vg_DebugMsg, "LRU BBCC Misses:   %d\n",
                   CLG_(stat).bbcc_lru_misses);
      VG_(message)(Vg_DebugMsg, "LRU JCC Misses:    %d\n",
                   CLG_(stat).jcc_lru_misses);
      VG_(message)(Vg_DebugMsg, "BBs Executed:      %llu\n",
                   CLG_(stat).bb_executions);
      VG_(message)(Vg_DebugMsg, "Calls:             %llu\n",
                   CLG_(stat).call_counter);
      VG_(message)(Vg_DebugMsg, "CondJMP followed:  %llu\n",
                   CLG_(stat).jcnd_counter);
      VG_(message)(Vg_DebugMsg, "Boring JMPs:       %llu\n",
                   CLG_(stat).jump_counter);
      VG_(message)(Vg_DebugMsg, "Recursive calls:   %llu\n",
                   CLG_(stat).rec_call_counter);
      VG_(message)(Vg_DebugMsg, "Returns:           %llu\n",
                   CLG_(stat).ret_counter);

      VG_(message)(Vg_DebugMsg, "");
   }

   CLG_(sprint_eventmapping)(buf, CLG_(dumpmap));
   VG_(message)(Vg_UserMsg, "Events    : %s\n", buf);
   CLG_(sprint_mappingcost)(buf, CLG_(dumpmap), CLG_(total_cost));
   VG_(message)(Vg_UserMsg, "Collected : %s\n", buf);
   VG_(message)(Vg_UserMsg, "\n");

   /* determine value widths for statistics */
   total = CLG_(total_cost);
   l1 = ULong_width( total[fullOffset(EG_IR)] );
   l2 = l3 = 0;
   if (CLG_(clo).simulate_cache) {
      l2 = ULong_width( total[fullOffset(EG_DR)] );
      l3 = ULong_width( total[fullOffset(EG_DW)] );
   }
   if (CLG_(clo).simulate_branch) {
      int l2b = ULong_width( total[fullOffset(EG_BC)] );
      int l3b = ULong_width( total[fullOffset(EG_BI)] );
      if (l2b > l2) l2 = l2b;
      if (l3b > l3) l3 = l3b;
   }

   /* Make format string, getting width right for numbers */
   VG_(sprintf)(fmt, "%%s %%,%dllu\n", l1);

   /* Always print this */
   VG_(umsg)(fmt, "I   refs:     ", total[fullOffset(EG_IR)] );

   if (CLG_(clo).simulate_cache)
      (*CLG_(cachesim).printstat)(l1, l2, l3);

   if (CLG_(clo).simulate_branch)
      branchsim_printstat(l1, l2, l3);

}


void CLG_(fini)(Int exitcode)
{
   finish();
}


/*--------------------------------------------------------------------*/
/*--- Setup                                                        ---*/
/*--------------------------------------------------------------------*/

static void clg_start_client_code_callback ( ThreadId tid, ULong blocks_done )
{
   static ULong last_blocks_done = 0;

   if (0)
      VG_(printf)("%d R %llu\n", (Int)tid, blocks_done);

   /* throttle calls to CLG_(run_thread) by number of BBs executed */
   if (blocks_done - last_blocks_done < 5000) return;
   last_blocks_done = blocks_done;

   CLG_(run_thread)( tid );
}

static
void CLG_(post_clo_init)(void)
{
   VG_(clo_vex_control).iropt_unroll_thresh = 0;
   VG_(clo_vex_control).guest_chase_thresh = 0;

   CLG_DEBUG(1, "  dump threads: %s\n", CLG_(clo).separate_threads ? "Yes":"No");
   CLG_DEBUG(1, "  call sep. : %d\n", CLG_(clo).separate_callers);
   CLG_DEBUG(1, "  rec. sep. : %d\n", CLG_(clo).separate_recursions);

   if (!CLG_(clo).dump_line && !CLG_(clo).dump_instr && !CLG_(clo).dump_bb) {
      VG_(message)(Vg_UserMsg, "Using source line as position.\n");
      CLG_(clo).dump_line = True;
   }

   CLG_(init_dumps)();
   CLG_(init_command)();

   (*CLG_(cachesim).post_clo_init)();

   CLG_(init_eventsets)();
   CLG_(init_statistics)(& CLG_(stat));
   CLG_(init_cost_lz)( CLG_(sets).full, &CLG_(total_cost) );

   /* initialize hash tables */
   CLG_(init_obj_table)();
   CLG_(init_cxt_table)();
   CLG_(init_bb_hash)();

   CLG_(init_threads)();
   CLG_(run_thread)(1);

   CLG_(instrument_state) = CLG_(clo).instrument_atstart;

   if (VG_(clo_verbosity) > 0) {
      VG_(message)(Vg_UserMsg,
                   "For interactive control, run 'callgrind_control -h'.\n");
   }
}

static
void CLG_(pre_clo_init)(void)
{
   VG_(details_name)            ("Callgrind");
   VG_(details_version)         (NULL);
   VG_(details_description)     ("a call-graph generating cache profiler");
   VG_(details_copyright_author)("Copyright (C) 2002-2011, and GNU GPL'd, "
                                 "by Josef Weidendorfer et al.");
   VG_(details_bug_reports_to)  (VG_BUGS_TO);
   VG_(details_avg_translation_sizeB) ( 500 );

   VG_(basic_tool_funcs)        (CLG_(post_clo_init),
                                 CLG_(instrument),
                                 CLG_(fini));

   VG_(needs_superblock_discards)(clg_discard_superblock_info);


   VG_(needs_command_line_options)(CLG_(process_cmd_line_option),
                                   CLG_(print_usage),
                                   CLG_(print_debug_usage));

   VG_(needs_client_requests)(CLG_(handle_client_request));
   VG_(needs_syscall_wrapper)(CLG_(pre_syscalltime),
                              CLG_(post_syscalltime));

   VG_(track_start_client_code)  ( & clg_start_client_code_callback );
   VG_(track_pre_deliver_signal) ( & CLG_(pre_signal) );
   VG_(track_post_deliver_signal)( & CLG_(post_signal) );

   CLG_(set_clo_defaults)();
}

VG_DETERMINE_INTERFACE_VERSION(CLG_(pre_clo_init))

/*--------------------------------------------------------------------*/
/*--- end                                                   main.c ---*/
/*--------------------------------------------------------------------*/