1 2 /*--------------------------------------------------------------------*/ 3 /*--- Callgrind ---*/ 4 /*--- main.c ---*/ 5 /*--------------------------------------------------------------------*/ 6 7 /* 8 This file is part of Callgrind, a Valgrind tool for call graph 9 profiling programs. 10 11 Copyright (C) 2002-2013, Josef Weidendorfer (Josef.Weidendorfer (at) gmx.de) 12 13 This tool is derived from and contains code from Cachegrind 14 Copyright (C) 2002-2013 Nicholas Nethercote (njn (at) valgrind.org) 15 16 This program is free software; you can redistribute it and/or 17 modify it under the terms of the GNU General Public License as 18 published by the Free Software Foundation; either version 2 of the 19 License, or (at your option) any later version. 20 21 This program is distributed in the hope that it will be useful, but 22 WITHOUT ANY WARRANTY; without even the implied warranty of 23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 24 General Public License for more details. 25 26 You should have received a copy of the GNU General Public License 27 along with this program; if not, write to the Free Software 28 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 29 02111-1307, USA. 30 31 The GNU General Public License is contained in the file COPYING. 32 */ 33 34 #include "config.h" 35 #include "callgrind.h" 36 #include "global.h" 37 38 #include "pub_tool_threadstate.h" 39 #include "pub_tool_gdbserver.h" 40 41 #include "cg_branchpred.c" 42 43 /*------------------------------------------------------------*/ 44 /*--- Global variables ---*/ 45 /*------------------------------------------------------------*/ 46 47 /* for all threads */ 48 CommandLineOptions CLG_(clo); 49 Statistics CLG_(stat); 50 Bool CLG_(instrument_state) = True; /* Instrumentation on ? */ 51 52 /* thread and signal handler specific */ 53 exec_state CLG_(current_state); 54 55 /* min of L1 and LL cache line sizes. 
This only gets set to a 56 non-zero value if we are doing cache simulation. */ 57 Int CLG_(min_line_size) = 0; 58 59 60 /*------------------------------------------------------------*/ 61 /*--- Statistics ---*/ 62 /*------------------------------------------------------------*/ 63 64 static void CLG_(init_statistics)(Statistics* s) 65 { 66 s->call_counter = 0; 67 s->jcnd_counter = 0; 68 s->jump_counter = 0; 69 s->rec_call_counter = 0; 70 s->ret_counter = 0; 71 s->bb_executions = 0; 72 73 s->context_counter = 0; 74 s->bb_retranslations = 0; 75 76 s->distinct_objs = 0; 77 s->distinct_files = 0; 78 s->distinct_fns = 0; 79 s->distinct_contexts = 0; 80 s->distinct_bbs = 0; 81 s->distinct_bbccs = 0; 82 s->distinct_instrs = 0; 83 s->distinct_skips = 0; 84 85 s->bb_hash_resizes = 0; 86 s->bbcc_hash_resizes = 0; 87 s->jcc_hash_resizes = 0; 88 s->cxt_hash_resizes = 0; 89 s->fn_array_resizes = 0; 90 s->call_stack_resizes = 0; 91 s->fn_stack_resizes = 0; 92 93 s->full_debug_BBs = 0; 94 s->file_line_debug_BBs = 0; 95 s->fn_name_debug_BBs = 0; 96 s->no_debug_BBs = 0; 97 s->bbcc_lru_misses = 0; 98 s->jcc_lru_misses = 0; 99 s->cxt_lru_misses = 0; 100 s->bbcc_clones = 0; 101 } 102 103 104 /*------------------------------------------------------------*/ 105 /*--- Simple callbacks (not cache similator) ---*/ 106 /*------------------------------------------------------------*/ 107 108 VG_REGPARM(1) 109 static void log_global_event(InstrInfo* ii) 110 { 111 ULong* cost_Bus; 112 113 CLG_DEBUG(6, "log_global_event: Ir %#lx/%u\n", 114 CLG_(bb_base) + ii->instr_offset, ii->instr_size); 115 116 if (!CLG_(current_state).collect) return; 117 118 CLG_ASSERT( (ii->eventset->mask & (1u<<EG_BUS))>0 ); 119 120 CLG_(current_state).cost[ fullOffset(EG_BUS) ]++; 121 122 if (CLG_(current_state).nonskipped) 123 cost_Bus = CLG_(current_state).nonskipped->skipped + fullOffset(EG_BUS); 124 else 125 cost_Bus = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_BUS]; 126 cost_Bus[0]++; 127 } 128 129 
130 /* For branches, we consult two different predictors, one which 131 predicts taken/untaken for conditional branches, and the other 132 which predicts the branch target address for indirect branches 133 (jump-to-register style ones). */ 134 135 static VG_REGPARM(2) 136 void log_cond_branch(InstrInfo* ii, Word taken) 137 { 138 Bool miss; 139 Int fullOffset_Bc; 140 ULong* cost_Bc; 141 142 CLG_DEBUG(6, "log_cond_branch: Ir %#lx, taken %lu\n", 143 CLG_(bb_base) + ii->instr_offset, taken); 144 145 miss = 1 & do_cond_branch_predict(CLG_(bb_base) + ii->instr_offset, taken); 146 147 if (!CLG_(current_state).collect) return; 148 149 CLG_ASSERT( (ii->eventset->mask & (1u<<EG_BC))>0 ); 150 151 if (CLG_(current_state).nonskipped) 152 cost_Bc = CLG_(current_state).nonskipped->skipped + fullOffset(EG_BC); 153 else 154 cost_Bc = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_BC]; 155 156 fullOffset_Bc = fullOffset(EG_BC); 157 CLG_(current_state).cost[ fullOffset_Bc ]++; 158 cost_Bc[0]++; 159 if (miss) { 160 CLG_(current_state).cost[ fullOffset_Bc+1 ]++; 161 cost_Bc[1]++; 162 } 163 } 164 165 static VG_REGPARM(2) 166 void log_ind_branch(InstrInfo* ii, UWord actual_dst) 167 { 168 Bool miss; 169 Int fullOffset_Bi; 170 ULong* cost_Bi; 171 172 CLG_DEBUG(6, "log_ind_branch: Ir %#lx, dst %#lx\n", 173 CLG_(bb_base) + ii->instr_offset, actual_dst); 174 175 miss = 1 & do_ind_branch_predict(CLG_(bb_base) + ii->instr_offset, actual_dst); 176 177 if (!CLG_(current_state).collect) return; 178 179 CLG_ASSERT( (ii->eventset->mask & (1u<<EG_BI))>0 ); 180 181 if (CLG_(current_state).nonskipped) 182 cost_Bi = CLG_(current_state).nonskipped->skipped + fullOffset(EG_BI); 183 else 184 cost_Bi = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_BI]; 185 186 fullOffset_Bi = fullOffset(EG_BI); 187 CLG_(current_state).cost[ fullOffset_Bi ]++; 188 cost_Bi[0]++; 189 if (miss) { 190 CLG_(current_state).cost[ fullOffset_Bi+1 ]++; 191 cost_Bi[1]++; 192 } 193 } 194 195 
/*------------------------------------------------------------*/ 196 /*--- Instrumentation structures and event queue handling ---*/ 197 /*------------------------------------------------------------*/ 198 199 /* Maintain an ordered list of memory events which are outstanding, in 200 the sense that no IR has yet been generated to do the relevant 201 helper calls. The BB is scanned top to bottom and memory events 202 are added to the end of the list, merging with the most recent 203 notified event where possible (Dw immediately following Dr and 204 having the same size and EA can be merged). 205 206 This merging is done so that for architectures which have 207 load-op-store instructions (x86, amd64), the insn is treated as if 208 it makes just one memory reference (a modify), rather than two (a 209 read followed by a write at the same address). 210 211 At various points the list will need to be flushed, that is, IR 212 generated from it. That must happen before any possible exit from 213 the block (the end, or an IRStmt_Exit). Flushing also takes place 214 when there is no space to add a new event. 215 216 If we require the simulation statistics to be up to date with 217 respect to possible memory exceptions, then the list would have to 218 be flushed before each memory reference. That would however lose 219 performance by inhibiting event-merging during flushing. 220 221 Flushing the list consists of walking it start to end and emitting 222 instrumentation IR for each event, in the order in which they 223 appear. It may be possible to emit a single call for two adjacent 224 events in order to reduce the number of helper function calls made. 225 For example, it could well be profitable to handle two adjacent Ir 226 events with a single helper call. 
*/ 227 228 typedef 229 IRExpr 230 IRAtom; 231 232 typedef 233 enum { 234 Ev_Ir, // Instruction read 235 Ev_Dr, // Data read 236 Ev_Dw, // Data write 237 Ev_Dm, // Data modify (read then write) 238 Ev_Bc, // branch conditional 239 Ev_Bi, // branch indirect (to unknown destination) 240 Ev_G // Global bus event 241 } 242 EventTag; 243 244 typedef 245 struct { 246 EventTag tag; 247 InstrInfo* inode; 248 union { 249 struct { 250 } Ir; 251 struct { 252 IRAtom* ea; 253 Int szB; 254 } Dr; 255 struct { 256 IRAtom* ea; 257 Int szB; 258 } Dw; 259 struct { 260 IRAtom* ea; 261 Int szB; 262 } Dm; 263 struct { 264 IRAtom* taken; /* :: Ity_I1 */ 265 } Bc; 266 struct { 267 IRAtom* dst; 268 } Bi; 269 struct { 270 } G; 271 } Ev; 272 } 273 Event; 274 275 static void init_Event ( Event* ev ) { 276 VG_(memset)(ev, 0, sizeof(Event)); 277 } 278 279 static IRAtom* get_Event_dea ( Event* ev ) { 280 switch (ev->tag) { 281 case Ev_Dr: return ev->Ev.Dr.ea; 282 case Ev_Dw: return ev->Ev.Dw.ea; 283 case Ev_Dm: return ev->Ev.Dm.ea; 284 default: tl_assert(0); 285 } 286 } 287 288 static Int get_Event_dszB ( Event* ev ) { 289 switch (ev->tag) { 290 case Ev_Dr: return ev->Ev.Dr.szB; 291 case Ev_Dw: return ev->Ev.Dw.szB; 292 case Ev_Dm: return ev->Ev.Dm.szB; 293 default: tl_assert(0); 294 } 295 } 296 297 298 /* Up to this many unnotified events are allowed. Number is 299 arbitrary. Larger numbers allow more event merging to occur, but 300 potentially induce more spilling due to extending live ranges of 301 address temporaries. */ 302 #define N_EVENTS 16 303 304 305 /* A struct which holds all the running state during instrumentation. 306 Mostly to avoid passing loads of parameters everywhere. */ 307 typedef struct { 308 /* The current outstanding-memory-event list. */ 309 Event events[N_EVENTS]; 310 Int events_used; 311 312 /* The array of InstrInfo's is part of BB struct. */ 313 BB* bb; 314 315 /* BB seen before (ie. 
re-instrumentation) */ 316 Bool seen_before; 317 318 /* Number InstrInfo bins 'used' so far. */ 319 UInt ii_index; 320 321 // current offset of guest instructions from BB start 322 UInt instr_offset; 323 324 /* The output SB being constructed. */ 325 IRSB* sbOut; 326 } ClgState; 327 328 329 static void showEvent ( Event* ev ) 330 { 331 switch (ev->tag) { 332 case Ev_Ir: 333 VG_(printf)("Ir (InstrInfo %p) at +%d\n", 334 ev->inode, ev->inode->instr_offset); 335 break; 336 case Ev_Dr: 337 VG_(printf)("Dr (InstrInfo %p) at +%d %d EA=", 338 ev->inode, ev->inode->instr_offset, ev->Ev.Dr.szB); 339 ppIRExpr(ev->Ev.Dr.ea); 340 VG_(printf)("\n"); 341 break; 342 case Ev_Dw: 343 VG_(printf)("Dw (InstrInfo %p) at +%d %d EA=", 344 ev->inode, ev->inode->instr_offset, ev->Ev.Dw.szB); 345 ppIRExpr(ev->Ev.Dw.ea); 346 VG_(printf)("\n"); 347 break; 348 case Ev_Dm: 349 VG_(printf)("Dm (InstrInfo %p) at +%d %d EA=", 350 ev->inode, ev->inode->instr_offset, ev->Ev.Dm.szB); 351 ppIRExpr(ev->Ev.Dm.ea); 352 VG_(printf)("\n"); 353 break; 354 case Ev_Bc: 355 VG_(printf)("Bc %p GA=", ev->inode); 356 ppIRExpr(ev->Ev.Bc.taken); 357 VG_(printf)("\n"); 358 break; 359 case Ev_Bi: 360 VG_(printf)("Bi %p DST=", ev->inode); 361 ppIRExpr(ev->Ev.Bi.dst); 362 VG_(printf)("\n"); 363 break; 364 case Ev_G: 365 VG_(printf)("G %p\n", ev->inode); 366 break; 367 default: 368 tl_assert(0); 369 break; 370 } 371 } 372 373 /* Generate code for all outstanding memory events, and mark the queue 374 empty. Code is generated into cgs->sbOut, and this activity 375 'consumes' slots in cgs->bb. 
*/ 376 377 static void flushEvents ( ClgState* clgs ) 378 { 379 Int i, regparms, inew; 380 const HChar* helperName; 381 void* helperAddr; 382 IRExpr** argv; 383 IRExpr* i_node_expr; 384 IRDirty* di; 385 Event* ev; 386 Event* ev2; 387 Event* ev3; 388 389 if (!clgs->seen_before) { 390 // extend event sets as needed 391 // available sets: D0 Dr 392 for(i=0; i<clgs->events_used; i++) { 393 ev = &clgs->events[i]; 394 switch(ev->tag) { 395 case Ev_Ir: 396 // Ir event always is first for a guest instruction 397 CLG_ASSERT(ev->inode->eventset == 0); 398 ev->inode->eventset = CLG_(sets).base; 399 break; 400 case Ev_Dr: 401 // extend event set by Dr counters 402 ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset, 403 EG_DR); 404 break; 405 case Ev_Dw: 406 case Ev_Dm: 407 // extend event set by Dw counters 408 ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset, 409 EG_DW); 410 break; 411 case Ev_Bc: 412 // extend event set by Bc counters 413 ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset, 414 EG_BC); 415 break; 416 case Ev_Bi: 417 // extend event set by Bi counters 418 ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset, 419 EG_BI); 420 break; 421 case Ev_G: 422 // extend event set by Bus counter 423 ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset, 424 EG_BUS); 425 break; 426 default: 427 tl_assert(0); 428 } 429 } 430 } 431 432 for(i = 0; i < clgs->events_used; i = inew) { 433 434 helperName = NULL; 435 helperAddr = NULL; 436 argv = NULL; 437 regparms = 0; 438 439 /* generate IR to notify event i and possibly the ones 440 immediately following it. */ 441 tl_assert(i >= 0 && i < clgs->events_used); 442 443 ev = &clgs->events[i]; 444 ev2 = ( i < clgs->events_used-1 ? &clgs->events[i+1] : NULL ); 445 ev3 = ( i < clgs->events_used-2 ? 
&clgs->events[i+2] : NULL ); 446 447 CLG_DEBUGIF(5) { 448 VG_(printf)(" flush "); 449 showEvent( ev ); 450 } 451 452 i_node_expr = mkIRExpr_HWord( (HWord)ev->inode ); 453 454 /* Decide on helper fn to call and args to pass it, and advance 455 i appropriately. 456 Dm events have same effect as Dw events */ 457 switch (ev->tag) { 458 case Ev_Ir: 459 /* Merge an Ir with a following Dr. */ 460 if (ev2 && ev2->tag == Ev_Dr) { 461 /* Why is this true? It's because we're merging an Ir 462 with a following Dr. The Ir derives from the 463 instruction's IMark and the Dr from data 464 references which follow it. In short it holds 465 because each insn starts with an IMark, hence an 466 Ev_Ir, and so these Dr must pertain to the 467 immediately preceding Ir. Same applies to analogous 468 assertions in the subsequent cases. */ 469 tl_assert(ev2->inode == ev->inode); 470 helperName = CLG_(cachesim).log_1I1Dr_name; 471 helperAddr = CLG_(cachesim).log_1I1Dr; 472 argv = mkIRExprVec_3( i_node_expr, 473 get_Event_dea(ev2), 474 mkIRExpr_HWord( get_Event_dszB(ev2) ) ); 475 regparms = 3; 476 inew = i+2; 477 } 478 /* Merge an Ir with a following Dw/Dm. */ 479 else 480 if (ev2 && (ev2->tag == Ev_Dw || ev2->tag == Ev_Dm)) { 481 tl_assert(ev2->inode == ev->inode); 482 helperName = CLG_(cachesim).log_1I1Dw_name; 483 helperAddr = CLG_(cachesim).log_1I1Dw; 484 argv = mkIRExprVec_3( i_node_expr, 485 get_Event_dea(ev2), 486 mkIRExpr_HWord( get_Event_dszB(ev2) ) ); 487 regparms = 3; 488 inew = i+2; 489 } 490 /* Merge an Ir with two following Irs. */ 491 else 492 if (ev2 && ev3 && ev2->tag == Ev_Ir && ev3->tag == Ev_Ir) { 493 helperName = CLG_(cachesim).log_3I0D_name; 494 helperAddr = CLG_(cachesim).log_3I0D; 495 argv = mkIRExprVec_3( i_node_expr, 496 mkIRExpr_HWord( (HWord)ev2->inode ), 497 mkIRExpr_HWord( (HWord)ev3->inode ) ); 498 regparms = 3; 499 inew = i+3; 500 } 501 /* Merge an Ir with one following Ir. 
*/ 502 else 503 if (ev2 && ev2->tag == Ev_Ir) { 504 helperName = CLG_(cachesim).log_2I0D_name; 505 helperAddr = CLG_(cachesim).log_2I0D; 506 argv = mkIRExprVec_2( i_node_expr, 507 mkIRExpr_HWord( (HWord)ev2->inode ) ); 508 regparms = 2; 509 inew = i+2; 510 } 511 /* No merging possible; emit as-is. */ 512 else { 513 helperName = CLG_(cachesim).log_1I0D_name; 514 helperAddr = CLG_(cachesim).log_1I0D; 515 argv = mkIRExprVec_1( i_node_expr ); 516 regparms = 1; 517 inew = i+1; 518 } 519 break; 520 case Ev_Dr: 521 /* Data read or modify */ 522 helperName = CLG_(cachesim).log_0I1Dr_name; 523 helperAddr = CLG_(cachesim).log_0I1Dr; 524 argv = mkIRExprVec_3( i_node_expr, 525 get_Event_dea(ev), 526 mkIRExpr_HWord( get_Event_dszB(ev) ) ); 527 regparms = 3; 528 inew = i+1; 529 break; 530 case Ev_Dw: 531 case Ev_Dm: 532 /* Data write */ 533 helperName = CLG_(cachesim).log_0I1Dw_name; 534 helperAddr = CLG_(cachesim).log_0I1Dw; 535 argv = mkIRExprVec_3( i_node_expr, 536 get_Event_dea(ev), 537 mkIRExpr_HWord( get_Event_dszB(ev) ) ); 538 regparms = 3; 539 inew = i+1; 540 break; 541 case Ev_Bc: 542 /* Conditional branch */ 543 helperName = "log_cond_branch"; 544 helperAddr = &log_cond_branch; 545 argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bc.taken ); 546 regparms = 2; 547 inew = i+1; 548 break; 549 case Ev_Bi: 550 /* Branch to an unknown destination */ 551 helperName = "log_ind_branch"; 552 helperAddr = &log_ind_branch; 553 argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bi.dst ); 554 regparms = 2; 555 inew = i+1; 556 break; 557 case Ev_G: 558 /* Global bus event (CAS, LOCK-prefix, LL-SC, etc) */ 559 helperName = "log_global_event"; 560 helperAddr = &log_global_event; 561 argv = mkIRExprVec_1( i_node_expr ); 562 regparms = 1; 563 inew = i+1; 564 break; 565 default: 566 tl_assert(0); 567 } 568 569 CLG_DEBUGIF(5) { 570 if (inew > i+1) { 571 VG_(printf)(" merge "); 572 showEvent( ev2 ); 573 } 574 if (inew > i+2) { 575 VG_(printf)(" merge "); 576 showEvent( ev3 ); 577 } 578 if (helperAddr) 579 
VG_(printf)(" call %s (%p)\n", 580 helperName, helperAddr); 581 } 582 583 /* helper could be unset depending on the simulator used */ 584 if (helperAddr == 0) continue; 585 586 /* Add the helper. */ 587 tl_assert(helperName); 588 tl_assert(helperAddr); 589 tl_assert(argv); 590 di = unsafeIRDirty_0_N( regparms, 591 helperName, VG_(fnptr_to_fnentry)( helperAddr ), 592 argv ); 593 addStmtToIRSB( clgs->sbOut, IRStmt_Dirty(di) ); 594 } 595 596 clgs->events_used = 0; 597 } 598 599 static void addEvent_Ir ( ClgState* clgs, InstrInfo* inode ) 600 { 601 Event* evt; 602 tl_assert(clgs->seen_before || (inode->eventset == 0)); 603 if (!CLG_(clo).simulate_cache) return; 604 605 if (clgs->events_used == N_EVENTS) 606 flushEvents(clgs); 607 tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS); 608 evt = &clgs->events[clgs->events_used]; 609 init_Event(evt); 610 evt->tag = Ev_Ir; 611 evt->inode = inode; 612 clgs->events_used++; 613 } 614 615 static 616 void addEvent_Dr ( ClgState* clgs, InstrInfo* inode, Int datasize, IRAtom* ea ) 617 { 618 Event* evt; 619 tl_assert(isIRAtom(ea)); 620 tl_assert(datasize >= 1); 621 if (!CLG_(clo).simulate_cache) return; 622 tl_assert(datasize <= CLG_(min_line_size)); 623 624 if (clgs->events_used == N_EVENTS) 625 flushEvents(clgs); 626 tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS); 627 evt = &clgs->events[clgs->events_used]; 628 init_Event(evt); 629 evt->tag = Ev_Dr; 630 evt->inode = inode; 631 evt->Ev.Dr.szB = datasize; 632 evt->Ev.Dr.ea = ea; 633 clgs->events_used++; 634 } 635 636 static 637 void addEvent_Dw ( ClgState* clgs, InstrInfo* inode, Int datasize, IRAtom* ea ) 638 { 639 Event* lastEvt; 640 Event* evt; 641 tl_assert(isIRAtom(ea)); 642 tl_assert(datasize >= 1); 643 if (!CLG_(clo).simulate_cache) return; 644 tl_assert(datasize <= CLG_(min_line_size)); 645 646 /* Is it possible to merge this write with the preceding read? 
*/ 647 lastEvt = &clgs->events[clgs->events_used-1]; 648 if (clgs->events_used > 0 649 && lastEvt->tag == Ev_Dr 650 && lastEvt->Ev.Dr.szB == datasize 651 && lastEvt->inode == inode 652 && eqIRAtom(lastEvt->Ev.Dr.ea, ea)) 653 { 654 lastEvt->tag = Ev_Dm; 655 return; 656 } 657 658 /* No. Add as normal. */ 659 if (clgs->events_used == N_EVENTS) 660 flushEvents(clgs); 661 tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS); 662 evt = &clgs->events[clgs->events_used]; 663 init_Event(evt); 664 evt->tag = Ev_Dw; 665 evt->inode = inode; 666 evt->Ev.Dw.szB = datasize; 667 evt->Ev.Dw.ea = ea; 668 clgs->events_used++; 669 } 670 671 static 672 void addEvent_D_guarded ( ClgState* clgs, InstrInfo* inode, 673 Int datasize, IRAtom* ea, IRAtom* guard, 674 Bool isWrite ) 675 { 676 tl_assert(isIRAtom(ea)); 677 tl_assert(guard); 678 tl_assert(isIRAtom(guard)); 679 tl_assert(datasize >= 1); 680 if (!CLG_(clo).simulate_cache) return; 681 tl_assert(datasize <= CLG_(min_line_size)); 682 683 /* Adding guarded memory actions and merging them with the existing 684 queue is too complex. Simply flush the queue and add this 685 action immediately. Since guarded loads and stores are pretty 686 rare, this is not thought likely to cause any noticeable 687 performance loss as a result of the loss of event-merging 688 opportunities. */ 689 tl_assert(clgs->events_used >= 0); 690 flushEvents(clgs); 691 tl_assert(clgs->events_used == 0); 692 /* Same as case Ev_Dw / case Ev_Dr in flushEvents, except with guard */ 693 IRExpr* i_node_expr; 694 const HChar* helperName; 695 void* helperAddr; 696 IRExpr** argv; 697 Int regparms; 698 IRDirty* di; 699 i_node_expr = mkIRExpr_HWord( (HWord)inode ); 700 helperName = isWrite ? CLG_(cachesim).log_0I1Dw_name 701 : CLG_(cachesim).log_0I1Dr_name; 702 helperAddr = isWrite ? 
CLG_(cachesim).log_0I1Dw 703 : CLG_(cachesim).log_0I1Dr; 704 argv = mkIRExprVec_3( i_node_expr, 705 ea, mkIRExpr_HWord( datasize ) ); 706 regparms = 3; 707 di = unsafeIRDirty_0_N( 708 regparms, 709 helperName, VG_(fnptr_to_fnentry)( helperAddr ), 710 argv ); 711 di->guard = guard; 712 addStmtToIRSB( clgs->sbOut, IRStmt_Dirty(di) ); 713 } 714 715 static 716 void addEvent_Bc ( ClgState* clgs, InstrInfo* inode, IRAtom* guard ) 717 { 718 Event* evt; 719 tl_assert(isIRAtom(guard)); 720 tl_assert(typeOfIRExpr(clgs->sbOut->tyenv, guard) 721 == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64)); 722 if (!CLG_(clo).simulate_branch) return; 723 724 if (clgs->events_used == N_EVENTS) 725 flushEvents(clgs); 726 tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS); 727 evt = &clgs->events[clgs->events_used]; 728 init_Event(evt); 729 evt->tag = Ev_Bc; 730 evt->inode = inode; 731 evt->Ev.Bc.taken = guard; 732 clgs->events_used++; 733 } 734 735 static 736 void addEvent_Bi ( ClgState* clgs, InstrInfo* inode, IRAtom* whereTo ) 737 { 738 Event* evt; 739 tl_assert(isIRAtom(whereTo)); 740 tl_assert(typeOfIRExpr(clgs->sbOut->tyenv, whereTo) 741 == (sizeof(HWord)==4 ? 
Ity_I32 : Ity_I64)); 742 if (!CLG_(clo).simulate_branch) return; 743 744 if (clgs->events_used == N_EVENTS) 745 flushEvents(clgs); 746 tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS); 747 evt = &clgs->events[clgs->events_used]; 748 init_Event(evt); 749 evt->tag = Ev_Bi; 750 evt->inode = inode; 751 evt->Ev.Bi.dst = whereTo; 752 clgs->events_used++; 753 } 754 755 static 756 void addEvent_G ( ClgState* clgs, InstrInfo* inode ) 757 { 758 Event* evt; 759 if (!CLG_(clo).collect_bus) return; 760 761 if (clgs->events_used == N_EVENTS) 762 flushEvents(clgs); 763 tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS); 764 evt = &clgs->events[clgs->events_used]; 765 init_Event(evt); 766 evt->tag = Ev_G; 767 evt->inode = inode; 768 clgs->events_used++; 769 } 770 771 /* Initialise or check (if already seen before) an InstrInfo for next insn. 772 We only can set instr_offset/instr_size here. The required event set and 773 resulting cost offset depend on events (Ir/Dr/Dw/Dm) in guest 774 instructions. The event set is extended as required on flush of the event 775 queue (when Dm events were determined), cost offsets are determined at 776 end of BB instrumentation. 
*/ 777 static 778 InstrInfo* next_InstrInfo ( ClgState* clgs, UInt instr_size ) 779 { 780 InstrInfo* ii; 781 tl_assert(clgs->ii_index >= 0); 782 tl_assert(clgs->ii_index < clgs->bb->instr_count); 783 ii = &clgs->bb->instr[ clgs->ii_index ]; 784 785 if (clgs->seen_before) { 786 CLG_ASSERT(ii->instr_offset == clgs->instr_offset); 787 CLG_ASSERT(ii->instr_size == instr_size); 788 } 789 else { 790 ii->instr_offset = clgs->instr_offset; 791 ii->instr_size = instr_size; 792 ii->cost_offset = 0; 793 ii->eventset = 0; 794 } 795 796 clgs->ii_index++; 797 clgs->instr_offset += instr_size; 798 CLG_(stat).distinct_instrs++; 799 800 return ii; 801 } 802 803 // return total number of cost values needed for this BB 804 static 805 UInt update_cost_offsets( ClgState* clgs ) 806 { 807 Int i; 808 InstrInfo* ii; 809 UInt cost_offset = 0; 810 811 CLG_ASSERT(clgs->bb->instr_count == clgs->ii_index); 812 for(i=0; i<clgs->ii_index; i++) { 813 ii = &clgs->bb->instr[i]; 814 if (clgs->seen_before) { 815 CLG_ASSERT(ii->cost_offset == cost_offset); 816 } else 817 ii->cost_offset = cost_offset; 818 cost_offset += ii->eventset ? 
ii->eventset->size : 0; 819 } 820 821 return cost_offset; 822 } 823 824 /*------------------------------------------------------------*/ 825 /*--- Instrumentation ---*/ 826 /*------------------------------------------------------------*/ 827 828 #if defined(VG_BIGENDIAN) 829 # define CLGEndness Iend_BE 830 #elif defined(VG_LITTLEENDIAN) 831 # define CLGEndness Iend_LE 832 #else 833 # error "Unknown endianness" 834 #endif 835 836 static 837 Addr IRConst2Addr(IRConst* con) 838 { 839 Addr addr; 840 841 if (sizeof(Addr) == 4) { 842 CLG_ASSERT( con->tag == Ico_U32 ); 843 addr = con->Ico.U32; 844 } 845 else if (sizeof(Addr) == 8) { 846 CLG_ASSERT( con->tag == Ico_U64 ); 847 addr = con->Ico.U64; 848 } 849 else 850 VG_(tool_panic)("Callgrind: invalid Addr type"); 851 852 return addr; 853 } 854 855 /* First pass over a BB to instrument, counting instructions and jumps 856 * This is needed for the size of the BB struct to allocate 857 * 858 * Called from CLG_(get_bb) 859 */ 860 void CLG_(collectBlockInfo)(IRSB* sbIn, 861 /*INOUT*/ UInt* instrs, 862 /*INOUT*/ UInt* cjmps, 863 /*INOUT*/ Bool* cjmp_inverted) 864 { 865 Int i; 866 IRStmt* st; 867 Addr instrAddr =0, jumpDst; 868 UInt instrLen = 0; 869 Bool toNextInstr = False; 870 871 // Ist_Exit has to be ignored in preamble code, before first IMark: 872 // preamble code is added by VEX for self modifying code, and has 873 // nothing to do with client code 874 Bool inPreamble = True; 875 876 if (!sbIn) return; 877 878 for (i = 0; i < sbIn->stmts_used; i++) { 879 st = sbIn->stmts[i]; 880 if (Ist_IMark == st->tag) { 881 inPreamble = False; 882 883 instrAddr = (Addr)ULong_to_Ptr(st->Ist.IMark.addr); 884 instrLen = st->Ist.IMark.len; 885 886 (*instrs)++; 887 toNextInstr = False; 888 } 889 if (inPreamble) continue; 890 if (Ist_Exit == st->tag) { 891 jumpDst = IRConst2Addr(st->Ist.Exit.dst); 892 toNextInstr = (jumpDst == instrAddr + instrLen); 893 894 (*cjmps)++; 895 } 896 } 897 898 /* if the last instructions of BB conditionally jumps 
to next instruction 899 * (= first instruction of next BB in memory), this is a inverted by VEX. 900 */ 901 *cjmp_inverted = toNextInstr; 902 } 903 904 static 905 void addConstMemStoreStmt( IRSB* bbOut, UWord addr, UInt val, IRType hWordTy) 906 { 907 addStmtToIRSB( bbOut, 908 IRStmt_Store(CLGEndness, 909 IRExpr_Const(hWordTy == Ity_I32 ? 910 IRConst_U32( addr ) : 911 IRConst_U64( addr )), 912 IRExpr_Const(IRConst_U32(val)) )); 913 } 914 915 916 /* add helper call to setup_bbcc, with pointer to BB struct as argument 917 * 918 * precondition for setup_bbcc: 919 * - jmps_passed has number of cond.jumps passed in last executed BB 920 * - current_bbcc has a pointer to the BBCC of the last executed BB 921 * Thus, if bbcc_jmpkind is != -1 (JmpNone), 922 * current_bbcc->bb->jmp_addr 923 * gives the address of the jump source. 924 * 925 * the setup does 2 things: 926 * - trace call: 927 * * Unwind own call stack, i.e sync our ESP with real ESP 928 * This is for ESP manipulation (longjmps, C++ exec handling) and RET 929 * * For CALLs or JMPs crossing objects, record call arg + 930 * push are on own call stack 931 * 932 * - prepare for cache log functions: 933 * set current_bbcc to BBCC that gets the costs for this BB execution 934 * attached 935 */ 936 static 937 void addBBSetupCall(ClgState* clgs) 938 { 939 IRDirty* di; 940 IRExpr *arg1, **argv; 941 942 arg1 = mkIRExpr_HWord( (HWord)clgs->bb ); 943 argv = mkIRExprVec_1(arg1); 944 di = unsafeIRDirty_0_N( 1, "setup_bbcc", 945 VG_(fnptr_to_fnentry)( & CLG_(setup_bbcc) ), 946 argv); 947 addStmtToIRSB( clgs->sbOut, IRStmt_Dirty(di) ); 948 } 949 950 951 static 952 IRSB* CLG_(instrument)( VgCallbackClosure* closure, 953 IRSB* sbIn, 954 VexGuestLayout* layout, 955 VexGuestExtents* vge, 956 VexArchInfo* archinfo_host, 957 IRType gWordTy, IRType hWordTy ) 958 { 959 Int i; 960 IRStmt* st; 961 Addr origAddr; 962 InstrInfo* curr_inode = NULL; 963 ClgState clgs; 964 UInt cJumps = 0; 965 IRTypeEnv* tyenv = sbIn->tyenv; 966 967 if (gWordTy 
!= hWordTy) { 968 /* We don't currently support this case. */ 969 VG_(tool_panic)("host/guest word size mismatch"); 970 } 971 972 // No instrumentation if it is switched off 973 if (! CLG_(instrument_state)) { 974 CLG_DEBUG(5, "instrument(BB %#lx) [Instrumentation OFF]\n", 975 (Addr)closure->readdr); 976 return sbIn; 977 } 978 979 CLG_DEBUG(3, "+ instrument(BB %#lx)\n", (Addr)closure->readdr); 980 981 /* Set up SB for instrumented IR */ 982 clgs.sbOut = deepCopyIRSBExceptStmts(sbIn); 983 984 // Copy verbatim any IR preamble preceding the first IMark 985 i = 0; 986 while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) { 987 addStmtToIRSB( clgs.sbOut, sbIn->stmts[i] ); 988 i++; 989 } 990 991 // Get the first statement, and origAddr from it 992 CLG_ASSERT(sbIn->stmts_used >0); 993 CLG_ASSERT(i < sbIn->stmts_used); 994 st = sbIn->stmts[i]; 995 CLG_ASSERT(Ist_IMark == st->tag); 996 997 origAddr = (Addr)st->Ist.IMark.addr + (Addr)st->Ist.IMark.delta; 998 CLG_ASSERT(origAddr == st->Ist.IMark.addr 999 + st->Ist.IMark.delta); // XXX: check no overflow 1000 1001 /* Get BB struct (creating if necessary). 1002 * JS: The hash table is keyed with orig_addr_noredir -- important! 1003 * JW: Why? 
If it is because of different chasing of the redirection, 1004 * this is not needed, as chasing is switched off in callgrind 1005 */ 1006 clgs.bb = CLG_(get_bb)(origAddr, sbIn, &(clgs.seen_before)); 1007 1008 addBBSetupCall(&clgs); 1009 1010 // Set up running state 1011 clgs.events_used = 0; 1012 clgs.ii_index = 0; 1013 clgs.instr_offset = 0; 1014 1015 for (/*use current i*/; i < sbIn->stmts_used; i++) { 1016 1017 st = sbIn->stmts[i]; 1018 CLG_ASSERT(isFlatIRStmt(st)); 1019 1020 switch (st->tag) { 1021 case Ist_NoOp: 1022 case Ist_AbiHint: 1023 case Ist_Put: 1024 case Ist_PutI: 1025 case Ist_MBE: 1026 break; 1027 1028 case Ist_IMark: { 1029 Addr64 cia = st->Ist.IMark.addr + st->Ist.IMark.delta; 1030 Int isize = st->Ist.IMark.len; 1031 CLG_ASSERT(clgs.instr_offset == (Addr)cia - origAddr); 1032 // If Vex fails to decode an instruction, the size will be zero. 1033 // Pretend otherwise. 1034 if (isize == 0) isize = VG_MIN_INSTR_SZB; 1035 1036 // Sanity-check size. 1037 tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB) 1038 || VG_CLREQ_SZB == isize ); 1039 1040 // Init the inode, record it as the current one. 1041 // Subsequent Dr/Dw/Dm events from the same instruction will 1042 // also use it. 1043 curr_inode = next_InstrInfo (&clgs, isize); 1044 1045 addEvent_Ir( &clgs, curr_inode ); 1046 break; 1047 } 1048 1049 case Ist_WrTmp: { 1050 IRExpr* data = st->Ist.WrTmp.data; 1051 if (data->tag == Iex_Load) { 1052 IRExpr* aexpr = data->Iex.Load.addr; 1053 // Note also, endianness info is ignored. I guess 1054 // that's not interesting. 
1055 addEvent_Dr( &clgs, curr_inode, 1056 sizeofIRType(data->Iex.Load.ty), aexpr ); 1057 } 1058 break; 1059 } 1060 1061 case Ist_Store: { 1062 IRExpr* data = st->Ist.Store.data; 1063 IRExpr* aexpr = st->Ist.Store.addr; 1064 addEvent_Dw( &clgs, curr_inode, 1065 sizeofIRType(typeOfIRExpr(sbIn->tyenv, data)), aexpr ); 1066 break; 1067 } 1068 1069 case Ist_StoreG: { 1070 IRStoreG* sg = st->Ist.StoreG.details; 1071 IRExpr* data = sg->data; 1072 IRExpr* addr = sg->addr; 1073 IRType type = typeOfIRExpr(tyenv, data); 1074 tl_assert(type != Ity_INVALID); 1075 addEvent_D_guarded( &clgs, curr_inode, 1076 sizeofIRType(type), addr, sg->guard, 1077 True/*isWrite*/ ); 1078 break; 1079 } 1080 1081 case Ist_LoadG: { 1082 IRLoadG* lg = st->Ist.LoadG.details; 1083 IRType type = Ity_INVALID; /* loaded type */ 1084 IRType typeWide = Ity_INVALID; /* after implicit widening */ 1085 IRExpr* addr = lg->addr; 1086 typeOfIRLoadGOp(lg->cvt, &typeWide, &type); 1087 tl_assert(type != Ity_INVALID); 1088 addEvent_D_guarded( &clgs, curr_inode, 1089 sizeofIRType(type), addr, lg->guard, 1090 False/*!isWrite*/ ); 1091 break; 1092 } 1093 1094 case Ist_Dirty: { 1095 Int dataSize; 1096 IRDirty* d = st->Ist.Dirty.details; 1097 if (d->mFx != Ifx_None) { 1098 /* This dirty helper accesses memory. Collect the details. */ 1099 tl_assert(d->mAddr != NULL); 1100 tl_assert(d->mSize != 0); 1101 dataSize = d->mSize; 1102 // Large (eg. 28B, 108B, 512B on x86) data-sized 1103 // instructions will be done inaccurately, but they're 1104 // very rare and this avoids errors from hitting more 1105 // than two cache lines in the simulation. 
1106 if (CLG_(clo).simulate_cache && dataSize > CLG_(min_line_size)) 1107 dataSize = CLG_(min_line_size); 1108 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) 1109 addEvent_Dr( &clgs, curr_inode, dataSize, d->mAddr ); 1110 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) 1111 addEvent_Dw( &clgs, curr_inode, dataSize, d->mAddr ); 1112 } else { 1113 tl_assert(d->mAddr == NULL); 1114 tl_assert(d->mSize == 0); 1115 } 1116 break; 1117 } 1118 1119 case Ist_CAS: { 1120 /* We treat it as a read and a write of the location. I 1121 think that is the same behaviour as it was before IRCAS 1122 was introduced, since prior to that point, the Vex 1123 front ends would translate a lock-prefixed instruction 1124 into a (normal) read followed by a (normal) write. */ 1125 Int dataSize; 1126 IRCAS* cas = st->Ist.CAS.details; 1127 CLG_ASSERT(cas->addr && isIRAtom(cas->addr)); 1128 CLG_ASSERT(cas->dataLo); 1129 dataSize = sizeofIRType(typeOfIRExpr(sbIn->tyenv, cas->dataLo)); 1130 if (cas->dataHi != NULL) 1131 dataSize *= 2; /* since this is a doubleword-cas */ 1132 addEvent_Dr( &clgs, curr_inode, dataSize, cas->addr ); 1133 addEvent_Dw( &clgs, curr_inode, dataSize, cas->addr ); 1134 addEvent_G( &clgs, curr_inode ); 1135 break; 1136 } 1137 1138 case Ist_LLSC: { 1139 IRType dataTy; 1140 if (st->Ist.LLSC.storedata == NULL) { 1141 /* LL */ 1142 dataTy = typeOfIRTemp(sbIn->tyenv, st->Ist.LLSC.result); 1143 addEvent_Dr( &clgs, curr_inode, 1144 sizeofIRType(dataTy), st->Ist.LLSC.addr ); 1145 /* flush events before LL, should help SC to succeed */ 1146 flushEvents( &clgs ); 1147 } else { 1148 /* SC */ 1149 dataTy = typeOfIRExpr(sbIn->tyenv, st->Ist.LLSC.storedata); 1150 addEvent_Dw( &clgs, curr_inode, 1151 sizeofIRType(dataTy), st->Ist.LLSC.addr ); 1152 /* I don't know whether the global-bus-lock cost should 1153 be attributed to the LL or the SC, but it doesn't 1154 really matter since they always have to be used in 1155 pairs anyway. Hence put it (quite arbitrarily) on 1156 the SC. 
*/ 1157 addEvent_G( &clgs, curr_inode ); 1158 } 1159 break; 1160 } 1161 1162 case Ist_Exit: { 1163 Bool guest_exit, inverted; 1164 1165 /* VEX code generation sometimes inverts conditional branches. 1166 * As Callgrind counts (conditional) jumps, it has to correct 1167 * inversions. The heuristic is the following: 1168 * (1) Callgrind switches off SB chasing and unrolling, and 1169 * therefore it assumes that a candidate for inversion only is 1170 * the last conditional branch in an SB. 1171 * (2) inversion is assumed if the branch jumps to the address of 1172 * the next guest instruction in memory. 1173 * This heuristic is precalculated in CLG_(collectBlockInfo)(). 1174 * 1175 * Branching behavior is also used for branch prediction. Note that 1176 * above heuristic is different from what Cachegrind does. 1177 * Cachegrind uses (2) for all branches. 1178 */ 1179 if (cJumps+1 == clgs.bb->cjmp_count) 1180 inverted = clgs.bb->cjmp_inverted; 1181 else 1182 inverted = False; 1183 1184 // call branch predictor only if this is a branch in guest code 1185 guest_exit = (st->Ist.Exit.jk == Ijk_Boring) || 1186 (st->Ist.Exit.jk == Ijk_Call) || 1187 (st->Ist.Exit.jk == Ijk_Ret); 1188 1189 if (guest_exit) { 1190 /* Stuff to widen the guard expression to a host word, so 1191 we can pass it to the branch predictor simulation 1192 functions easily. */ 1193 IRType tyW = hWordTy; 1194 IROp widen = tyW==Ity_I32 ? Iop_1Uto32 : Iop_1Uto64; 1195 IROp opXOR = tyW==Ity_I32 ? Iop_Xor32 : Iop_Xor64; 1196 IRTemp guard1 = newIRTemp(clgs.sbOut->tyenv, Ity_I1); 1197 IRTemp guardW = newIRTemp(clgs.sbOut->tyenv, tyW); 1198 IRTemp guard = newIRTemp(clgs.sbOut->tyenv, tyW); 1199 IRExpr* one = tyW==Ity_I32 ? IRExpr_Const(IRConst_U32(1)) 1200 : IRExpr_Const(IRConst_U64(1)); 1201 1202 /* Widen the guard expression. 
*/ 1203 addStmtToIRSB( clgs.sbOut, 1204 IRStmt_WrTmp( guard1, st->Ist.Exit.guard )); 1205 addStmtToIRSB( clgs.sbOut, 1206 IRStmt_WrTmp( guardW, 1207 IRExpr_Unop(widen, 1208 IRExpr_RdTmp(guard1))) ); 1209 /* If the exit is inverted, invert the sense of the guard. */ 1210 addStmtToIRSB( 1211 clgs.sbOut, 1212 IRStmt_WrTmp( 1213 guard, 1214 inverted ? IRExpr_Binop(opXOR, IRExpr_RdTmp(guardW), one) 1215 : IRExpr_RdTmp(guardW) 1216 )); 1217 /* And post the event. */ 1218 addEvent_Bc( &clgs, curr_inode, IRExpr_RdTmp(guard) ); 1219 } 1220 1221 /* We may never reach the next statement, so need to flush 1222 all outstanding transactions now. */ 1223 flushEvents( &clgs ); 1224 1225 CLG_ASSERT(clgs.ii_index>0); 1226 if (!clgs.seen_before) { 1227 ClgJumpKind jk; 1228 1229 if (st->Ist.Exit.jk == Ijk_Call) jk = jk_Call; 1230 else if (st->Ist.Exit.jk == Ijk_Ret) jk = jk_Return; 1231 else { 1232 if (IRConst2Addr(st->Ist.Exit.dst) == 1233 origAddr + curr_inode->instr_offset + curr_inode->instr_size) 1234 jk = jk_None; 1235 else 1236 jk = jk_Jump; 1237 } 1238 1239 clgs.bb->jmp[cJumps].instr = clgs.ii_index-1; 1240 clgs.bb->jmp[cJumps].jmpkind = jk; 1241 } 1242 1243 /* Update global variable jmps_passed before the jump 1244 * A correction is needed if VEX inverted the last jump condition 1245 */ 1246 UInt val = inverted ? cJumps+1 : cJumps; 1247 addConstMemStoreStmt( clgs.sbOut, 1248 (UWord) &CLG_(current_state).jmps_passed, 1249 val, hWordTy); 1250 cJumps++; 1251 1252 break; 1253 } 1254 1255 default: 1256 tl_assert(0); 1257 break; 1258 } 1259 1260 /* Copy the original statement */ 1261 addStmtToIRSB( clgs.sbOut, st ); 1262 1263 CLG_DEBUGIF(5) { 1264 VG_(printf)(" pass "); 1265 ppIRStmt(st); 1266 VG_(printf)("\n"); 1267 } 1268 } 1269 1270 /* Deal with branches to unknown destinations. Except ignore ones 1271 which are function returns as we assume the return stack 1272 predictor never mispredicts. 
*/ 1273 if ((sbIn->jumpkind == Ijk_Boring) || (sbIn->jumpkind == Ijk_Call)) { 1274 if (0) { ppIRExpr( sbIn->next ); VG_(printf)("\n"); } 1275 switch (sbIn->next->tag) { 1276 case Iex_Const: 1277 break; /* boring - branch to known address */ 1278 case Iex_RdTmp: 1279 /* looks like an indirect branch (branch to unknown) */ 1280 addEvent_Bi( &clgs, curr_inode, sbIn->next ); 1281 break; 1282 default: 1283 /* shouldn't happen - if the incoming IR is properly 1284 flattened, should only have tmp and const cases to 1285 consider. */ 1286 tl_assert(0); 1287 } 1288 } 1289 1290 /* At the end of the bb. Flush outstandings. */ 1291 flushEvents( &clgs ); 1292 1293 /* Update global variable jmps_passed at end of SB. 1294 * As CLG_(current_state).jmps_passed is reset to 0 in setup_bbcc, 1295 * this can be omitted if there is no conditional jump in this SB. 1296 * A correction is needed if VEX inverted the last jump condition 1297 */ 1298 if (cJumps>0) { 1299 UInt jmps_passed = cJumps; 1300 if (clgs.bb->cjmp_inverted) jmps_passed--; 1301 addConstMemStoreStmt( clgs.sbOut, 1302 (UWord) &CLG_(current_state).jmps_passed, 1303 jmps_passed, hWordTy); 1304 } 1305 CLG_ASSERT(clgs.bb->cjmp_count == cJumps); 1306 CLG_ASSERT(clgs.bb->instr_count == clgs.ii_index); 1307 1308 /* Info for final exit from BB */ 1309 { 1310 ClgJumpKind jk; 1311 1312 if (sbIn->jumpkind == Ijk_Call) jk = jk_Call; 1313 else if (sbIn->jumpkind == Ijk_Ret) jk = jk_Return; 1314 else { 1315 jk = jk_Jump; 1316 if ((sbIn->next->tag == Iex_Const) && 1317 (IRConst2Addr(sbIn->next->Iex.Const.con) == 1318 origAddr + clgs.instr_offset)) 1319 jk = jk_None; 1320 } 1321 clgs.bb->jmp[cJumps].jmpkind = jk; 1322 /* Instruction index of the call/ret at BB end 1323 * (it is wrong for fall-through, but does not matter) */ 1324 clgs.bb->jmp[cJumps].instr = clgs.ii_index-1; 1325 } 1326 1327 /* swap information of last exit with final exit if inverted */ 1328 if (clgs.bb->cjmp_inverted) { 1329 ClgJumpKind jk; 1330 UInt instr; 1331 1332 jk 
= clgs.bb->jmp[cJumps].jmpkind;
      clgs.bb->jmp[cJumps].jmpkind = clgs.bb->jmp[cJumps-1].jmpkind;
      clgs.bb->jmp[cJumps-1].jmpkind = jk;
      instr = clgs.bb->jmp[cJumps].instr;
      clgs.bb->jmp[cJumps].instr = clgs.bb->jmp[cJumps-1].instr;
      clgs.bb->jmp[cJumps-1].instr = instr;
   }

   /* If the BB was seen before, the cost count and byte length computed now
    * must equal the values stored in the BB; otherwise they are stored. */
   if (clgs.seen_before) {
      CLG_ASSERT(clgs.bb->cost_count == update_cost_offsets(&clgs));
      CLG_ASSERT(clgs.bb->instr_len == clgs.instr_offset);
   }
   else {
      clgs.bb->cost_count = update_cost_offsets(&clgs);
      clgs.bb->instr_len = clgs.instr_offset;
   }

   CLG_DEBUG(3, "- instrument(BB %#lx): byteLen %u, CJumps %u, CostLen %u\n",
             origAddr, clgs.bb->instr_len,
             clgs.bb->cjmp_count, clgs.bb->cost_count);
   if (cJumps>0) {
      /* list the instruction index recorded for every conditional jump */
      CLG_DEBUG(3, " [ ");
      for (i=0;i<cJumps;i++)
         CLG_DEBUG(3, "%d ", clgs.bb->jmp[i].instr);
      CLG_DEBUG(3, "], last inverted: %s \n",
                clgs.bb->cjmp_inverted ? "yes":"no");
   }

   return clgs.sbOut;
}

/*--------------------------------------------------------------------*/
/*--- Discarding BB info                                           ---*/
/*--------------------------------------------------------------------*/

// Called when a translation is removed from the translation cache for
// any reason at all: to free up space, because the guest code was
// unmapped or modified, or for any arbitrary reason.
//
// orig_addr64 is narrowed to Addr and identifies the BB to delete.
// vge must describe at least one extent (asserted); apart from that it is
// only used by the disabled debug print.
static
void clg_discard_superblock_info ( Addr64 orig_addr64, VexGuestExtents vge )
{
   Addr orig_addr = (Addr)orig_addr64;

   tl_assert(vge.n_used > 0);

   /* debugging aid, compiled in but never executed */
   if (0)
      VG_(printf)( "discard_superblock_info: %p, %p, %llu\n",
                   (void*)(Addr)orig_addr,
                   (void*)(Addr)vge.base[0], (ULong)vge.len[0]);

   // Get BB info, remove from table, free BB info. Simple! Note that we
   // use orig_addr, not the first instruction address in vge.
1384 CLG_(delete_bb)(orig_addr); 1385 } 1386 1387 1388 /*------------------------------------------------------------*/ 1389 /*--- CLG_(fini)() and related function ---*/ 1390 /*------------------------------------------------------------*/ 1391 1392 1393 1394 static void zero_thread_cost(thread_info* t) 1395 { 1396 Int i; 1397 1398 for(i = 0; i < CLG_(current_call_stack).sp; i++) { 1399 if (!CLG_(current_call_stack).entry[i].jcc) continue; 1400 1401 /* reset call counters to current for active calls */ 1402 CLG_(copy_cost)( CLG_(sets).full, 1403 CLG_(current_call_stack).entry[i].enter_cost, 1404 CLG_(current_state).cost ); 1405 CLG_(current_call_stack).entry[i].jcc->call_counter = 0; 1406 } 1407 1408 CLG_(forall_bbccs)(CLG_(zero_bbcc)); 1409 1410 /* set counter for last dump */ 1411 CLG_(copy_cost)( CLG_(sets).full, 1412 t->lastdump_cost, CLG_(current_state).cost ); 1413 } 1414 1415 void CLG_(zero_all_cost)(Bool only_current_thread) 1416 { 1417 if (VG_(clo_verbosity) > 1) 1418 VG_(message)(Vg_DebugMsg, " Zeroing costs...\n"); 1419 1420 if (only_current_thread) 1421 zero_thread_cost(CLG_(get_current_thread)()); 1422 else 1423 CLG_(forall_threads)(zero_thread_cost); 1424 1425 if (VG_(clo_verbosity) > 1) 1426 VG_(message)(Vg_DebugMsg, " ...done\n"); 1427 } 1428 1429 static 1430 void unwind_thread(thread_info* t) 1431 { 1432 /* unwind signal handlers */ 1433 while(CLG_(current_state).sig !=0) 1434 CLG_(post_signal)(CLG_(current_tid),CLG_(current_state).sig); 1435 1436 /* unwind regular call stack */ 1437 while(CLG_(current_call_stack).sp>0) 1438 CLG_(pop_call_stack)(); 1439 1440 /* reset context and function stack for context generation */ 1441 CLG_(init_exec_state)( &CLG_(current_state) ); 1442 CLG_(current_fn_stack).top = CLG_(current_fn_stack).bottom; 1443 } 1444 1445 static 1446 void zero_state_cost(thread_info* t) 1447 { 1448 CLG_(zero_cost)( CLG_(sets).full, CLG_(current_state).cost ); 1449 } 1450 1451 /* Ups, this can go very wrong... 
1452 FIXME: We should export this function or provide other means to get a handle */ 1453 extern void VG_(discard_translations) ( Addr64 start, ULong range, const HChar* who ); 1454 1455 void CLG_(set_instrument_state)(const HChar* reason, Bool state) 1456 { 1457 if (CLG_(instrument_state) == state) { 1458 CLG_DEBUG(2, "%s: instrumentation already %s\n", 1459 reason, state ? "ON" : "OFF"); 1460 return; 1461 } 1462 CLG_(instrument_state) = state; 1463 CLG_DEBUG(2, "%s: Switching instrumentation %s ...\n", 1464 reason, state ? "ON" : "OFF"); 1465 1466 VG_(discard_translations)( (Addr64)0x1000, (ULong) ~0xfffl, "callgrind"); 1467 1468 /* reset internal state: call stacks, simulator */ 1469 CLG_(forall_threads)(unwind_thread); 1470 CLG_(forall_threads)(zero_state_cost); 1471 (*CLG_(cachesim).clear)(); 1472 1473 if (VG_(clo_verbosity) > 1) 1474 VG_(message)(Vg_DebugMsg, "%s: instrumentation switched %s\n", 1475 reason, state ? "ON" : "OFF"); 1476 } 1477 1478 /* helper for dump_state_togdb */ 1479 static void dump_state_of_thread_togdb(thread_info* ti) 1480 { 1481 static HChar buf[512]; 1482 static FullCost sum = 0, tmp = 0; 1483 Int t, p, i; 1484 BBCC *from, *to; 1485 call_entry* ce; 1486 1487 t = CLG_(current_tid); 1488 CLG_(init_cost_lz)( CLG_(sets).full, &sum ); 1489 CLG_(copy_cost_lz)( CLG_(sets).full, &tmp, ti->lastdump_cost ); 1490 CLG_(add_diff_cost)( CLG_(sets).full, sum, ti->lastdump_cost, 1491 ti->states.entry[0]->cost); 1492 CLG_(copy_cost)( CLG_(sets).full, ti->lastdump_cost, tmp ); 1493 CLG_(sprint_mappingcost)(buf, CLG_(dumpmap), sum); 1494 VG_(gdb_printf)("events-%d: %s\n", t, buf); 1495 VG_(gdb_printf)("frames-%d: %d\n", t, CLG_(current_call_stack).sp); 1496 1497 ce = 0; 1498 for(i = 0; i < CLG_(current_call_stack).sp; i++) { 1499 ce = CLG_(get_call_entry)(i); 1500 /* if this frame is skipped, we don't have counters */ 1501 if (!ce->jcc) continue; 1502 1503 from = ce->jcc->from; 1504 VG_(gdb_printf)("function-%d-%d: %s\n",t, i, from->cxt->fn[0]->name); 
VG_(gdb_printf)("calls-%d-%d: %llu\n",t, i, ce->jcc->call_counter);

      /* FIXME: EventSets! */
      /* sum = jcc cost + (current cost - enter cost); enter_cost is saved
       * in tmp before the call and copied back afterwards */
      CLG_(copy_cost)( CLG_(sets).full, sum, ce->jcc->cost );
      CLG_(copy_cost)( CLG_(sets).full, tmp, ce->enter_cost );
      CLG_(add_diff_cost)( CLG_(sets).full, sum,
                           ce->enter_cost, CLG_(current_state).cost );
      CLG_(copy_cost)( CLG_(sets).full, ce->enter_cost, tmp );

      p = VG_(sprintf)(buf, "events-%d-%d: ",t, i);
      CLG_(sprint_mappingcost)(buf + p, CLG_(dumpmap), sum );
      VG_(gdb_printf)("%s\n", buf);
    }
    /* after the loop i equals the stack depth and ce is the last entry
     * fetched: if it has a jcc, print the function the call goes to */
    if (ce && ce->jcc) {
      to = ce->jcc->to;
      VG_(gdb_printf)("function-%d-%d: %s\n",t, i, to->cxt->fn[0]->name );
    }
}

/* Dump current state */
static void dump_state_togdb(void)
{
    static HChar buf[512];
    thread_info** th;
    int t, p;
    Int orig_tid = CLG_(current_tid);

    VG_(gdb_printf)("instrumentation: %s\n",
                    CLG_(instrument_state) ? "on":"off");
    /* nothing more is printed while instrumentation is off */
    if (!CLG_(instrument_state)) return;

    VG_(gdb_printf)("executed-bbs: %llu\n", CLG_(stat).bb_executions);
    VG_(gdb_printf)("executed-calls: %llu\n", CLG_(stat).call_counter);
    VG_(gdb_printf)("distinct-bbs: %d\n", CLG_(stat).distinct_bbs);
    VG_(gdb_printf)("distinct-calls: %d\n", CLG_(stat).distinct_jccs);
    VG_(gdb_printf)("distinct-functions: %d\n", CLG_(stat).distinct_fns);
    VG_(gdb_printf)("distinct-contexts: %d\n", CLG_(stat).distinct_contexts);

    /* "events:" line. Given here because it will be dynamic in the future */
    p = VG_(sprintf)(buf, "events: ");
    CLG_(sprint_eventmapping)(buf+p, CLG_(dumpmap));
    VG_(gdb_printf)("%s\n", buf);
    /* "part:" line (number of last part.
Is 0 at start */ 1548 VG_(gdb_printf)("part: %d\n", CLG_(get_dump_counter)()); 1549 1550 /* threads */ 1551 th = CLG_(get_threads)(); 1552 p = VG_(sprintf)(buf, "threads:"); 1553 for(t=1;t<VG_N_THREADS;t++) { 1554 if (!th[t]) continue; 1555 p += VG_(sprintf)(buf+p, " %d", t); 1556 } 1557 VG_(gdb_printf)("%s\n", buf); 1558 VG_(gdb_printf)("current-tid: %d\n", orig_tid); 1559 CLG_(forall_threads)(dump_state_of_thread_togdb); 1560 } 1561 1562 1563 static void print_monitor_help ( void ) 1564 { 1565 VG_(gdb_printf) ("\n"); 1566 VG_(gdb_printf) ("callgrind monitor commands:\n"); 1567 VG_(gdb_printf) (" dump [<dump_hint>]\n"); 1568 VG_(gdb_printf) (" dump counters\n"); 1569 VG_(gdb_printf) (" zero\n"); 1570 VG_(gdb_printf) (" zero counters\n"); 1571 VG_(gdb_printf) (" status\n"); 1572 VG_(gdb_printf) (" print status\n"); 1573 VG_(gdb_printf) (" instrumentation [on|off]\n"); 1574 VG_(gdb_printf) (" get/set (if on/off given) instrumentation state\n"); 1575 VG_(gdb_printf) ("\n"); 1576 } 1577 1578 /* return True if request recognised, False otherwise */ 1579 static Bool handle_gdb_monitor_command (ThreadId tid, const HChar *req) 1580 { 1581 HChar* wcmd; 1582 HChar s[VG_(strlen(req)) + 1]; /* copy for strtok_r */ 1583 HChar *ssaveptr; 1584 1585 VG_(strcpy) (s, req); 1586 1587 wcmd = VG_(strtok_r) (s, " ", &ssaveptr); 1588 switch (VG_(keyword_id) ("help dump zero status instrumentation", 1589 wcmd, kwd_report_duplicated_matches)) { 1590 case -2: /* multiple matches */ 1591 return True; 1592 case -1: /* not found */ 1593 return False; 1594 case 0: /* help */ 1595 print_monitor_help(); 1596 return True; 1597 case 1: { /* dump */ 1598 CLG_(dump_profile)(req, False); 1599 return True; 1600 } 1601 case 2: { /* zero */ 1602 CLG_(zero_all_cost)(False); 1603 return True; 1604 } 1605 1606 case 3: { /* status */ 1607 HChar* arg = VG_(strtok_r) (0, " ", &ssaveptr); 1608 if (arg && (VG_(strcmp)(arg, "internal") == 0)) { 1609 /* internal interface to callgrind_control */ 1610 
dump_state_togdb(); 1611 return True; 1612 } 1613 1614 if (!CLG_(instrument_state)) { 1615 VG_(gdb_printf)("No status available as instrumentation is switched off\n"); 1616 } else { 1617 // Status information to be improved ... 1618 thread_info** th = CLG_(get_threads)(); 1619 Int t, tcount = 0; 1620 for(t=1;t<VG_N_THREADS;t++) 1621 if (th[t]) tcount++; 1622 VG_(gdb_printf)("%d thread(s) running.\n", tcount); 1623 } 1624 return True; 1625 } 1626 1627 case 4: { /* instrumentation */ 1628 HChar* arg = VG_(strtok_r) (0, " ", &ssaveptr); 1629 if (!arg) { 1630 VG_(gdb_printf)("instrumentation: %s\n", 1631 CLG_(instrument_state) ? "on":"off"); 1632 } 1633 else 1634 CLG_(set_instrument_state)("Command", VG_(strcmp)(arg,"off")!=0); 1635 return True; 1636 } 1637 1638 default: 1639 tl_assert(0); 1640 return False; 1641 } 1642 } 1643 1644 static 1645 Bool CLG_(handle_client_request)(ThreadId tid, UWord *args, UWord *ret) 1646 { 1647 if (!VG_IS_TOOL_USERREQ('C','T',args[0]) 1648 && VG_USERREQ__GDB_MONITOR_COMMAND != args[0]) 1649 return False; 1650 1651 switch(args[0]) { 1652 case VG_USERREQ__DUMP_STATS: 1653 CLG_(dump_profile)("Client Request", True); 1654 *ret = 0; /* meaningless */ 1655 break; 1656 1657 case VG_USERREQ__DUMP_STATS_AT: 1658 { 1659 HChar buf[512]; 1660 VG_(sprintf)(buf,"Client Request: %s", (HChar*)args[1]); 1661 CLG_(dump_profile)(buf, True); 1662 *ret = 0; /* meaningless */ 1663 } 1664 break; 1665 1666 case VG_USERREQ__ZERO_STATS: 1667 CLG_(zero_all_cost)(True); 1668 *ret = 0; /* meaningless */ 1669 break; 1670 1671 case VG_USERREQ__TOGGLE_COLLECT: 1672 CLG_(current_state).collect = !CLG_(current_state).collect; 1673 CLG_DEBUG(2, "Client Request: toggled collection state to %s\n", 1674 CLG_(current_state).collect ? 
"ON" : "OFF"); 1675 *ret = 0; /* meaningless */ 1676 break; 1677 1678 case VG_USERREQ__START_INSTRUMENTATION: 1679 CLG_(set_instrument_state)("Client Request", True); 1680 *ret = 0; /* meaningless */ 1681 break; 1682 1683 case VG_USERREQ__STOP_INSTRUMENTATION: 1684 CLG_(set_instrument_state)("Client Request", False); 1685 *ret = 0; /* meaningless */ 1686 break; 1687 1688 case VG_USERREQ__GDB_MONITOR_COMMAND: { 1689 Bool handled = handle_gdb_monitor_command (tid, (HChar*)args[1]); 1690 if (handled) 1691 *ret = 1; 1692 else 1693 *ret = 0; 1694 return handled; 1695 } 1696 default: 1697 return False; 1698 } 1699 1700 return True; 1701 } 1702 1703 1704 /* Syscall Timing */ 1705 1706 /* struct timeval syscalltime[VG_N_THREADS]; */ 1707 #if CLG_MICROSYSTIME 1708 #include <sys/time.h> 1709 #include <sys/syscall.h> 1710 extern Int VG_(do_syscall) ( UInt, ... ); 1711 1712 ULong syscalltime[VG_N_THREADS]; 1713 #else 1714 UInt syscalltime[VG_N_THREADS]; 1715 #endif 1716 1717 static 1718 void CLG_(pre_syscalltime)(ThreadId tid, UInt syscallno, 1719 UWord* args, UInt nArgs) 1720 { 1721 if (CLG_(clo).collect_systime) { 1722 #if CLG_MICROSYSTIME 1723 struct vki_timeval tv_now; 1724 VG_(do_syscall)(__NR_gettimeofday, (UInt)&tv_now, (UInt)NULL); 1725 syscalltime[tid] = tv_now.tv_sec * 1000000ULL + tv_now.tv_usec; 1726 #else 1727 syscalltime[tid] = VG_(read_millisecond_timer)(); 1728 #endif 1729 } 1730 } 1731 1732 static 1733 void CLG_(post_syscalltime)(ThreadId tid, UInt syscallno, 1734 UWord* args, UInt nArgs, SysRes res) 1735 { 1736 if (CLG_(clo).collect_systime && 1737 CLG_(current_state).bbcc) { 1738 Int o; 1739 #if CLG_MICROSYSTIME 1740 struct vki_timeval tv_now; 1741 ULong diff; 1742 1743 VG_(do_syscall)(__NR_gettimeofday, (UInt)&tv_now, (UInt)NULL); 1744 diff = (tv_now.tv_sec * 1000000ULL + tv_now.tv_usec) - syscalltime[tid]; 1745 #else 1746 UInt diff = VG_(read_millisecond_timer)() - syscalltime[tid]; 1747 #endif 1748 1749 /* offset o is for "SysCount", o+1 for "SysTime" */ 
1750 o = fullOffset(EG_SYS); 1751 CLG_ASSERT(o>=0); 1752 CLG_DEBUG(0," Time (Off %d) for Syscall %d: %ull\n", o, syscallno, diff); 1753 1754 CLG_(current_state).cost[o] ++; 1755 CLG_(current_state).cost[o+1] += diff; 1756 if (!CLG_(current_state).bbcc->skipped) 1757 CLG_(init_cost_lz)(CLG_(sets).full, 1758 &(CLG_(current_state).bbcc->skipped)); 1759 CLG_(current_state).bbcc->skipped[o] ++; 1760 CLG_(current_state).bbcc->skipped[o+1] += diff; 1761 } 1762 } 1763 1764 static UInt ULong_width(ULong n) 1765 { 1766 UInt w = 0; 1767 while (n > 0) { 1768 n = n / 10; 1769 w++; 1770 } 1771 if (w == 0) w = 1; 1772 return w + (w-1)/3; // add space for commas 1773 } 1774 1775 static 1776 void branchsim_printstat(int l1, int l2, int l3) 1777 { 1778 static HChar buf1[128], buf2[128], buf3[128]; 1779 static HChar fmt[128]; 1780 FullCost total; 1781 ULong Bc_total_b, Bc_total_mp, Bi_total_b, Bi_total_mp; 1782 ULong B_total_b, B_total_mp; 1783 1784 total = CLG_(total_cost); 1785 Bc_total_b = total[ fullOffset(EG_BC) ]; 1786 Bc_total_mp = total[ fullOffset(EG_BC)+1 ]; 1787 Bi_total_b = total[ fullOffset(EG_BI) ]; 1788 Bi_total_mp = total[ fullOffset(EG_BI)+1 ]; 1789 1790 /* Make format string, getting width right for numbers */ 1791 VG_(sprintf)(fmt, "%%s %%,%dllu (%%,%dllu cond + %%,%dllu ind)\n", 1792 l1, l2, l3); 1793 1794 if (0 == Bc_total_b) Bc_total_b = 1; 1795 if (0 == Bi_total_b) Bi_total_b = 1; 1796 B_total_b = Bc_total_b + Bi_total_b; 1797 B_total_mp = Bc_total_mp + Bi_total_mp; 1798 1799 VG_(umsg)("\n"); 1800 VG_(umsg)(fmt, "Branches: ", 1801 B_total_b, Bc_total_b, Bi_total_b); 1802 1803 VG_(umsg)(fmt, "Mispredicts: ", 1804 B_total_mp, Bc_total_mp, Bi_total_mp); 1805 1806 VG_(percentify)(B_total_mp, B_total_b, 1, l1+1, buf1); 1807 VG_(percentify)(Bc_total_mp, Bc_total_b, 1, l2+1, buf2); 1808 VG_(percentify)(Bi_total_mp, Bi_total_b, 1, l3+1, buf3); 1809 1810 VG_(umsg)("Mispred rate: %s (%s + %s )\n", buf1, buf2,buf3); 1811 } 1812 1813 static 1814 void clg_print_stats(void) 
{
  /* total number of BB lookups, summed over the four debug-info classes */
  int BB_lookups =
    CLG_(stat).full_debug_BBs +
    CLG_(stat).fn_name_debug_BBs +
    CLG_(stat).file_line_debug_BBs +
    CLG_(stat).no_debug_BBs;

  /* Hash table stats */
  VG_(message)(Vg_DebugMsg, "Distinct objects: %d\n",
               CLG_(stat).distinct_objs);
  VG_(message)(Vg_DebugMsg, "Distinct files: %d\n",
               CLG_(stat).distinct_files);
  VG_(message)(Vg_DebugMsg, "Distinct fns: %d\n",
               CLG_(stat).distinct_fns);
  VG_(message)(Vg_DebugMsg, "Distinct contexts:%d\n",
               CLG_(stat).distinct_contexts);
  VG_(message)(Vg_DebugMsg, "Distinct BBs: %d\n",
               CLG_(stat).distinct_bbs);
  VG_(message)(Vg_DebugMsg, "Cost entries: %d (Chunks %d)\n",
               CLG_(costarray_entries), CLG_(costarray_chunks));
  VG_(message)(Vg_DebugMsg, "Distinct BBCCs: %d\n",
               CLG_(stat).distinct_bbccs);
  VG_(message)(Vg_DebugMsg, "Distinct JCCs: %d\n",
               CLG_(stat).distinct_jccs);
  VG_(message)(Vg_DebugMsg, "Distinct skips: %d\n",
               CLG_(stat).distinct_skips);
  VG_(message)(Vg_DebugMsg, "BB lookups: %d\n",
               BB_lookups);
  /* percentages are only printed when there was at least one lookup,
   * which also avoids a division by zero */
  if (BB_lookups>0) {
    VG_(message)(Vg_DebugMsg, "With full debug info:%3d%% (%d)\n",
                 CLG_(stat).full_debug_BBs * 100 / BB_lookups,
                 CLG_(stat).full_debug_BBs);
    VG_(message)(Vg_DebugMsg, "With file/line debug info:%3d%% (%d)\n",
                 CLG_(stat).file_line_debug_BBs * 100 / BB_lookups,
                 CLG_(stat).file_line_debug_BBs);
    VG_(message)(Vg_DebugMsg, "With fn name debug info:%3d%% (%d)\n",
                 CLG_(stat).fn_name_debug_BBs * 100 / BB_lookups,
                 CLG_(stat).fn_name_debug_BBs);
    VG_(message)(Vg_DebugMsg, "With no debug info:%3d%% (%d)\n",
                 CLG_(stat).no_debug_BBs * 100 / BB_lookups,
                 CLG_(stat).no_debug_BBs);
  }
  VG_(message)(Vg_DebugMsg, "BBCC Clones: %d\n",
               CLG_(stat).bbcc_clones);
  VG_(message)(Vg_DebugMsg, "BBs Retranslated: %d\n",
               CLG_(stat).bb_retranslations);
  VG_(message)(Vg_DebugMsg, "Distinct instrs: %d\n",
CLG_(stat).distinct_instrs);
  VG_(message)(Vg_DebugMsg, "");

  /* LRU miss counters and execution counters */
  VG_(message)(Vg_DebugMsg, "LRU Contxt Misses: %d\n",
               CLG_(stat).cxt_lru_misses);
  VG_(message)(Vg_DebugMsg, "LRU BBCC Misses: %d\n",
               CLG_(stat).bbcc_lru_misses);
  VG_(message)(Vg_DebugMsg, "LRU JCC Misses: %d\n",
               CLG_(stat).jcc_lru_misses);
  VG_(message)(Vg_DebugMsg, "BBs Executed: %llu\n",
               CLG_(stat).bb_executions);
  VG_(message)(Vg_DebugMsg, "Calls: %llu\n",
               CLG_(stat).call_counter);
  VG_(message)(Vg_DebugMsg, "CondJMP followed: %llu\n",
               CLG_(stat).jcnd_counter);
  VG_(message)(Vg_DebugMsg, "Boring JMPs: %llu\n",
               CLG_(stat).jump_counter);
  VG_(message)(Vg_DebugMsg, "Recursive calls: %llu\n",
               CLG_(stat).rec_call_counter);
  VG_(message)(Vg_DebugMsg, "Returns: %llu\n",
               CLG_(stat).ret_counter);
}


/* Final work at the end of the run: finish the cache simulator, unwind all
 * threads, write the profile dump, and (unless verbosity is 0) print the
 * event summary. */
static
void finish(void)
{
  HChar buf[32+COSTS_LEN];
  HChar fmt[128];
  Int l1, l2, l3;
  FullCost total;

  CLG_DEBUG(0, "finish()\n");

  (*CLG_(cachesim).finish)();

  /* pop all remaining items from CallStack for correct sum
   */
  CLG_(forall_threads)(unwind_thread);

  CLG_(dump_profile)(0, False);

  /* with verbosity 0 no summary is printed */
  if (VG_(clo_verbosity) == 0) return;

  if (VG_(clo_stats)) {
    VG_(message)(Vg_DebugMsg, "\n");
    clg_print_stats();
    VG_(message)(Vg_DebugMsg, "\n");
  }

  CLG_(sprint_eventmapping)(buf, CLG_(dumpmap));
  VG_(message)(Vg_UserMsg, "Events : %s\n", buf);
  CLG_(sprint_mappingcost)(buf, CLG_(dumpmap), CLG_(total_cost));
  VG_(message)(Vg_UserMsg, "Collected : %s\n", buf);
  VG_(message)(Vg_UserMsg, "\n");

  /* determine value widths for statistics */
  total = CLG_(total_cost);
  l1 = ULong_width( total[fullOffset(EG_IR)] );
  l2 = l3 = 0;
  if (CLG_(clo).simulate_cache) {
    l2 = ULong_width( total[fullOffset(EG_DR)] );
    l3 = ULong_width( total[fullOffset(EG_DW)] );
  }
1926 if (CLG_(clo).simulate_branch) { 1927 int l2b = ULong_width( total[fullOffset(EG_BC)] ); 1928 int l3b = ULong_width( total[fullOffset(EG_BI)] ); 1929 if (l2b > l2) l2 = l2b; 1930 if (l3b > l3) l3 = l3b; 1931 } 1932 1933 /* Make format string, getting width right for numbers */ 1934 VG_(sprintf)(fmt, "%%s %%,%dllu\n", l1); 1935 1936 /* Always print this */ 1937 VG_(umsg)(fmt, "I refs: ", total[fullOffset(EG_IR)] ); 1938 1939 if (CLG_(clo).simulate_cache) 1940 (*CLG_(cachesim).printstat)(l1, l2, l3); 1941 1942 if (CLG_(clo).simulate_branch) 1943 branchsim_printstat(l1, l2, l3); 1944 1945 } 1946 1947 1948 void CLG_(fini)(Int exitcode) 1949 { 1950 finish(); 1951 } 1952 1953 1954 /*--------------------------------------------------------------------*/ 1955 /*--- Setup ---*/ 1956 /*--------------------------------------------------------------------*/ 1957 1958 static void clg_start_client_code_callback ( ThreadId tid, ULong blocks_done ) 1959 { 1960 static ULong last_blocks_done = 0; 1961 1962 if (0) 1963 VG_(printf)("%d R %llu\n", (Int)tid, blocks_done); 1964 1965 /* throttle calls to CLG_(run_thread) by number of BBs executed */ 1966 if (blocks_done - last_blocks_done < 5000) return; 1967 last_blocks_done = blocks_done; 1968 1969 CLG_(run_thread)( tid ); 1970 } 1971 1972 static 1973 void CLG_(post_clo_init)(void) 1974 { 1975 if (VG_(clo_vex_control).iropt_register_updates 1976 != VexRegUpdSpAtMemAccess) { 1977 CLG_DEBUG(1, " Using user specified value for " 1978 "--vex-iropt-register-updates\n"); 1979 } else { 1980 CLG_DEBUG(1, 1981 " Using default --vex-iropt-register-updates=" 1982 "sp-at-mem-access\n"); 1983 } 1984 1985 if (VG_(clo_vex_control).iropt_unroll_thresh != 0) { 1986 VG_(message)(Vg_UserMsg, 1987 "callgrind only works with --vex-iropt-unroll-thresh=0\n" 1988 "=> resetting it back to 0\n"); 1989 VG_(clo_vex_control).iropt_unroll_thresh = 0; // cannot be overriden. 
1990 } 1991 if (VG_(clo_vex_control).guest_chase_thresh != 0) { 1992 VG_(message)(Vg_UserMsg, 1993 "callgrind only works with --vex-guest-chase-thresh=0\n" 1994 "=> resetting it back to 0\n"); 1995 VG_(clo_vex_control).guest_chase_thresh = 0; // cannot be overriden. 1996 } 1997 1998 CLG_DEBUG(1, " dump threads: %s\n", CLG_(clo).separate_threads ? "Yes":"No"); 1999 CLG_DEBUG(1, " call sep. : %d\n", CLG_(clo).separate_callers); 2000 CLG_DEBUG(1, " rec. sep. : %d\n", CLG_(clo).separate_recursions); 2001 2002 if (!CLG_(clo).dump_line && !CLG_(clo).dump_instr && !CLG_(clo).dump_bb) { 2003 VG_(message)(Vg_UserMsg, "Using source line as position.\n"); 2004 CLG_(clo).dump_line = True; 2005 } 2006 2007 CLG_(init_dumps)(); 2008 2009 (*CLG_(cachesim).post_clo_init)(); 2010 2011 CLG_(init_eventsets)(); 2012 CLG_(init_statistics)(& CLG_(stat)); 2013 CLG_(init_cost_lz)( CLG_(sets).full, &CLG_(total_cost) ); 2014 2015 /* initialize hash tables */ 2016 CLG_(init_obj_table)(); 2017 CLG_(init_cxt_table)(); 2018 CLG_(init_bb_hash)(); 2019 2020 CLG_(init_threads)(); 2021 CLG_(run_thread)(1); 2022 2023 CLG_(instrument_state) = CLG_(clo).instrument_atstart; 2024 2025 if (VG_(clo_verbosity > 0)) { 2026 VG_(message)(Vg_UserMsg, 2027 "For interactive control, run 'callgrind_control%s%s -h'.\n", 2028 (VG_(arg_vgdb_prefix) ? " " : ""), 2029 (VG_(arg_vgdb_prefix) ? VG_(arg_vgdb_prefix) : "")); 2030 } 2031 } 2032 2033 static 2034 void CLG_(pre_clo_init)(void) 2035 { 2036 VG_(details_name) ("Callgrind"); 2037 VG_(details_version) (NULL); 2038 VG_(details_description) ("a call-graph generating cache profiler"); 2039 VG_(details_copyright_author)("Copyright (C) 2002-2013, and GNU GPL'd, " 2040 "by Josef Weidendorfer et al."); 2041 VG_(details_bug_reports_to) (VG_BUGS_TO); 2042 VG_(details_avg_translation_sizeB) ( 500 ); 2043 2044 VG_(clo_vex_control).iropt_register_updates 2045 = VexRegUpdSpAtMemAccess; // overridable by the user. 
VG_(clo_vex_control).iropt_unroll_thresh = 0; // cannot be overridden.
    VG_(clo_vex_control).guest_chase_thresh = 0; // cannot be overridden.

    /* the three basic tool entry points */
    VG_(basic_tool_funcs)        (CLG_(post_clo_init),
                                  CLG_(instrument),
                                  CLG_(fini));

    VG_(needs_superblock_discards)(clg_discard_superblock_info);


    VG_(needs_command_line_options)(CLG_(process_cmd_line_option),
                                    CLG_(print_usage),
                                    CLG_(print_debug_usage));

    VG_(needs_client_requests)(CLG_(handle_client_request));
    VG_(needs_print_stats)    (clg_print_stats);
    VG_(needs_syscall_wrapper)(CLG_(pre_syscalltime),
                               CLG_(post_syscalltime));

    /* event tracking callbacks */
    VG_(track_start_client_code)  ( & clg_start_client_code_callback );
    VG_(track_pre_deliver_signal) ( & CLG_(pre_signal) );
    VG_(track_post_deliver_signal)( & CLG_(post_signal) );

    CLG_(set_clo_defaults)();
}

VG_DETERMINE_INTERFACE_VERSION(CLG_(pre_clo_init))

/*--------------------------------------------------------------------*/
/*--- end                                                   main.c ---*/
/*--------------------------------------------------------------------*/