
/*--------------------------------------------------------------------*/
/*--- Callgrind                                                    ---*/
/*--- main.c                                                       ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Callgrind, a Valgrind tool for call-graph
   profiling of programs.

   Copyright (C) 2002-2010, Josef Weidendorfer (Josef.Weidendorfer (at) gmx.de)

   This tool is derived from and contains code from Cachegrind
   Copyright (C) 2002-2010 Nicholas Nethercote (njn (at) valgrind.org)

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "config.h"
#include "callgrind.h"
#include "global.h"

#include <pub_tool_threadstate.h>

#include "cg_branchpred.c"

/*------------------------------------------------------------*/
/*--- Global variables                                     ---*/
/*------------------------------------------------------------*/

/* for all threads */
CommandLineOptions CLG_(clo);
Statistics CLG_(stat);
Bool CLG_(instrument_state) = True; /* instrumentation on? */

/* thread and signal handler specific */
exec_state CLG_(current_state);


/*------------------------------------------------------------*/
/*--- Statistics                                           ---*/
/*------------------------------------------------------------*/

static void CLG_(init_statistics)(Statistics* s)
{
   s->call_counter        = 0;
   s->jcnd_counter        = 0;
   s->jump_counter        = 0;
   s->rec_call_counter    = 0;
   s->ret_counter         = 0;
   s->bb_executions       = 0;

   s->context_counter     = 0;
   s->bb_retranslations   = 0;

   s->distinct_objs       = 0;
   s->distinct_files      = 0;
   s->distinct_fns        = 0;
   s->distinct_contexts   = 0;
   s->distinct_bbs        = 0;
   s->distinct_bbccs      = 0;
   s->distinct_instrs     = 0;
   s->distinct_skips      = 0;

   s->bb_hash_resizes     = 0;
   s->bbcc_hash_resizes   = 0;
   s->jcc_hash_resizes    = 0;
   s->cxt_hash_resizes    = 0;
   s->fn_array_resizes    = 0;
   s->call_stack_resizes  = 0;
   s->fn_stack_resizes    = 0;

   s->full_debug_BBs      = 0;
   s->file_line_debug_BBs = 0;
   s->fn_name_debug_BBs   = 0;
   s->no_debug_BBs        = 0;
   s->bbcc_lru_misses     = 0;
   s->jcc_lru_misses      = 0;
   s->cxt_lru_misses      = 0;
   s->bbcc_clones         = 0;
}


/*------------------------------------------------------------*/
/*--- Simple callbacks (not cache simulator)               ---*/
/*------------------------------------------------------------*/

VG_REGPARM(1)
static void log_global_event(InstrInfo* ii)
{
   ULong* cost_Bus;

   CLG_DEBUG(6, "log_global_event:  Ir %#lx/%u\n",
             CLG_(bb_base) + ii->instr_offset, ii->instr_size);

   if (!CLG_(current_state).collect) return;

   CLG_ASSERT( (ii->eventset->mask & (1u<<EG_BUS))>0 );

   CLG_(current_state).cost[ fullOffset(EG_BUS) ]++;

   if (CLG_(current_state).nonskipped)
      cost_Bus = CLG_(current_state).nonskipped->skipped + fullOffset(EG_BUS);
   else
      cost_Bus = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_BUS];
   cost_Bus[0]++;
}


/* For branches, we consult two different predictors, one which
   predicts taken/untaken for conditional branches, and the other
   which predicts the branch target address for indirect branches
   (jump-to-register style ones). */
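/* Illustrative examples (x86 syntax): a conditional branch such as
   "jne label" is handled by the taken/untaken predictor via
   log_cond_branch() below, while an indirect transfer such as
   "jmp *%eax" or "call *8(%ebx)" is handled by the target-address
   predictor via log_ind_branch(). */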
static VG_REGPARM(2)
void log_cond_branch(InstrInfo* ii, Word taken)
{
   Bool miss;
   Int fullOffset_Bc;
   ULong* cost_Bc;

   CLG_DEBUG(6, "log_cond_branch:  Ir %#lx, taken %lu\n",
             CLG_(bb_base) + ii->instr_offset, taken);

   miss = 1 & do_cond_branch_predict(CLG_(bb_base) + ii->instr_offset, taken);

   if (!CLG_(current_state).collect) return;

   CLG_ASSERT( (ii->eventset->mask & (1u<<EG_BC))>0 );

   if (CLG_(current_state).nonskipped)
      cost_Bc = CLG_(current_state).nonskipped->skipped + fullOffset(EG_BC);
   else
      cost_Bc = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_BC];

   fullOffset_Bc = fullOffset(EG_BC);
   CLG_(current_state).cost[ fullOffset_Bc ]++;
   cost_Bc[0]++;
   if (miss) {
      CLG_(current_state).cost[ fullOffset_Bc+1 ]++;
      cost_Bc[1]++;
   }
}

static VG_REGPARM(2)
void log_ind_branch(InstrInfo* ii, UWord actual_dst)
{
   Bool miss;
   Int fullOffset_Bi;
   ULong* cost_Bi;

   CLG_DEBUG(6, "log_ind_branch:  Ir %#lx, dst %#lx\n",
             CLG_(bb_base) + ii->instr_offset, actual_dst);

   miss = 1 & do_ind_branch_predict(CLG_(bb_base) + ii->instr_offset, actual_dst);

   if (!CLG_(current_state).collect) return;

   CLG_ASSERT( (ii->eventset->mask & (1u<<EG_BI))>0 );

   if (CLG_(current_state).nonskipped)
      cost_Bi = CLG_(current_state).nonskipped->skipped + fullOffset(EG_BI);
   else
      cost_Bi = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_BI];

   fullOffset_Bi = fullOffset(EG_BI);
   CLG_(current_state).cost[ fullOffset_Bi ]++;
   cost_Bi[0]++;
   if (miss) {
      CLG_(current_state).cost[ fullOffset_Bi+1 ]++;
      cost_Bi[1]++;
   }
}

/*------------------------------------------------------------*/
/*--- Instrumentation structures and event queue handling ---*/
/*------------------------------------------------------------*/

/* Maintain an ordered list of memory events which are outstanding, in
   the sense that no IR has yet been generated to do the relevant
   helper calls.  The BB is scanned top to bottom and memory events
   are added to the end of the list, merging with the most recent
   notified event where possible (Dw immediately following Dr and
   having the same size and EA can be merged).

   This merging is done so that for architectures which have
   load-op-store instructions (x86, amd64), the insn is treated as if
   it makes just one memory reference (a modify), rather than two (a
   read followed by a write at the same address).

   At various points the list will need to be flushed, that is, IR
   generated from it.  That must happen before any possible exit from
   the block (the end, or an IRStmt_Exit).  Flushing also takes place
   when there is no space to add a new event.

   If we required the simulation statistics to be up to date with
   respect to possible memory exceptions, the list would have to be
   flushed before each memory reference.  That would however lose
   performance by inhibiting event-merging during flushing.

   Flushing the list consists of walking it start to end and emitting
   instrumentation IR for each event, in the order in which they
   appear.  It may be possible to emit a single call for two adjacent
   events in order to reduce the number of helper function calls made.
   For example, it could well be profitable to handle two adjacent Ir
   events with a single helper call. */
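/* Worked example (illustrative): on x86, "addl %eax,(%ebx)" is a
   load-op-store insn.  VEX flattens it into a load from and a store to
   the same address, so the queue first receives Dr(szB=4, EA=t) and
   then, when the store with identical size and EA is notified,
   addEvent_Dw below rewrites the pending Dr into Dm(szB=4, EA=t): the
   cache simulator sees one "modify" reference instead of a separate
   read and write. */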
typedef
   IRExpr
   IRAtom;

typedef
   enum {
      Ev_Ir,  // Instruction read
      Ev_Dr,  // Data read
      Ev_Dw,  // Data write
      Ev_Dm,  // Data modify (read then write)
      Ev_Bc,  // branch conditional
      Ev_Bi,  // branch indirect (to unknown destination)
      Ev_G    // Global bus event
   }
   EventTag;

typedef
   struct {
      EventTag   tag;
      InstrInfo* inode;
      union {
         struct {
         } Ir;
         struct {
            IRAtom* ea;
            Int     szB;
         } Dr;
         struct {
            IRAtom* ea;
            Int     szB;
         } Dw;
         struct {
            IRAtom* ea;
            Int     szB;
         } Dm;
         struct {
            IRAtom* taken; /* :: Ity_I1 */
         } Bc;
         struct {
            IRAtom* dst;
         } Bi;
         struct {
         } G;
      } Ev;
   }
   Event;

static void init_Event ( Event* ev ) {
   VG_(memset)(ev, 0, sizeof(Event));
}

static IRAtom* get_Event_dea ( Event* ev ) {
   switch (ev->tag) {
      case Ev_Dr: return ev->Ev.Dr.ea;
      case Ev_Dw: return ev->Ev.Dw.ea;
      case Ev_Dm: return ev->Ev.Dm.ea;
      default:    tl_assert(0);
   }
}

static Int get_Event_dszB ( Event* ev ) {
   switch (ev->tag) {
      case Ev_Dr: return ev->Ev.Dr.szB;
      case Ev_Dw: return ev->Ev.Dw.szB;
      case Ev_Dm: return ev->Ev.Dm.szB;
      default:    tl_assert(0);
   }
}


/* Up to this many unnotified events are allowed.  Number is
   arbitrary.  Larger numbers allow more event merging to occur, but
   potentially induce more spilling due to extending live ranges of
   address temporaries. */
#define N_EVENTS 16
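/* Sketch of the queue's lifecycle (summarising the code below): the
   addEvent_* functions append one Event per notified Ir/Dr/Dw/Bc/Bi/G,
   flushing first if all N_EVENTS slots are in use.  addEvent_Dw may
   instead coalesce with a matching pending Dr into a Dm.  flushEvents
   walks the queue in order, emits one dirty helper call per (possibly
   merged) event group, and resets events_used to 0. */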

/* A struct which holds all the running state during instrumentation.
   Mostly to avoid passing loads of parameters everywhere. */
typedef struct {
   /* The current outstanding-memory-event list. */
   Event events[N_EVENTS];
   Int   events_used;

   /* The array of InstrInfo's is part of BB struct. */
   BB* bb;

   /* BB seen before (i.e. re-instrumentation) */
   Bool seen_before;

   /* Number of InstrInfo bins 'used' so far. */
   UInt ii_index;

   // current offset of guest instructions from BB start
   UInt instr_offset;

   /* The output SB being constructed. */
   IRSB* sbOut;
} ClgState;


static void showEvent ( Event* ev )
{
   switch (ev->tag) {
   case Ev_Ir:
      VG_(printf)("Ir (InstrInfo %p) at +%d\n",
                  ev->inode, ev->inode->instr_offset);
      break;
   case Ev_Dr:
      VG_(printf)("Dr (InstrInfo %p) at +%d %d EA=",
                  ev->inode, ev->inode->instr_offset, ev->Ev.Dr.szB);
      ppIRExpr(ev->Ev.Dr.ea);
      VG_(printf)("\n");
      break;
   case Ev_Dw:
      VG_(printf)("Dw (InstrInfo %p) at +%d %d EA=",
                  ev->inode, ev->inode->instr_offset, ev->Ev.Dw.szB);
      ppIRExpr(ev->Ev.Dw.ea);
      VG_(printf)("\n");
      break;
   case Ev_Dm:
      VG_(printf)("Dm (InstrInfo %p) at +%d %d EA=",
                  ev->inode, ev->inode->instr_offset, ev->Ev.Dm.szB);
      ppIRExpr(ev->Ev.Dm.ea);
      VG_(printf)("\n");
      break;
   case Ev_Bc:
      VG_(printf)("Bc %p  GA=", ev->inode);
      ppIRExpr(ev->Ev.Bc.taken);
      VG_(printf)("\n");
      break;
   case Ev_Bi:
      VG_(printf)("Bi %p  DST=", ev->inode);
      ppIRExpr(ev->Ev.Bi.dst);
      VG_(printf)("\n");
      break;
   case Ev_G:
      VG_(printf)("G  %p\n", ev->inode);
      break;
   default:
      tl_assert(0);
      break;
   }
}

/* Generate code for all outstanding memory events, and mark the queue
   empty.  Code is generated into cgs->sbOut, and this activity
   'consumes' slots in cgs->bb. */

static void flushEvents ( ClgState* clgs )
{
   Int      i, regparms, inew;
   Char*    helperName;
   void*    helperAddr;
   IRExpr** argv;
   IRExpr*  i_node_expr;
   IRDirty* di;
   Event*   ev;
   Event*   ev2;
   Event*   ev3;

   if (!clgs->seen_before) {
      // extend event sets as needed
      // available sets: D0 Dr
      for(i=0; i<clgs->events_used; i++) {
         ev = &clgs->events[i];
         switch(ev->tag) {
         case Ev_Ir:
            // an Ir event always comes first for a guest instruction
            CLG_ASSERT(ev->inode->eventset == 0);
            ev->inode->eventset = CLG_(sets).base;
            break;
         case Ev_Dr:
            // extend event set by Dr counters
            ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
                                                        EG_DR);
            break;
         case Ev_Dw:
         case Ev_Dm:
            // extend event set by Dw counters
            ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
                                                        EG_DW);
            break;
         case Ev_Bc:
            // extend event set by Bc counters
            ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
                                                        EG_BC);
            break;
         case Ev_Bi:
            // extend event set by Bi counters
            ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
                                                        EG_BI);
            break;
         case Ev_G:
            // extend event set by Bus counter
            ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
                                                        EG_BUS);
            break;
         default:
            tl_assert(0);
         }
      }
   }

   for(i = 0; i < clgs->events_used; i = inew) {

      helperName = NULL;
      helperAddr = NULL;
      argv       = NULL;
      regparms   = 0;

      /* generate IR to notify event i and possibly the ones
         immediately following it. */
      tl_assert(i >= 0 && i < clgs->events_used);

      ev  = &clgs->events[i];
      ev2 = ( i < clgs->events_used-1 ? &clgs->events[i+1] : NULL );
      ev3 = ( i < clgs->events_used-2 ? &clgs->events[i+2] : NULL );

      CLG_DEBUGIF(5) {
         VG_(printf)("   flush ");
         showEvent( ev );
      }

      i_node_expr = mkIRExpr_HWord( (HWord)ev->inode );

      /* Decide on helper fn to call and args to pass it, and advance
         i appropriately.
         Dm events have the same effect as Dw events. */
      switch (ev->tag) {
      case Ev_Ir:
         /* Merge an Ir with a following Dr. */
         if (ev2 && ev2->tag == Ev_Dr) {
            /* Why is this true?  It's because we're merging an Ir
               with a following Dr.  The Ir derives from the
               instruction's IMark and the Dr from data references
               which follow it.  In short it holds because each insn
               starts with an IMark, hence an Ev_Ir, and so these Dr
               must pertain to the immediately preceding Ir.  Same
               applies to analogous assertions in the subsequent
               cases. */
            tl_assert(ev2->inode == ev->inode);
            helperName = CLG_(cachesim).log_1I1Dr_name;
            helperAddr = CLG_(cachesim).log_1I1Dr;
            argv = mkIRExprVec_3( i_node_expr,
                                  get_Event_dea(ev2),
                                  mkIRExpr_HWord( get_Event_dszB(ev2) ) );
            regparms = 3;
            inew = i+2;
         }
         /* Merge an Ir with a following Dw/Dm. */
         else
         if (ev2 && (ev2->tag == Ev_Dw || ev2->tag == Ev_Dm)) {
            tl_assert(ev2->inode == ev->inode);
            helperName = CLG_(cachesim).log_1I1Dw_name;
            helperAddr = CLG_(cachesim).log_1I1Dw;
            argv = mkIRExprVec_3( i_node_expr,
                                  get_Event_dea(ev2),
                                  mkIRExpr_HWord( get_Event_dszB(ev2) ) );
            regparms = 3;
            inew = i+2;
         }
         /* Merge an Ir with two following Irs. */
         else
         if (ev2 && ev3 && ev2->tag == Ev_Ir && ev3->tag == Ev_Ir) {
            helperName = CLG_(cachesim).log_3I0D_name;
            helperAddr = CLG_(cachesim).log_3I0D;
            argv = mkIRExprVec_3( i_node_expr,
                                  mkIRExpr_HWord( (HWord)ev2->inode ),
                                  mkIRExpr_HWord( (HWord)ev3->inode ) );
            regparms = 3;
            inew = i+3;
         }
         /* Merge an Ir with one following Ir. */
         else
         if (ev2 && ev2->tag == Ev_Ir) {
            helperName = CLG_(cachesim).log_2I0D_name;
            helperAddr = CLG_(cachesim).log_2I0D;
            argv = mkIRExprVec_2( i_node_expr,
                                  mkIRExpr_HWord( (HWord)ev2->inode ) );
            regparms = 2;
            inew = i+2;
         }
         /* No merging possible; emit as-is. */
         else {
            helperName = CLG_(cachesim).log_1I0D_name;
            helperAddr = CLG_(cachesim).log_1I0D;
            argv = mkIRExprVec_1( i_node_expr );
            regparms = 1;
            inew = i+1;
         }
         break;
      case Ev_Dr:
         /* Data read or modify */
         helperName = CLG_(cachesim).log_0I1Dr_name;
         helperAddr = CLG_(cachesim).log_0I1Dr;
         argv = mkIRExprVec_3( i_node_expr,
                               get_Event_dea(ev),
                               mkIRExpr_HWord( get_Event_dszB(ev) ) );
         regparms = 3;
         inew = i+1;
         break;
      case Ev_Dw:
      case Ev_Dm:
         /* Data write */
         helperName = CLG_(cachesim).log_0I1Dw_name;
         helperAddr = CLG_(cachesim).log_0I1Dw;
         argv = mkIRExprVec_3( i_node_expr,
                               get_Event_dea(ev),
                               mkIRExpr_HWord( get_Event_dszB(ev) ) );
         regparms = 3;
         inew = i+1;
         break;
      case Ev_Bc:
         /* Conditional branch */
         helperName = "log_cond_branch";
         helperAddr = &log_cond_branch;
         argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bc.taken );
         regparms = 2;
         inew = i+1;
         break;
      case Ev_Bi:
         /* Branch to an unknown destination */
         helperName = "log_ind_branch";
         helperAddr = &log_ind_branch;
         argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bi.dst );
         regparms = 2;
         inew = i+1;
         break;
      case Ev_G:
         /* Global bus event (CAS, LOCK-prefix, LL-SC, etc) */
         helperName = "log_global_event";
         helperAddr = &log_global_event;
         argv = mkIRExprVec_1( i_node_expr );
         regparms = 1;
         inew = i+1;
         break;
      default:
         tl_assert(0);
      }

      CLG_DEBUGIF(5) {
         if (inew > i+1) {
            VG_(printf)("   merge ");
            showEvent( ev2 );
         }
         if (inew > i+2) {
            VG_(printf)("   merge ");
            showEvent( ev3 );
         }
         if (helperAddr)
            VG_(printf)("   call  %s (%p)\n",
                        helperName, helperAddr);
      }

      /* helper could be unset depending on the simulator used */
      if (helperAddr == 0) continue;

      /* Add the helper. */
      tl_assert(helperName);
      tl_assert(helperAddr);
      tl_assert(argv);
      di = unsafeIRDirty_0_N( regparms,
                              helperName, VG_(fnptr_to_fnentry)( helperAddr ),
                              argv );
      addStmtToIRSB( clgs->sbOut, IRStmt_Dirty(di) );
   }

   clgs->events_used = 0;
}
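/* Summary of the merge patterns emitted above (helper names from the
   CLG_(cachesim) dispatch table):
      Ir+Dr     -> log_1I1Dr(inode, ea, szB)
      Ir+Dw/Dm  -> log_1I1Dw(inode, ea, szB)
      Ir+Ir+Ir  -> log_3I0D(inode1, inode2, inode3)
      Ir+Ir     -> log_2I0D(inode1, inode2)
      Ir        -> log_1I0D(inode)
      Dr        -> log_0I1Dr(inode, ea, szB)
      Dw/Dm     -> log_0I1Dw(inode, ea, szB)
   Bc/Bi/G events always go to their dedicated helpers and are never
   merged. */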

static void addEvent_Ir ( ClgState* clgs, InstrInfo* inode )
{
   Event* evt;
   tl_assert(clgs->seen_before || (inode->eventset == 0));
   if (!CLG_(clo).simulate_cache) return;

   if (clgs->events_used == N_EVENTS)
      flushEvents(clgs);
   tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
   evt = &clgs->events[clgs->events_used];
   init_Event(evt);
   evt->tag   = Ev_Ir;
   evt->inode = inode;
   clgs->events_used++;
}

static
void addEvent_Dr ( ClgState* clgs, InstrInfo* inode, Int datasize, IRAtom* ea )
{
   Event* evt;
   tl_assert(isIRAtom(ea));
   tl_assert(datasize >= 1 && datasize <= MIN_LINE_SIZE);
   if (!CLG_(clo).simulate_cache) return;

   if (clgs->events_used == N_EVENTS)
      flushEvents(clgs);
   tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
   evt = &clgs->events[clgs->events_used];
   init_Event(evt);
   evt->tag       = Ev_Dr;
   evt->inode     = inode;
   evt->Ev.Dr.szB = datasize;
   evt->Ev.Dr.ea  = ea;
   clgs->events_used++;
}

static
void addEvent_Dw ( ClgState* clgs, InstrInfo* inode, Int datasize, IRAtom* ea )
{
   Event* lastEvt;
   Event* evt;
   tl_assert(isIRAtom(ea));
   tl_assert(datasize >= 1 && datasize <= MIN_LINE_SIZE);
   if (!CLG_(clo).simulate_cache) return;

   /* Is it possible to merge this write with the preceding read?
      (Only look at the last event if there is one.) */
   if (clgs->events_used > 0) {
      lastEvt = &clgs->events[clgs->events_used-1];
      if (lastEvt->tag == Ev_Dr
          && lastEvt->Ev.Dr.szB == datasize
          && lastEvt->inode == inode
          && eqIRAtom(lastEvt->Ev.Dr.ea, ea))
      {
         lastEvt->tag = Ev_Dm;
         return;
      }
   }

   /* No.  Add as normal. */
   if (clgs->events_used == N_EVENTS)
      flushEvents(clgs);
   tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
   evt = &clgs->events[clgs->events_used];
   init_Event(evt);
   evt->tag       = Ev_Dw;
   evt->inode     = inode;
   evt->Ev.Dw.szB = datasize;
   evt->Ev.Dw.ea  = ea;
   clgs->events_used++;
}

static
void addEvent_Bc ( ClgState* clgs, InstrInfo* inode, IRAtom* guard )
{
   Event* evt;
   tl_assert(isIRAtom(guard));
   tl_assert(typeOfIRExpr(clgs->sbOut->tyenv, guard)
             == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64));
   if (!CLG_(clo).simulate_branch) return;

   if (clgs->events_used == N_EVENTS)
      flushEvents(clgs);
   tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
   evt = &clgs->events[clgs->events_used];
   init_Event(evt);
   evt->tag         = Ev_Bc;
   evt->inode       = inode;
   evt->Ev.Bc.taken = guard;
   clgs->events_used++;
}

static
void addEvent_Bi ( ClgState* clgs, InstrInfo* inode, IRAtom* whereTo )
{
   Event* evt;
   tl_assert(isIRAtom(whereTo));
   tl_assert(typeOfIRExpr(clgs->sbOut->tyenv, whereTo)
             == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64));
   if (!CLG_(clo).simulate_branch) return;

   if (clgs->events_used == N_EVENTS)
      flushEvents(clgs);
   tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
   evt = &clgs->events[clgs->events_used];
   init_Event(evt);
   evt->tag       = Ev_Bi;
   evt->inode     = inode;
   evt->Ev.Bi.dst = whereTo;
   clgs->events_used++;
}

static
void addEvent_G ( ClgState* clgs, InstrInfo* inode )
{
   Event* evt;
   if (!CLG_(clo).collect_bus) return;

   if (clgs->events_used == N_EVENTS)
      flushEvents(clgs);
   tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
   evt = &clgs->events[clgs->events_used];
   init_Event(evt);
   evt->tag   = Ev_G;
   evt->inode = inode;
   clgs->events_used++;
}

/* Initialise or check (if seen before) an InstrInfo for the next insn.
   We can only set instr_offset/instr_size here.  The required event
   set and resulting cost offset depend on the events (Ir/Dr/Dw/Dm) of
   the guest instruction.  The event set is extended as required when
   the event queue is flushed (once any Dm events have been
   determined); cost offsets are determined at the end of BB
   instrumentation. */
static
InstrInfo* next_InstrInfo ( ClgState* clgs, UInt instr_size )
{
   InstrInfo* ii;
   tl_assert(clgs->ii_index >= 0);
   tl_assert(clgs->ii_index < clgs->bb->instr_count);
   ii = &clgs->bb->instr[ clgs->ii_index ];

   if (clgs->seen_before) {
      CLG_ASSERT(ii->instr_offset == clgs->instr_offset);
      CLG_ASSERT(ii->instr_size == instr_size);
   }
   else {
      ii->instr_offset = clgs->instr_offset;
      ii->instr_size = instr_size;
      ii->cost_offset = 0;
      ii->eventset = 0;
   }

   clgs->ii_index++;
   clgs->instr_offset += instr_size;
   CLG_(stat).distinct_instrs++;

   return ii;
}

// return total number of cost values needed for this BB
static
UInt update_cost_offsets( ClgState* clgs )
{
   Int i;
   InstrInfo* ii;
   UInt cost_offset = 0;

   CLG_ASSERT(clgs->bb->instr_count == clgs->ii_index);
   for(i=0; i<clgs->ii_index; i++) {
      ii = &clgs->bb->instr[i];
      if (clgs->seen_before) {
         CLG_ASSERT(ii->cost_offset == cost_offset);
      } else
         ii->cost_offset = cost_offset;
      cost_offset += ii->eventset ? ii->eventset->size : 0;
   }

   return cost_offset;
}
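/* Worked example (illustrative): for a BB of three instructions whose
   event sets end up with sizes 2, 0 and 5, update_cost_offsets()
   assigns cost_offset 0 to insn 0, 2 to insn 1 (unused, as its event
   set is empty) and 2 to insn 2, and returns 7, i.e. the BB needs a
   cost array of 7 ULongs (stored as bb->cost_count by the caller). */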

/*------------------------------------------------------------*/
/*--- Instrumentation                                      ---*/
/*------------------------------------------------------------*/

#if defined(VG_BIGENDIAN)
# define CLGEndness Iend_BE
#elif defined(VG_LITTLEENDIAN)
# define CLGEndness Iend_LE
#else
# error "Unknown endianness"
#endif

static
Addr IRConst2Addr(IRConst* con)
{
   Addr addr;

   if (sizeof(Addr) == 4) {
      CLG_ASSERT( con->tag == Ico_U32 );
      addr = con->Ico.U32;
   }
   else if (sizeof(Addr) == 8) {
      CLG_ASSERT( con->tag == Ico_U64 );
      addr = con->Ico.U64;
   }
   else
      VG_(tool_panic)("Callgrind: invalid Addr type");

   return addr;
}

/* First pass over a BB to instrument, counting instructions and jumps.
 * This is needed to know the size of the BB struct to allocate.
 *
 * Called from CLG_(get_bb)
 */
void CLG_(collectBlockInfo)(IRSB* sbIn,
                            /*INOUT*/ UInt* instrs,
                            /*INOUT*/ UInt* cjmps,
                            /*INOUT*/ Bool* cjmp_inverted)
{
   Int i;
   IRStmt* st;
   Addr instrAddr = 0, jumpDst;
   UInt instrLen = 0;
   Bool toNextInstr = False;

   // Ist_Exit has to be ignored in preamble code, before the first IMark:
   // preamble code is added by VEX for self-modifying code and has
   // nothing to do with client code
   Bool inPreamble = True;

   if (!sbIn) return;

   for (i = 0; i < sbIn->stmts_used; i++) {
      st = sbIn->stmts[i];
      if (Ist_IMark == st->tag) {
         inPreamble = False;

         instrAddr = (Addr)ULong_to_Ptr(st->Ist.IMark.addr);
         instrLen  = st->Ist.IMark.len;

         (*instrs)++;
         toNextInstr = False;
      }
      if (inPreamble) continue;
      if (Ist_Exit == st->tag) {
         jumpDst = IRConst2Addr(st->Ist.Exit.dst);
         toNextInstr = (jumpDst == instrAddr + instrLen);

         (*cjmps)++;
      }
   }

   /* If the last instruction of the BB conditionally jumps to the next
    * instruction (= the first instruction of the next BB in memory),
    * the branch condition was inverted by VEX.
    */
   *cjmp_inverted = toNextInstr;
}
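/* Concrete case (illustrative): a 2-byte conditional branch at 0x1003
   whose Ist_Exit destination is 0x1005 jumps to its own fall-through
   address.  If that exit is the last one in the SB, VEX must have
   inverted the original branch condition, so *cjmp_inverted ends up
   True and is corrected for during instrumentation below. */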

static
void addConstMemStoreStmt( IRSB* bbOut, UWord addr, UInt val, IRType hWordTy)
{
   addStmtToIRSB( bbOut,
                  IRStmt_Store(CLGEndness,
                               IRExpr_Const(hWordTy == Ity_I32 ?
                                            IRConst_U32( addr ) :
                                            IRConst_U64( addr )),
                               IRExpr_Const(IRConst_U32(val)) ));
}


/* Add helper call to setup_bbcc, with pointer to BB struct as argument
 *
 * precondition for setup_bbcc:
 * - jmps_passed has number of cond. jumps passed in last executed BB
 * - current_bbcc has a pointer to the BBCC of the last executed BB
 *   Thus, if bbcc_jmpkind is != -1 (JmpNone),
 *     current_bbcc->bb->jmp_addr
 *   gives the address of the jump source.
 *
 * The setup does 2 things:
 * - trace call:
 *   * unwind own call stack, i.e. sync our ESP with the real ESP;
 *     this is for ESP manipulation (longjmps, C++ exception handling)
 *     and RET
 *   * for CALLs or JMPs crossing objects, record the call arguments
 *     and push a frame onto our own call stack
 *
 * - prepare for cache log functions:
 *   set current_bbcc to the BBCC that gets the costs of this BB
 *   execution attached
 */
static
void addBBSetupCall(ClgState* clgs)
{
   IRDirty* di;
   IRExpr  *arg1, **argv;

   arg1 = mkIRExpr_HWord( (HWord)clgs->bb );
   argv = mkIRExprVec_1(arg1);
   di = unsafeIRDirty_0_N( 1, "setup_bbcc",
                           VG_(fnptr_to_fnentry)( & CLG_(setup_bbcc) ),
                           argv);
   addStmtToIRSB( clgs->sbOut, IRStmt_Dirty(di) );
}
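/* Ordering note: setup_bbcc runs at BB entry, before any of the log_*
   helpers of this BB.  It consumes jmps_passed as written by the
   addConstMemStoreStmt stores of the previously executed BB, and
   establishes current_bbcc before any costs are recorded into it. */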

static
IRSB* CLG_(instrument)( VgCallbackClosure* closure,
                        IRSB* sbIn,
                        VexGuestLayout* layout,
                        VexGuestExtents* vge,
                        IRType gWordTy, IRType hWordTy )
{
   Int        i, isize;
   IRStmt*    st;
   Addr       origAddr;
   Addr64     cia; /* address of current insn */
   InstrInfo* curr_inode = NULL;
   ClgState   clgs;
   UInt       cJumps = 0;


   if (gWordTy != hWordTy) {
      /* We don't currently support this case. */
      VG_(tool_panic)("host/guest word size mismatch");
   }

   // No instrumentation if it is switched off
   if (! CLG_(instrument_state)) {
      CLG_DEBUG(5, "instrument(BB %#lx) [Instrumentation OFF]\n",
                (Addr)closure->readdr);
      return sbIn;
   }

   CLG_DEBUG(3, "+ instrument(BB %#lx)\n", (Addr)closure->readdr);

   /* Set up SB for instrumented IR */
   clgs.sbOut = deepCopyIRSBExceptStmts(sbIn);

   // Copy verbatim any IR preamble preceding the first IMark
   i = 0;
   while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
      addStmtToIRSB( clgs.sbOut, sbIn->stmts[i] );
      i++;
   }

   // Get the first statement, and origAddr from it
   CLG_ASSERT(sbIn->stmts_used > 0);
   CLG_ASSERT(i < sbIn->stmts_used);
   st = sbIn->stmts[i];
   CLG_ASSERT(Ist_IMark == st->tag);

   origAddr = (Addr)st->Ist.IMark.addr;
   cia      = st->Ist.IMark.addr;
   isize    = st->Ist.IMark.len;
   CLG_ASSERT(origAddr == st->Ist.IMark.addr);  // XXX: check no overflow

   /* Get BB struct (creating if necessary).
    * JS: The hash table is keyed with orig_addr_noredir -- important!
    * JW: Why? If it is because of different chasing of the redirection,
    *     this is not needed, as chasing is switched off in callgrind
    */
   clgs.bb = CLG_(get_bb)(origAddr, sbIn, &(clgs.seen_before));

   addBBSetupCall(&clgs);

   // Set up running state
   clgs.events_used = 0;
   clgs.ii_index = 0;
   clgs.instr_offset = 0;

   for (/*use current i*/; i < sbIn->stmts_used; i++) {

      st = sbIn->stmts[i];
      CLG_ASSERT(isFlatIRStmt(st));

      switch (st->tag) {
      case Ist_NoOp:
      case Ist_AbiHint:
      case Ist_Put:
      case Ist_PutI:
      case Ist_MBE:
         break;

      case Ist_IMark: {
         cia   = st->Ist.IMark.addr;
         isize = st->Ist.IMark.len;
         CLG_ASSERT(clgs.instr_offset == (Addr)cia - origAddr);
         // If Vex fails to decode an instruction, the size will be zero.
         // Pretend otherwise.
         if (isize == 0) isize = VG_MIN_INSTR_SZB;

         // Sanity-check size.
         tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB)
                    || VG_CLREQ_SZB == isize );

         // Init the inode, record it as the current one.
         // Subsequent Dr/Dw/Dm events from the same instruction will
         // also use it.
         curr_inode = next_InstrInfo (&clgs, isize);

         addEvent_Ir( &clgs, curr_inode );
         break;
      }

      case Ist_WrTmp: {
         IRExpr* data = st->Ist.WrTmp.data;
         if (data->tag == Iex_Load) {
            IRExpr* aexpr = data->Iex.Load.addr;
            // Note also, endianness info is ignored.  I guess
            // that's not interesting.
            addEvent_Dr( &clgs, curr_inode,
                         sizeofIRType(data->Iex.Load.ty), aexpr );
         }
         break;
      }

      case Ist_Store: {
         IRExpr* data  = st->Ist.Store.data;
         IRExpr* aexpr = st->Ist.Store.addr;
         addEvent_Dw( &clgs, curr_inode,
                      sizeofIRType(typeOfIRExpr(sbIn->tyenv, data)), aexpr );
         break;
      }

      case Ist_Dirty: {
         Int dataSize;
         IRDirty* d = st->Ist.Dirty.details;
         if (d->mFx != Ifx_None) {
            /* This dirty helper accesses memory.  Collect the details. */
            tl_assert(d->mAddr != NULL);
            tl_assert(d->mSize != 0);
            dataSize = d->mSize;
            // Large (eg. 28B, 108B, 512B on x86) data-sized
            // instructions will be done inaccurately, but they're
            // very rare and this avoids errors from hitting more
            // than two cache lines in the simulation.
            if (dataSize > MIN_LINE_SIZE)
               dataSize = MIN_LINE_SIZE;
            if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
               addEvent_Dr( &clgs, curr_inode, dataSize, d->mAddr );
            if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
               addEvent_Dw( &clgs, curr_inode, dataSize, d->mAddr );
         } else {
            tl_assert(d->mAddr == NULL);
            tl_assert(d->mSize == 0);
         }
         break;
      }

      case Ist_CAS: {
         /* We treat it as a read and a write of the location.  I
            think that is the same behaviour as it was before IRCAS
            was introduced, since prior to that point, the Vex front
            ends would translate a lock-prefixed instruction into a
            (normal) read followed by a (normal) write. */
         Int    dataSize;
         IRCAS* cas = st->Ist.CAS.details;
         CLG_ASSERT(cas->addr && isIRAtom(cas->addr));
         CLG_ASSERT(cas->dataLo);
         dataSize = sizeofIRType(typeOfIRExpr(sbIn->tyenv, cas->dataLo));
         if (cas->dataHi != NULL)
            dataSize *= 2; /* since this is a doubleword-cas */
         addEvent_Dr( &clgs, curr_inode, dataSize, cas->addr );
         addEvent_Dw( &clgs, curr_inode, dataSize, cas->addr );
         addEvent_G ( &clgs, curr_inode );
         break;
      }

      case Ist_LLSC: {
         IRType dataTy;
         if (st->Ist.LLSC.storedata == NULL) {
            /* LL */
            dataTy = typeOfIRTemp(sbIn->tyenv, st->Ist.LLSC.result);
            addEvent_Dr( &clgs, curr_inode,
                         sizeofIRType(dataTy), st->Ist.LLSC.addr );
         } else {
            /* SC */
            dataTy = typeOfIRExpr(sbIn->tyenv, st->Ist.LLSC.storedata);
            addEvent_Dw( &clgs, curr_inode,
                         sizeofIRType(dataTy), st->Ist.LLSC.addr );
            /* I don't know whether the global-bus-lock cost should
               be attributed to the LL or the SC, but it doesn't
               really matter since they always have to be used in
               pairs anyway.  Hence put it (quite arbitrarily) on
               the SC. */
            addEvent_G( &clgs, curr_inode );
         }
         break;
      }
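      /* Note on atomics (summarising the two cases above): an IRCAS
         counts as one read plus one write of the location plus a
         global bus event, so e.g. a LOCK-prefixed x86 cmpxchg shows up
         as Dr + Dw at the same EA plus one Ev_G.  For LL/SC the data
         access is attributed to the LL (read) or SC (write), with the
         bus event arbitrarily put on the SC. */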
      case Ist_Exit: {
         Bool guest_exit, inverted;

         /* VEX code generation sometimes inverts conditional branches.
          * As Callgrind counts (conditional) jumps, it has to correct
          * inversions.  The heuristic is the following:
          * (1) Callgrind switches off SB chasing and unrolling, and
          *     therefore assumes that only the last conditional branch
          *     in an SB is a candidate for inversion.
          * (2) Inversion is assumed if the branch jumps to the address
          *     of the next guest instruction in memory.
          * This heuristic is precalculated in CLG_(collectBlockInfo)().
          *
          * Branching behavior is also used for branch prediction.  Note
          * that the above heuristic is different from what Cachegrind
          * does.  Cachegrind uses (2) for all branches.
          */
         if (cJumps+1 == clgs.bb->cjmp_count)
            inverted = clgs.bb->cjmp_inverted;
         else
            inverted = False;

         // call branch predictor only if this is a branch in guest code
         guest_exit = (st->Ist.Exit.jk == Ijk_Boring) ||
                      (st->Ist.Exit.jk == Ijk_Call) ||
                      (st->Ist.Exit.jk == Ijk_Ret);

         if (guest_exit) {
            /* Stuff to widen the guard expression to a host word, so
               we can pass it to the branch predictor simulation
               functions easily. */
            IRType  tyW    = hWordTy;
            IROp    widen  = tyW==Ity_I32 ? Iop_1Uto32 : Iop_1Uto64;
            IROp    opXOR  = tyW==Ity_I32 ? Iop_Xor32  : Iop_Xor64;
            IRTemp  guard1 = newIRTemp(clgs.sbOut->tyenv, Ity_I1);
            IRTemp  guardW = newIRTemp(clgs.sbOut->tyenv, tyW);
            IRTemp  guard  = newIRTemp(clgs.sbOut->tyenv, tyW);
            IRExpr* one    = tyW==Ity_I32 ? IRExpr_Const(IRConst_U32(1))
                                          : IRExpr_Const(IRConst_U64(1));

            /* Widen the guard expression. */
            addStmtToIRSB( clgs.sbOut,
                           IRStmt_WrTmp( guard1, st->Ist.Exit.guard ));
            addStmtToIRSB( clgs.sbOut,
                           IRStmt_WrTmp( guardW,
                                         IRExpr_Unop(widen,
                                                     IRExpr_RdTmp(guard1))) );
            /* If the exit is inverted, invert the sense of the guard. */
            addStmtToIRSB(
               clgs.sbOut,
               IRStmt_WrTmp(
                  guard,
                  inverted ? IRExpr_Binop(opXOR, IRExpr_RdTmp(guardW), one)
                           : IRExpr_RdTmp(guardW)
               ));
            /* And post the event. */
            addEvent_Bc( &clgs, curr_inode, IRExpr_RdTmp(guard) );
         }
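         /* Widening example (illustrative, 32-bit host with an
            inverted exit): the three statements above amount to
               t_guard1 = <original Ity_I1 guard>
               t_guardW = 1Uto32(t_guard1)
               t_guard  = Xor32(t_guardW, 0x1:I32)
            so log_cond_branch receives 'taken' in the sense of the
            guest branch, not of the possibly negated VEX exit. */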
         /* We may never reach the next statement, so need to flush
            all outstanding transactions now. */
         flushEvents( &clgs );

         CLG_ASSERT(clgs.ii_index > 0);
         if (!clgs.seen_before) {
            clgs.bb->jmp[cJumps].instr = clgs.ii_index - 1;
            clgs.bb->jmp[cJumps].skip  = False;
         }

         /* Update global variable jmps_passed before the jump.
          * A correction is needed if VEX inverted the last jump
          * condition.
          */
         addConstMemStoreStmt( clgs.sbOut,
                               (UWord) &CLG_(current_state).jmps_passed,
                               inverted ? cJumps+1 : cJumps, hWordTy);
         cJumps++;

         break;
      }

      default:
         tl_assert(0);
         break;
      }

      /* Copy the original statement */
      addStmtToIRSB( clgs.sbOut, st );

      CLG_DEBUGIF(5) {
         VG_(printf)("   pass  ");
         ppIRStmt(st);
         VG_(printf)("\n");
      }
   }

   /* Deal with branches to unknown destinations.  Except ignore ones
      which are function returns as we assume the return stack
      predictor never mispredicts. */
   if ((sbIn->jumpkind == Ijk_Boring) || (sbIn->jumpkind == Ijk_Call)) {
      if (0) { ppIRExpr( sbIn->next ); VG_(printf)("\n"); }
      switch (sbIn->next->tag) {
      case Iex_Const:
         break; /* boring - branch to known address */
      case Iex_RdTmp:
         /* looks like an indirect branch (branch to unknown) */
         addEvent_Bi( &clgs, curr_inode, sbIn->next );
         break;
      default:
         /* shouldn't happen - if the incoming IR is properly
            flattened, should only have tmp and const cases to
            consider. */
         tl_assert(0);
      }
   }

   /* At the end of the BB, flush all outstanding events. */
   flushEvents( &clgs );

   /* Always update global variable jmps_passed at end of bb.
    * A correction is needed if VEX inverted the last jump condition.
    */
   {
      UInt jmps_passed = cJumps;
      if (clgs.bb->cjmp_inverted) jmps_passed--;
      addConstMemStoreStmt( clgs.sbOut,
                            (UWord) &CLG_(current_state).jmps_passed,
                            jmps_passed, hWordTy);
   }
   CLG_ASSERT(clgs.bb->cjmp_count == cJumps);
   CLG_ASSERT(clgs.bb->instr_count == clgs.ii_index);

   /* This stores the instr of the call/ret at BB end */
   clgs.bb->jmp[cJumps].instr = clgs.ii_index - 1;

   if (clgs.seen_before) {
      CLG_ASSERT(clgs.bb->cost_count == update_cost_offsets(&clgs));
      CLG_ASSERT(clgs.bb->instr_len == clgs.instr_offset);
      CLG_ASSERT(clgs.bb->jmpkind == sbIn->jumpkind);
   }
   else {
      clgs.bb->cost_count = update_cost_offsets(&clgs);
      clgs.bb->instr_len = clgs.instr_offset;
      clgs.bb->jmpkind = sbIn->jumpkind;
   }

   CLG_DEBUG(3, "- instrument(BB %#lx): byteLen %u, CJumps %u, CostLen %u\n",
             origAddr, clgs.bb->instr_len,
             clgs.bb->cjmp_count, clgs.bb->cost_count);
   if (cJumps > 0) {
      CLG_DEBUG(3, "  [ ");
      for (i=0; i<cJumps; i++)
         CLG_DEBUG(3, "%d ", clgs.bb->jmp[i].instr);
      CLG_DEBUG(3, "], last inverted: %s\n",
                clgs.bb->cjmp_inverted ? "yes":"no");
   }

   return clgs.sbOut;
}

/*--------------------------------------------------------------------*/
/*--- Discarding BB info                                           ---*/
/*--------------------------------------------------------------------*/

// Called when a translation is removed from the translation cache for
// any reason at all: to free up space, because the guest code was
// unmapped or modified, or for any arbitrary reason.
static
void clg_discard_superblock_info ( Addr64 orig_addr64, VexGuestExtents vge )
{
   Addr orig_addr = (Addr)orig_addr64;

   tl_assert(vge.n_used > 0);

   if (0)
      VG_(printf)( "discard_superblock_info: %p, %p, %llu\n",
                   (void*)(Addr)orig_addr,
                   (void*)(Addr)vge.base[0], (ULong)vge.len[0]);

   // Get BB info, remove from table, free BB info.  Simple!  Note
   // that we use orig_addr, not the first instruction address in vge.
   CLG_(delete_bb)(orig_addr);
}


/*------------------------------------------------------------*/
/*--- CLG_(fini)() and related functions                   ---*/
/*------------------------------------------------------------*/


static void zero_thread_cost(thread_info* t)
{
   Int i;

   for(i = 0; i < CLG_(current_call_stack).sp; i++) {
      if (!CLG_(current_call_stack).entry[i].jcc) continue;

      /* reset call counters to current for active calls */
      CLG_(copy_cost)( CLG_(sets).full,
                       CLG_(current_call_stack).entry[i].enter_cost,
                       CLG_(current_state).cost );
      CLG_(current_call_stack).entry[i].jcc->call_counter = 0;
   }

   CLG_(forall_bbccs)(CLG_(zero_bbcc));

   /* set counter for last dump */
   CLG_(copy_cost)( CLG_(sets).full,
                    t->lastdump_cost, CLG_(current_state).cost );
}

void CLG_(zero_all_cost)(Bool only_current_thread)
{
   if (VG_(clo_verbosity) > 1)
      VG_(message)(Vg_DebugMsg, "  Zeroing costs...\n");

   if (only_current_thread)
      zero_thread_cost(CLG_(get_current_thread)());
   else
      CLG_(forall_threads)(zero_thread_cost);

   if (VG_(clo_verbosity) > 1)
      VG_(message)(Vg_DebugMsg, "  ...done\n");
}

static
void unwind_thread(thread_info* t)
{
   /* unwind signal handlers */
   while(CLG_(current_state).sig != 0)
      CLG_(post_signal)(CLG_(current_tid), CLG_(current_state).sig);

   /* unwind regular call stack */
   while(CLG_(current_call_stack).sp > 0)
      CLG_(pop_call_stack)();

   /* reset context and function stack for context generation */
   CLG_(init_exec_state)( &CLG_(current_state) );
   CLG_(current_fn_stack).top = CLG_(current_fn_stack).bottom;
}

static
void zero_state_cost(thread_info* t)
{
   CLG_(zero_cost)( CLG_(sets).full, CLG_(current_state).cost );
}

/* Oops, this can go wrong... */
extern void VG_(discard_translations) ( Addr64 start, ULong range );

void CLG_(set_instrument_state)(Char* reason, Bool state)
{
   if (CLG_(instrument_state) == state) {
      CLG_DEBUG(2, "%s: instrumentation already %s\n",
                reason, state ? "ON" : "OFF");
      return;
   }
   CLG_(instrument_state) = state;
   CLG_DEBUG(2, "%s: Switching instrumentation %s ...\n",
             reason, state ? "ON" : "OFF");

   VG_(discard_translations)( (Addr64)0x1000, (ULong) ~0xfffl);

   /* reset internal state: call stacks, simulator */
   CLG_(forall_threads)(unwind_thread);
   CLG_(forall_threads)(zero_state_cost);
   (*CLG_(cachesim).clear)();

   if (VG_(clo_verbosity) > 1)
      VG_(message)(Vg_DebugMsg, "%s: instrumentation switched %s\n",
                   reason, state ? "ON" : "OFF");
}


static
Bool CLG_(handle_client_request)(ThreadId tid, UWord *args, UWord *ret)
{
   if (!VG_IS_TOOL_USERREQ('C','T',args[0]))
      return False;

   switch(args[0]) {
   case VG_USERREQ__DUMP_STATS:
      CLG_(dump_profile)("Client Request", True);
      *ret = 0; /* meaningless */
      break;

   case VG_USERREQ__DUMP_STATS_AT:
      {
         Char buf[512];
         VG_(sprintf)(buf,"Client Request: %s", (Char*)args[1]);
         CLG_(dump_profile)(buf, True);
         *ret = 0; /* meaningless */
      }
      break;

   case VG_USERREQ__ZERO_STATS:
      CLG_(zero_all_cost)(True);
      *ret = 0; /* meaningless */
      break;

   case VG_USERREQ__TOGGLE_COLLECT:
      CLG_(current_state).collect = !CLG_(current_state).collect;
      CLG_DEBUG(2, "Client Request: toggled collection state to %s\n",
                CLG_(current_state).collect ? "ON" : "OFF");
      *ret = 0; /* meaningless */
      break;

   case VG_USERREQ__START_INSTRUMENTATION:
      CLG_(set_instrument_state)("Client Request", True);
      *ret = 0; /* meaningless */
      break;

   case VG_USERREQ__STOP_INSTRUMENTATION:
      CLG_(set_instrument_state)("Client Request", False);
      *ret = 0; /* meaningless */
      break;

   default:
      return False;
   }

   return True;
}
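
/* Client-side usage sketch (illustrative; the macros come from
   callgrind.h and expand to the VG_USERREQ__* requests handled above,
   becoming no-ops when not running under Valgrind):

      CALLGRIND_ZERO_STATS;             // VG_USERREQ__ZERO_STATS
      hot_function();
      CALLGRIND_DUMP_STATS_AT("hot");   // VG_USERREQ__DUMP_STATS_AT
      CALLGRIND_TOGGLE_COLLECT;         // VG_USERREQ__TOGGLE_COLLECT
*/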

/* Syscall Timing */

/* struct timeval syscalltime[VG_N_THREADS]; */
#if CLG_MICROSYSTIME
#include <sys/time.h>
#include <sys/syscall.h>
extern Int VG_(do_syscall) ( UInt, ... );

ULong syscalltime[VG_N_THREADS];
#else
UInt syscalltime[VG_N_THREADS];
#endif

static
void CLG_(pre_syscalltime)(ThreadId tid, UInt syscallno,
                           UWord* args, UInt nArgs)
{
   if (CLG_(clo).collect_systime) {
#if CLG_MICROSYSTIME
      struct vki_timeval tv_now;
      VG_(do_syscall)(__NR_gettimeofday, (UInt)&tv_now, (UInt)NULL);
      syscalltime[tid] = tv_now.tv_sec * 1000000ULL + tv_now.tv_usec;
#else
      syscalltime[tid] = VG_(read_millisecond_timer)();
#endif
   }
}

static
void CLG_(post_syscalltime)(ThreadId tid, UInt syscallno,
                            UWord* args, UInt nArgs, SysRes res)
{
   if (CLG_(clo).collect_systime &&
       CLG_(current_state).bbcc) {
      Int o;
#if CLG_MICROSYSTIME
      struct vki_timeval tv_now;
      ULong diff;

      VG_(do_syscall)(__NR_gettimeofday, (UInt)&tv_now, (UInt)NULL);
      diff = (tv_now.tv_sec * 1000000ULL + tv_now.tv_usec) - syscalltime[tid];
#else
      UInt diff = VG_(read_millisecond_timer)() - syscalltime[tid];
#endif

      /* offset o is for "SysCount", o+1 for "SysTime" */
      o = fullOffset(EG_SYS);
      CLG_ASSERT(o >= 0);
      CLG_DEBUG(0,"   Time (Off %d) for Syscall %d: %llu\n",
                o, syscallno, (ULong)diff);

      CLG_(current_state).cost[o] ++;
      CLG_(current_state).cost[o+1] += diff;
      if (!CLG_(current_state).bbcc->skipped)
         CLG_(init_cost_lz)(CLG_(sets).full,
                            &(CLG_(current_state).bbcc->skipped));
      CLG_(current_state).bbcc->skipped[o] ++;
      CLG_(current_state).bbcc->skipped[o+1] += diff;
   }
}

static UInt ULong_width(ULong n)
{
   UInt w = 0;
   while (n > 0) {
      n = n / 10;
      w++;
   }
   if (w == 0) w = 1;
   return w + (w-1)/3;   // add space for commas
}
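/* E.g. ULong_width(1234567) = 9: seven digits plus (7-1)/3 = 2 comma
   positions, matching the "%,llu" rendering "1,234,567" used in the
   format strings built below. */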

static
void branchsim_printstat(int l1, int l2, int l3)
{
   static Char buf1[128], buf2[128], buf3[128], fmt[128];
   FullCost total;
   ULong Bc_total_b, Bc_total_mp, Bi_total_b, Bi_total_mp;
   ULong B_total_b, B_total_mp;

   total = CLG_(total_cost);
   Bc_total_b  = total[ fullOffset(EG_BC)   ];
   Bc_total_mp = total[ fullOffset(EG_BC)+1 ];
   Bi_total_b  = total[ fullOffset(EG_BI)   ];
   Bi_total_mp = total[ fullOffset(EG_BI)+1 ];

   /* Make format string, getting width right for numbers */
   VG_(sprintf)(fmt, "%%s %%,%dllu (%%,%dllu cond + %%,%dllu ind)\n",
                l1, l2, l3);

   if (0 == Bc_total_b) Bc_total_b = 1;
   if (0 == Bi_total_b) Bi_total_b = 1;
   B_total_b  = Bc_total_b  + Bi_total_b;
   B_total_mp = Bc_total_mp + Bi_total_mp;

   VG_(umsg)("\n");
   VG_(umsg)(fmt, "Branches:     ",
             B_total_b, Bc_total_b, Bi_total_b);

   VG_(umsg)(fmt, "Mispredicts:  ",
             B_total_mp, Bc_total_mp, Bi_total_mp);

   VG_(percentify)(B_total_mp,  B_total_b,  1, l1+1, buf1);
   VG_(percentify)(Bc_total_mp, Bc_total_b, 1, l2+1, buf2);
   VG_(percentify)(Bi_total_mp, Bi_total_b, 1, l3+1, buf3);

   VG_(umsg)("Mispred rate:  %s (%s + %s )\n", buf1, buf2, buf3);
}


static
void finish(void)
{
   Char buf[32+COSTS_LEN], fmt[128];
   Int l1, l2, l3;
   FullCost total;

   CLG_DEBUG(0, "finish()\n");

   (*CLG_(cachesim).finish)();

   /* pop all remaining items from CallStack for correct sum
    */
   CLG_(forall_threads)(unwind_thread);

   CLG_(dump_profile)(0, False);

   CLG_(finish_command)();

   if (VG_(clo_verbosity) == 0) return;

   /* Hash table stats */
   if (VG_(clo_stats)) {
      int BB_lookups =
         CLG_(stat).full_debug_BBs +
         CLG_(stat).fn_name_debug_BBs +
         CLG_(stat).file_line_debug_BBs +
         CLG_(stat).no_debug_BBs;

      VG_(message)(Vg_DebugMsg, "\n");
      VG_(message)(Vg_DebugMsg, "Distinct objects: %d\n",
                   CLG_(stat).distinct_objs);
      VG_(message)(Vg_DebugMsg, "Distinct files:   %d\n",
                   CLG_(stat).distinct_files);
      VG_(message)(Vg_DebugMsg, "Distinct fns:     %d\n",
                   CLG_(stat).distinct_fns);
      VG_(message)(Vg_DebugMsg, "Distinct contexts:%d\n",
                   CLG_(stat).distinct_contexts);
      VG_(message)(Vg_DebugMsg, "Distinct BBs:     %d\n",
                   CLG_(stat).distinct_bbs);
      VG_(message)(Vg_DebugMsg, "Cost entries:     %d (Chunks %d)\n",
                   CLG_(costarray_entries), CLG_(costarray_chunks));
      VG_(message)(Vg_DebugMsg, "Distinct BBCCs:   %d\n",
                   CLG_(stat).distinct_bbccs);
      VG_(message)(Vg_DebugMsg, "Distinct JCCs:    %d\n",
                   CLG_(stat).distinct_jccs);
      VG_(message)(Vg_DebugMsg, "Distinct skips:   %d\n",
                   CLG_(stat).distinct_skips);
      VG_(message)(Vg_DebugMsg, "BB lookups:       %d\n",
                   BB_lookups);
      if (BB_lookups>0) {
         VG_(message)(Vg_DebugMsg, "With full debug info:      %3d%% (%d)\n",
                      CLG_(stat).full_debug_BBs * 100 / BB_lookups,
                      CLG_(stat).full_debug_BBs);
         VG_(message)(Vg_DebugMsg, "With file/line debug info: %3d%% (%d)\n",
                      CLG_(stat).file_line_debug_BBs * 100 / BB_lookups,
                      CLG_(stat).file_line_debug_BBs);
         VG_(message)(Vg_DebugMsg, "With fn name debug info:   %3d%% (%d)\n",
                      CLG_(stat).fn_name_debug_BBs * 100 / BB_lookups,
                      CLG_(stat).fn_name_debug_BBs);
         VG_(message)(Vg_DebugMsg, "With no debug info:        %3d%% (%d)\n",
                      CLG_(stat).no_debug_BBs * 100 / BB_lookups,
                      CLG_(stat).no_debug_BBs);
      }
      VG_(message)(Vg_DebugMsg, "BBCC Clones:       %d\n",
                   CLG_(stat).bbcc_clones);
      VG_(message)(Vg_DebugMsg, "BBs Retranslated:  %d\n",
                   CLG_(stat).bb_retranslations);
      VG_(message)(Vg_DebugMsg, "Distinct instrs:   %d\n",
                   CLG_(stat).distinct_instrs);
      VG_(message)(Vg_DebugMsg, "");

      VG_(message)(Vg_DebugMsg, "LRU Context Misses: %d\n",
                   CLG_(stat).cxt_lru_misses);
      VG_(message)(Vg_DebugMsg, "LRU BBCC Misses:    %d\n",
                   CLG_(stat).bbcc_lru_misses);
      VG_(message)(Vg_DebugMsg, "LRU JCC Misses:     %d\n",
                   CLG_(stat).jcc_lru_misses);
      VG_(message)(Vg_DebugMsg, "BBs Executed:       %llu\n",
                   CLG_(stat).bb_executions);
      VG_(message)(Vg_DebugMsg, "Calls:              %llu\n",
                   CLG_(stat).call_counter);
      VG_(message)(Vg_DebugMsg, "CondJMP followed:   %llu\n",
                   CLG_(stat).jcnd_counter);
      VG_(message)(Vg_DebugMsg, "Boring JMPs:        %llu\n",
                   CLG_(stat).jump_counter);
      VG_(message)(Vg_DebugMsg, "Recursive calls:    %llu\n",
                   CLG_(stat).rec_call_counter);
      VG_(message)(Vg_DebugMsg, "Returns:            %llu\n",
                   CLG_(stat).ret_counter);

      VG_(message)(Vg_DebugMsg, "");
   }

   CLG_(sprint_eventmapping)(buf, CLG_(dumpmap));
   VG_(message)(Vg_UserMsg, "Events    : %s\n", buf);
   CLG_(sprint_mappingcost)(buf, CLG_(dumpmap), CLG_(total_cost));
   VG_(message)(Vg_UserMsg, "Collected : %s\n", buf);
   VG_(message)(Vg_UserMsg, "\n");

   /* determine value widths for statistics */
   total = CLG_(total_cost);
   l1 = ULong_width( total[fullOffset(EG_IR)] );
   l2 = l3 = 0;
   if (CLG_(clo).simulate_cache) {
      l2 = ULong_width( total[fullOffset(EG_DR)] );
      l3 = ULong_width( total[fullOffset(EG_DW)] );
   }
   if (CLG_(clo).simulate_branch) {
      int l2b = ULong_width( total[fullOffset(EG_BC)] );
      int l3b = ULong_width( total[fullOffset(EG_BI)] );
      if (l2b > l2) l2 = l2b;
      if (l3b > l3) l3 = l3b;
   }

   /* Make format string, getting width right for numbers */
   VG_(sprintf)(fmt, "%%s %%,%dllu\n", l1);

   /* Always print this */
   VG_(umsg)(fmt, "I   refs:     ", total[fullOffset(EG_IR)] );

   if (CLG_(clo).simulate_cache)
      (*CLG_(cachesim).printstat)(l1, l2, l3);

   if (CLG_(clo).simulate_branch)
      branchsim_printstat(l1, l2, l3);

}

void CLG_(fini)(Int exitcode)
{
   finish();
}


/*--------------------------------------------------------------------*/
/*--- Setup                                                        ---*/
/*--------------------------------------------------------------------*/

static void clg_start_client_code_callback ( ThreadId tid, ULong blocks_done )
{
   static ULong last_blocks_done = 0;

   if (0)
      VG_(printf)("%d R %llu\n", (Int)tid, blocks_done);

   /* throttle calls to CLG_(run_thread) by number of BBs executed */
   if (blocks_done - last_blocks_done < 5000) return;
   last_blocks_done = blocks_done;

   CLG_(run_thread)( tid );
}

static
void CLG_(post_clo_init)(void)
{
   VG_(clo_vex_control).iropt_unroll_thresh = 0;
   VG_(clo_vex_control).guest_chase_thresh = 0;

   CLG_DEBUG(1, "  dump threads: %s\n", CLG_(clo).separate_threads ? "Yes":"No");
   CLG_DEBUG(1, "  call sep.   : %d\n", CLG_(clo).separate_callers);
   CLG_DEBUG(1, "  rec. sep.   : %d\n", CLG_(clo).separate_recursions);

   if (!CLG_(clo).dump_line && !CLG_(clo).dump_instr && !CLG_(clo).dump_bb) {
      VG_(message)(Vg_UserMsg, "Using source line as position.\n");
      CLG_(clo).dump_line = True;
   }

   CLG_(init_dumps)();
   CLG_(init_command)();

   (*CLG_(cachesim).post_clo_init)();

   CLG_(init_eventsets)();
   CLG_(init_statistics)(& CLG_(stat));
   CLG_(init_cost_lz)( CLG_(sets).full, &CLG_(total_cost) );

   /* initialize hash tables */
   CLG_(init_obj_table)();
   CLG_(init_cxt_table)();
   CLG_(init_bb_hash)();

   CLG_(init_threads)();
   CLG_(run_thread)(1);

   CLG_(instrument_state) = CLG_(clo).instrument_atstart;

   if (VG_(clo_verbosity) > 0) {
      VG_(message)(Vg_UserMsg,
                   "For interactive control, run 'callgrind_control -h'.\n");
   }
}

static
void CLG_(pre_clo_init)(void)
{
   VG_(details_name)            ("Callgrind");
   VG_(details_version)         (NULL);
   VG_(details_description)     ("a call-graph generating cache profiler");
   VG_(details_copyright_author)("Copyright (C) 2002-2010, and GNU GPL'd, "
                                 "by Josef Weidendorfer et al.");
   VG_(details_bug_reports_to)  (VG_BUGS_TO);
   VG_(details_avg_translation_sizeB) ( 500 );

   VG_(basic_tool_funcs)        (CLG_(post_clo_init),
                                 CLG_(instrument),
                                 CLG_(fini));

   VG_(needs_superblock_discards)(clg_discard_superblock_info);


   VG_(needs_command_line_options)(CLG_(process_cmd_line_option),
                                   CLG_(print_usage),
                                   CLG_(print_debug_usage));

   VG_(needs_client_requests)(CLG_(handle_client_request));
   VG_(needs_syscall_wrapper)(CLG_(pre_syscalltime),
                              CLG_(post_syscalltime));

   VG_(track_start_client_code)  ( & clg_start_client_code_callback );
   VG_(track_pre_deliver_signal) ( & CLG_(pre_signal) );
   VG_(track_post_deliver_signal)( & CLG_(post_signal) );

   CLG_(set_clo_defaults)();
}

VG_DETERMINE_INTERFACE_VERSION(CLG_(pre_clo_init))

/*--------------------------------------------------------------------*/
/*--- end                                                   main.c ---*/
/*--------------------------------------------------------------------*/