1 2 /*--------------------------------------------------------------------*/ 3 /*--- Callgrind ---*/ 4 /*--- main.c ---*/ 5 /*--------------------------------------------------------------------*/ 6 7 /* 8 This file is part of Callgrind, a Valgrind tool for call graph 9 profiling programs. 10 11 Copyright (C) 2002-2015, Josef Weidendorfer (Josef.Weidendorfer (at) gmx.de) 12 13 This tool is derived from and contains code from Cachegrind 14 Copyright (C) 2002-2015 Nicholas Nethercote (njn (at) valgrind.org) 15 16 This program is free software; you can redistribute it and/or 17 modify it under the terms of the GNU General Public License as 18 published by the Free Software Foundation; either version 2 of the 19 License, or (at your option) any later version. 20 21 This program is distributed in the hope that it will be useful, but 22 WITHOUT ANY WARRANTY; without even the implied warranty of 23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 24 General Public License for more details. 25 26 You should have received a copy of the GNU General Public License 27 along with this program; if not, write to the Free Software 28 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 29 02111-1307, USA. 30 31 The GNU General Public License is contained in the file COPYING. 32 */ 33 34 #include "config.h" 35 #include "callgrind.h" 36 #include "global.h" 37 38 #include "pub_tool_threadstate.h" 39 #include "pub_tool_gdbserver.h" 40 #include "pub_tool_transtab.h" // VG_(discard_translations_safely) 41 42 #include "cg_branchpred.c" 43 44 /*------------------------------------------------------------*/ 45 /*--- Global variables ---*/ 46 /*------------------------------------------------------------*/ 47 48 /* for all threads */ 49 CommandLineOptions CLG_(clo); 50 Statistics CLG_(stat); 51 Bool CLG_(instrument_state) = True; /* Instrumentation on ? */ 52 53 /* thread and signal handler specific */ 54 exec_state CLG_(current_state); 55 56 /* min of L1 and LL cache line sizes. This only gets set to a 57 non-zero value if we are doing cache simulation. */ 58 Int CLG_(min_line_size) = 0; 59 60 61 /*------------------------------------------------------------*/ 62 /*--- Statistics ---*/ 63 /*------------------------------------------------------------*/ 64 65 static void CLG_(init_statistics)(Statistics* s) 66 { 67 s->call_counter = 0; 68 s->jcnd_counter = 0; 69 s->jump_counter = 0; 70 s->rec_call_counter = 0; 71 s->ret_counter = 0; 72 s->bb_executions = 0; 73 74 s->context_counter = 0; 75 s->bb_retranslations = 0; 76 77 s->distinct_objs = 0; 78 s->distinct_files = 0; 79 s->distinct_fns = 0; 80 s->distinct_contexts = 0; 81 s->distinct_bbs = 0; 82 s->distinct_bbccs = 0; 83 s->distinct_instrs = 0; 84 s->distinct_skips = 0; 85 86 s->bb_hash_resizes = 0; 87 s->bbcc_hash_resizes = 0; 88 s->jcc_hash_resizes = 0; 89 s->cxt_hash_resizes = 0; 90 s->fn_array_resizes = 0; 91 s->call_stack_resizes = 0; 92 s->fn_stack_resizes = 0; 93 94 s->full_debug_BBs = 0; 95 s->file_line_debug_BBs = 0; 96 s->fn_name_debug_BBs = 0; 97 s->no_debug_BBs = 0; 98 s->bbcc_lru_misses = 0; 99 s->jcc_lru_misses = 0; 100 s->cxt_lru_misses = 0; 101 s->bbcc_clones = 0; 102 } 103 104 105 /*------------------------------------------------------------*/ 106 /*--- Simple callbacks (not cache similator) ---*/ 107 /*------------------------------------------------------------*/ 108 109 VG_REGPARM(1) 110 static void log_global_event(InstrInfo* ii) 111 { 112 ULong* cost_Bus; 113 114 CLG_DEBUG(6, "log_global_event: Ir %#lx/%u\n", 115 CLG_(bb_base) + ii->instr_offset, ii->instr_size); 116 117 if (!CLG_(current_state).collect) return; 118 119 CLG_ASSERT( (ii->eventset->mask & (1u<<EG_BUS))>0 ); 120 121 CLG_(current_state).cost[ fullOffset(EG_BUS) ]++; 122 123 if (CLG_(current_state).nonskipped) 124 cost_Bus = CLG_(current_state).nonskipped->skipped + fullOffset(EG_BUS); 125 else 126 cost_Bus = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_BUS]; 127 cost_Bus[0]++; 128 } 129 130 131 /* For branches, we consult two different predictors, one which 132 predicts taken/untaken for conditional branches, and the other 133 which predicts the branch target address for indirect branches 134 (jump-to-register style ones). */ 135 136 static VG_REGPARM(2) 137 void log_cond_branch(InstrInfo* ii, Word taken) 138 { 139 Bool miss; 140 Int fullOffset_Bc; 141 ULong* cost_Bc; 142 143 CLG_DEBUG(6, "log_cond_branch: Ir %#lx, taken %ld\n", 144 CLG_(bb_base) + ii->instr_offset, taken); 145 146 miss = 1 & do_cond_branch_predict(CLG_(bb_base) + ii->instr_offset, taken); 147 148 if (!CLG_(current_state).collect) return; 149 150 CLG_ASSERT( (ii->eventset->mask & (1u<<EG_BC))>0 ); 151 152 if (CLG_(current_state).nonskipped) 153 cost_Bc = CLG_(current_state).nonskipped->skipped + fullOffset(EG_BC); 154 else 155 cost_Bc = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_BC]; 156 157 fullOffset_Bc = fullOffset(EG_BC); 158 CLG_(current_state).cost[ fullOffset_Bc ]++; 159 cost_Bc[0]++; 160 if (miss) { 161 CLG_(current_state).cost[ fullOffset_Bc+1 ]++; 162 cost_Bc[1]++; 163 } 164 } 165 166 static VG_REGPARM(2) 167 void log_ind_branch(InstrInfo* ii, UWord actual_dst) 168 { 169 Bool miss; 170 Int fullOffset_Bi; 171 ULong* cost_Bi; 172 173 CLG_DEBUG(6, "log_ind_branch: Ir %#lx, dst %#lx\n", 174 CLG_(bb_base) + ii->instr_offset, actual_dst); 175 176 miss = 1 & do_ind_branch_predict(CLG_(bb_base) + ii->instr_offset, actual_dst); 177 178 if (!CLG_(current_state).collect) return; 179 180 CLG_ASSERT( (ii->eventset->mask & (1u<<EG_BI))>0 ); 181 182 if (CLG_(current_state).nonskipped) 183 cost_Bi = CLG_(current_state).nonskipped->skipped + fullOffset(EG_BI); 184 else 185 cost_Bi = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_BI]; 186 187 fullOffset_Bi = fullOffset(EG_BI); 188 CLG_(current_state).cost[ fullOffset_Bi ]++; 189 cost_Bi[0]++; 190 if (miss) { 191 CLG_(current_state).cost[ fullOffset_Bi+1 ]++; 192 cost_Bi[1]++; 193 } 194 } 195 196 /*------------------------------------------------------------*/ 197 /*--- Instrumentation structures and event queue handling ---*/ 198 /*------------------------------------------------------------*/ 199 200 /* Maintain an ordered list of memory events which are outstanding, in 201 the sense that no IR has yet been generated to do the relevant 202 helper calls. The BB is scanned top to bottom and memory events 203 are added to the end of the list, merging with the most recent 204 notified event where possible (Dw immediately following Dr and 205 having the same size and EA can be merged). 206 207 This merging is done so that for architectures which have 208 load-op-store instructions (x86, amd64), the insn is treated as if 209 it makes just one memory reference (a modify), rather than two (a 210 read followed by a write at the same address). 211 212 At various points the list will need to be flushed, that is, IR 213 generated from it. That must happen before any possible exit from 214 the block (the end, or an IRStmt_Exit). Flushing also takes place 215 when there is no space to add a new event. 216 217 If we require the simulation statistics to be up to date with 218 respect to possible memory exceptions, then the list would have to 219 be flushed before each memory reference. That would however lose 220 performance by inhibiting event-merging during flushing. 221 222 Flushing the list consists of walking it start to end and emitting 223 instrumentation IR for each event, in the order in which they 224 appear. It may be possible to emit a single call for two adjacent 225 events in order to reduce the number of helper function calls made. 226 For example, it could well be profitable to handle two adjacent Ir 227 events with a single helper call. */ 228 229 typedef 230 IRExpr 231 IRAtom; 232 233 typedef 234 enum { 235 Ev_Ir, // Instruction read 236 Ev_Dr, // Data read 237 Ev_Dw, // Data write 238 Ev_Dm, // Data modify (read then write) 239 Ev_Bc, // branch conditional 240 Ev_Bi, // branch indirect (to unknown destination) 241 Ev_G // Global bus event 242 } 243 EventTag; 244 245 typedef 246 struct { 247 EventTag tag; 248 InstrInfo* inode; 249 union { 250 struct { 251 } Ir; 252 struct { 253 IRAtom* ea; 254 Int szB; 255 } Dr; 256 struct { 257 IRAtom* ea; 258 Int szB; 259 } Dw; 260 struct { 261 IRAtom* ea; 262 Int szB; 263 } Dm; 264 struct { 265 IRAtom* taken; /* :: Ity_I1 */ 266 } Bc; 267 struct { 268 IRAtom* dst; 269 } Bi; 270 struct { 271 } G; 272 } Ev; 273 } 274 Event; 275 276 static void init_Event ( Event* ev ) { 277 VG_(memset)(ev, 0, sizeof(Event)); 278 } 279 280 static IRAtom* get_Event_dea ( Event* ev ) { 281 switch (ev->tag) { 282 case Ev_Dr: return ev->Ev.Dr.ea; 283 case Ev_Dw: return ev->Ev.Dw.ea; 284 case Ev_Dm: return ev->Ev.Dm.ea; 285 default: tl_assert(0); 286 } 287 } 288 289 static Int get_Event_dszB ( Event* ev ) { 290 switch (ev->tag) { 291 case Ev_Dr: return ev->Ev.Dr.szB; 292 case Ev_Dw: return ev->Ev.Dw.szB; 293 case Ev_Dm: return ev->Ev.Dm.szB; 294 default: tl_assert(0); 295 } 296 } 297 298 299 /* Up to this many unnotified events are allowed. Number is 300 arbitrary. Larger numbers allow more event merging to occur, but 301 potentially induce more spilling due to extending live ranges of 302 address temporaries. */ 303 #define N_EVENTS 16 304 305 306 /* A struct which holds all the running state during instrumentation. 307 Mostly to avoid passing loads of parameters everywhere. */ 308 typedef struct { 309 /* The current outstanding-memory-event list. */ 310 Event events[N_EVENTS]; 311 Int events_used; 312 313 /* The array of InstrInfo's is part of BB struct. */ 314 BB* bb; 315 316 /* BB seen before (ie. re-instrumentation) */ 317 Bool seen_before; 318 319 /* Number InstrInfo bins 'used' so far. */ 320 UInt ii_index; 321 322 // current offset of guest instructions from BB start 323 UInt instr_offset; 324 325 /* The output SB being constructed. */ 326 IRSB* sbOut; 327 } ClgState; 328 329 330 static void showEvent ( Event* ev ) 331 { 332 switch (ev->tag) { 333 case Ev_Ir: 334 VG_(printf)("Ir (InstrInfo %p) at +%u\n", 335 ev->inode, ev->inode->instr_offset); 336 break; 337 case Ev_Dr: 338 VG_(printf)("Dr (InstrInfo %p) at +%u %d EA=", 339 ev->inode, ev->inode->instr_offset, ev->Ev.Dr.szB); 340 ppIRExpr(ev->Ev.Dr.ea); 341 VG_(printf)("\n"); 342 break; 343 case Ev_Dw: 344 VG_(printf)("Dw (InstrInfo %p) at +%u %d EA=", 345 ev->inode, ev->inode->instr_offset, ev->Ev.Dw.szB); 346 ppIRExpr(ev->Ev.Dw.ea); 347 VG_(printf)("\n"); 348 break; 349 case Ev_Dm: 350 VG_(printf)("Dm (InstrInfo %p) at +%u %d EA=", 351 ev->inode, ev->inode->instr_offset, ev->Ev.Dm.szB); 352 ppIRExpr(ev->Ev.Dm.ea); 353 VG_(printf)("\n"); 354 break; 355 case Ev_Bc: 356 VG_(printf)("Bc %p GA=", ev->inode); 357 ppIRExpr(ev->Ev.Bc.taken); 358 VG_(printf)("\n"); 359 break; 360 case Ev_Bi: 361 VG_(printf)("Bi %p DST=", ev->inode); 362 ppIRExpr(ev->Ev.Bi.dst); 363 VG_(printf)("\n"); 364 break; 365 case Ev_G: 366 VG_(printf)("G %p\n", ev->inode); 367 break; 368 default: 369 tl_assert(0); 370 break; 371 } 372 } 373 374 /* Generate code for all outstanding memory events, and mark the queue 375 empty. Code is generated into cgs->sbOut, and this activity 376 'consumes' slots in cgs->bb. */ 377 378 static void flushEvents ( ClgState* clgs ) 379 { 380 Int i, regparms, inew; 381 const HChar* helperName; 382 void* helperAddr; 383 IRExpr** argv; 384 IRExpr* i_node_expr; 385 IRDirty* di; 386 Event* ev; 387 Event* ev2; 388 Event* ev3; 389 390 if (!clgs->seen_before) { 391 // extend event sets as needed 392 // available sets: D0 Dr 393 for(i=0; i<clgs->events_used; i++) { 394 ev = &clgs->events[i]; 395 switch(ev->tag) { 396 case Ev_Ir: 397 // Ir event always is first for a guest instruction 398 CLG_ASSERT(ev->inode->eventset == 0); 399 ev->inode->eventset = CLG_(sets).base; 400 break; 401 case Ev_Dr: 402 // extend event set by Dr counters 403 ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset, 404 EG_DR); 405 break; 406 case Ev_Dw: 407 case Ev_Dm: 408 // extend event set by Dw counters 409 ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset, 410 EG_DW); 411 break; 412 case Ev_Bc: 413 // extend event set by Bc counters 414 ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset, 415 EG_BC); 416 break; 417 case Ev_Bi: 418 // extend event set by Bi counters 419 ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset, 420 EG_BI); 421 break; 422 case Ev_G: 423 // extend event set by Bus counter 424 ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset, 425 EG_BUS); 426 break; 427 default: 428 tl_assert(0); 429 } 430 } 431 } 432 433 for(i = 0; i < clgs->events_used; i = inew) { 434 435 helperName = NULL; 436 helperAddr = NULL; 437 argv = NULL; 438 regparms = 0; 439 440 /* generate IR to notify event i and possibly the ones 441 immediately following it. */ 442 tl_assert(i >= 0 && i < clgs->events_used); 443 444 ev = &clgs->events[i]; 445 ev2 = ( i < clgs->events_used-1 ? &clgs->events[i+1] : NULL ); 446 ev3 = ( i < clgs->events_used-2 ? &clgs->events[i+2] : NULL ); 447 448 CLG_DEBUGIF(5) { 449 VG_(printf)(" flush "); 450 showEvent( ev ); 451 } 452 453 i_node_expr = mkIRExpr_HWord( (HWord)ev->inode ); 454 455 /* Decide on helper fn to call and args to pass it, and advance 456 i appropriately. 457 Dm events have same effect as Dw events */ 458 switch (ev->tag) { 459 case Ev_Ir: 460 /* Merge an Ir with a following Dr. */ 461 if (ev2 && ev2->tag == Ev_Dr) { 462 /* Why is this true? It's because we're merging an Ir 463 with a following Dr. The Ir derives from the 464 instruction's IMark and the Dr from data 465 references which follow it. In short it holds 466 because each insn starts with an IMark, hence an 467 Ev_Ir, and so these Dr must pertain to the 468 immediately preceding Ir. Same applies to analogous 469 assertions in the subsequent cases. */ 470 tl_assert(ev2->inode == ev->inode); 471 helperName = CLG_(cachesim).log_1I1Dr_name; 472 helperAddr = CLG_(cachesim).log_1I1Dr; 473 argv = mkIRExprVec_3( i_node_expr, 474 get_Event_dea(ev2), 475 mkIRExpr_HWord( get_Event_dszB(ev2) ) ); 476 regparms = 3; 477 inew = i+2; 478 } 479 /* Merge an Ir with a following Dw/Dm. */ 480 else 481 if (ev2 && (ev2->tag == Ev_Dw || ev2->tag == Ev_Dm)) { 482 tl_assert(ev2->inode == ev->inode); 483 helperName = CLG_(cachesim).log_1I1Dw_name; 484 helperAddr = CLG_(cachesim).log_1I1Dw; 485 argv = mkIRExprVec_3( i_node_expr, 486 get_Event_dea(ev2), 487 mkIRExpr_HWord( get_Event_dszB(ev2) ) ); 488 regparms = 3; 489 inew = i+2; 490 } 491 /* Merge an Ir with two following Irs. */ 492 else 493 if (ev2 && ev3 && ev2->tag == Ev_Ir && ev3->tag == Ev_Ir) { 494 helperName = CLG_(cachesim).log_3I0D_name; 495 helperAddr = CLG_(cachesim).log_3I0D; 496 argv = mkIRExprVec_3( i_node_expr, 497 mkIRExpr_HWord( (HWord)ev2->inode ), 498 mkIRExpr_HWord( (HWord)ev3->inode ) ); 499 regparms = 3; 500 inew = i+3; 501 } 502 /* Merge an Ir with one following Ir. */ 503 else 504 if (ev2 && ev2->tag == Ev_Ir) { 505 helperName = CLG_(cachesim).log_2I0D_name; 506 helperAddr = CLG_(cachesim).log_2I0D; 507 argv = mkIRExprVec_2( i_node_expr, 508 mkIRExpr_HWord( (HWord)ev2->inode ) ); 509 regparms = 2; 510 inew = i+2; 511 } 512 /* No merging possible; emit as-is. */ 513 else { 514 helperName = CLG_(cachesim).log_1I0D_name; 515 helperAddr = CLG_(cachesim).log_1I0D; 516 argv = mkIRExprVec_1( i_node_expr ); 517 regparms = 1; 518 inew = i+1; 519 } 520 break; 521 case Ev_Dr: 522 /* Data read or modify */ 523 helperName = CLG_(cachesim).log_0I1Dr_name; 524 helperAddr = CLG_(cachesim).log_0I1Dr; 525 argv = mkIRExprVec_3( i_node_expr, 526 get_Event_dea(ev), 527 mkIRExpr_HWord( get_Event_dszB(ev) ) ); 528 regparms = 3; 529 inew = i+1; 530 break; 531 case Ev_Dw: 532 case Ev_Dm: 533 /* Data write */ 534 helperName = CLG_(cachesim).log_0I1Dw_name; 535 helperAddr = CLG_(cachesim).log_0I1Dw; 536 argv = mkIRExprVec_3( i_node_expr, 537 get_Event_dea(ev), 538 mkIRExpr_HWord( get_Event_dszB(ev) ) ); 539 regparms = 3; 540 inew = i+1; 541 break; 542 case Ev_Bc: 543 /* Conditional branch */ 544 helperName = "log_cond_branch"; 545 helperAddr = &log_cond_branch; 546 argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bc.taken ); 547 regparms = 2; 548 inew = i+1; 549 break; 550 case Ev_Bi: 551 /* Branch to an unknown destination */ 552 helperName = "log_ind_branch"; 553 helperAddr = &log_ind_branch; 554 argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bi.dst ); 555 regparms = 2; 556 inew = i+1; 557 break; 558 case Ev_G: 559 /* Global bus event (CAS, LOCK-prefix, LL-SC, etc) */ 560 helperName = "log_global_event"; 561 helperAddr = &log_global_event; 562 argv = mkIRExprVec_1( i_node_expr ); 563 regparms = 1; 564 inew = i+1; 565 break; 566 default: 567 tl_assert(0); 568 } 569 570 CLG_DEBUGIF(5) { 571 if (inew > i+1) { 572 VG_(printf)(" merge "); 573 showEvent( ev2 ); 574 } 575 if (inew > i+2) { 576 VG_(printf)(" merge "); 577 showEvent( ev3 ); 578 } 579 if (helperAddr) 580 VG_(printf)(" call %s (%p)\n", 581 helperName, helperAddr); 582 } 583 584 /* helper could be unset depending on the simulator used */ 585 if (helperAddr == 0) continue; 586 587 /* Add the helper. */ 588 tl_assert(helperName); 589 tl_assert(helperAddr); 590 tl_assert(argv); 591 di = unsafeIRDirty_0_N( regparms, 592 helperName, VG_(fnptr_to_fnentry)( helperAddr ), 593 argv ); 594 addStmtToIRSB( clgs->sbOut, IRStmt_Dirty(di) ); 595 } 596 597 clgs->events_used = 0; 598 } 599 600 static void addEvent_Ir ( ClgState* clgs, InstrInfo* inode ) 601 { 602 Event* evt; 603 tl_assert(clgs->seen_before || (inode->eventset == 0)); 604 if (!CLG_(clo).simulate_cache) return; 605 606 if (clgs->events_used == N_EVENTS) 607 flushEvents(clgs); 608 tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS); 609 evt = &clgs->events[clgs->events_used]; 610 init_Event(evt); 611 evt->tag = Ev_Ir; 612 evt->inode = inode; 613 clgs->events_used++; 614 } 615 616 static 617 void addEvent_Dr ( ClgState* clgs, InstrInfo* inode, Int datasize, IRAtom* ea ) 618 { 619 Event* evt; 620 tl_assert(isIRAtom(ea)); 621 tl_assert(datasize >= 1); 622 if (!CLG_(clo).simulate_cache) return; 623 tl_assert(datasize <= CLG_(min_line_size)); 624 625 if (clgs->events_used == N_EVENTS) 626 flushEvents(clgs); 627 tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS); 628 evt = &clgs->events[clgs->events_used]; 629 init_Event(evt); 630 evt->tag = Ev_Dr; 631 evt->inode = inode; 632 evt->Ev.Dr.szB = datasize; 633 evt->Ev.Dr.ea = ea; 634 clgs->events_used++; 635 } 636 637 static 638 void addEvent_Dw ( ClgState* clgs, InstrInfo* inode, Int datasize, IRAtom* ea ) 639 { 640 Event* lastEvt; 641 Event* evt; 642 tl_assert(isIRAtom(ea)); 643 tl_assert(datasize >= 1); 644 if (!CLG_(clo).simulate_cache) return; 645 tl_assert(datasize <= CLG_(min_line_size)); 646 647 /* Is it possible to merge this write with the preceding read? */ 648 lastEvt = &clgs->events[clgs->events_used-1]; 649 if (clgs->events_used > 0 650 && lastEvt->tag == Ev_Dr 651 && lastEvt->Ev.Dr.szB == datasize 652 && lastEvt->inode == inode 653 && eqIRAtom(lastEvt->Ev.Dr.ea, ea)) 654 { 655 lastEvt->tag = Ev_Dm; 656 return; 657 } 658 659 /* No. Add as normal. */ 660 if (clgs->events_used == N_EVENTS) 661 flushEvents(clgs); 662 tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS); 663 evt = &clgs->events[clgs->events_used]; 664 init_Event(evt); 665 evt->tag = Ev_Dw; 666 evt->inode = inode; 667 evt->Ev.Dw.szB = datasize; 668 evt->Ev.Dw.ea = ea; 669 clgs->events_used++; 670 } 671 672 static 673 void addEvent_D_guarded ( ClgState* clgs, InstrInfo* inode, 674 Int datasize, IRAtom* ea, IRAtom* guard, 675 Bool isWrite ) 676 { 677 tl_assert(isIRAtom(ea)); 678 tl_assert(guard); 679 tl_assert(isIRAtom(guard)); 680 tl_assert(datasize >= 1); 681 if (!CLG_(clo).simulate_cache) return; 682 tl_assert(datasize <= CLG_(min_line_size)); 683 684 /* Adding guarded memory actions and merging them with the existing 685 queue is too complex. Simply flush the queue and add this 686 action immediately. Since guarded loads and stores are pretty 687 rare, this is not thought likely to cause any noticeable 688 performance loss as a result of the loss of event-merging 689 opportunities. */ 690 tl_assert(clgs->events_used >= 0); 691 flushEvents(clgs); 692 tl_assert(clgs->events_used == 0); 693 /* Same as case Ev_Dw / case Ev_Dr in flushEvents, except with guard */ 694 IRExpr* i_node_expr; 695 const HChar* helperName; 696 void* helperAddr; 697 IRExpr** argv; 698 Int regparms; 699 IRDirty* di; 700 i_node_expr = mkIRExpr_HWord( (HWord)inode ); 701 helperName = isWrite ? CLG_(cachesim).log_0I1Dw_name 702 : CLG_(cachesim).log_0I1Dr_name; 703 helperAddr = isWrite ? CLG_(cachesim).log_0I1Dw 704 : CLG_(cachesim).log_0I1Dr; 705 argv = mkIRExprVec_3( i_node_expr, 706 ea, mkIRExpr_HWord( datasize ) ); 707 regparms = 3; 708 di = unsafeIRDirty_0_N( 709 regparms, 710 helperName, VG_(fnptr_to_fnentry)( helperAddr ), 711 argv ); 712 di->guard = guard; 713 addStmtToIRSB( clgs->sbOut, IRStmt_Dirty(di) ); 714 } 715 716 static 717 void addEvent_Bc ( ClgState* clgs, InstrInfo* inode, IRAtom* guard ) 718 { 719 Event* evt; 720 tl_assert(isIRAtom(guard)); 721 tl_assert(typeOfIRExpr(clgs->sbOut->tyenv, guard) 722 == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64)); 723 if (!CLG_(clo).simulate_branch) return; 724 725 if (clgs->events_used == N_EVENTS) 726 flushEvents(clgs); 727 tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS); 728 evt = &clgs->events[clgs->events_used]; 729 init_Event(evt); 730 evt->tag = Ev_Bc; 731 evt->inode = inode; 732 evt->Ev.Bc.taken = guard; 733 clgs->events_used++; 734 } 735 736 static 737 void addEvent_Bi ( ClgState* clgs, InstrInfo* inode, IRAtom* whereTo ) 738 { 739 Event* evt; 740 tl_assert(isIRAtom(whereTo)); 741 tl_assert(typeOfIRExpr(clgs->sbOut->tyenv, whereTo) 742 == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64)); 743 if (!CLG_(clo).simulate_branch) return; 744 745 if (clgs->events_used == N_EVENTS) 746 flushEvents(clgs); 747 tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS); 748 evt = &clgs->events[clgs->events_used]; 749 init_Event(evt); 750 evt->tag = Ev_Bi; 751 evt->inode = inode; 752 evt->Ev.Bi.dst = whereTo; 753 clgs->events_used++; 754 } 755 756 static 757 void addEvent_G ( ClgState* clgs, InstrInfo* inode ) 758 { 759 Event* evt; 760 if (!CLG_(clo).collect_bus) return; 761 762 if (clgs->events_used == N_EVENTS) 763 flushEvents(clgs); 764 tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS); 765 evt = &clgs->events[clgs->events_used]; 766 init_Event(evt); 767 evt->tag = Ev_G; 768 evt->inode = inode; 769 clgs->events_used++; 770 } 771 772 /* Initialise or check (if already seen before) an InstrInfo for next insn. 773 We only can set instr_offset/instr_size here. The required event set and 774 resulting cost offset depend on events (Ir/Dr/Dw/Dm) in guest 775 instructions. The event set is extended as required on flush of the event 776 queue (when Dm events were determined), cost offsets are determined at 777 end of BB instrumentation. */ 778 static 779 InstrInfo* next_InstrInfo ( ClgState* clgs, UInt instr_size ) 780 { 781 InstrInfo* ii; 782 tl_assert(clgs->ii_index >= 0); 783 tl_assert(clgs->ii_index < clgs->bb->instr_count); 784 ii = &clgs->bb->instr[ clgs->ii_index ]; 785 786 if (clgs->seen_before) { 787 CLG_ASSERT(ii->instr_offset == clgs->instr_offset); 788 CLG_ASSERT(ii->instr_size == instr_size); 789 } 790 else { 791 ii->instr_offset = clgs->instr_offset; 792 ii->instr_size = instr_size; 793 ii->cost_offset = 0; 794 ii->eventset = 0; 795 } 796 797 clgs->ii_index++; 798 clgs->instr_offset += instr_size; 799 CLG_(stat).distinct_instrs++; 800 801 return ii; 802 } 803 804 // return total number of cost values needed for this BB 805 static 806 UInt update_cost_offsets( ClgState* clgs ) 807 { 808 Int i; 809 InstrInfo* ii; 810 UInt cost_offset = 0; 811 812 CLG_ASSERT(clgs->bb->instr_count == clgs->ii_index); 813 for(i=0; i<clgs->ii_index; i++) { 814 ii = &clgs->bb->instr[i]; 815 if (clgs->seen_before) { 816 CLG_ASSERT(ii->cost_offset == cost_offset); 817 } else 818 ii->cost_offset = cost_offset; 819 cost_offset += ii->eventset ? ii->eventset->size : 0; 820 } 821 822 return cost_offset; 823 } 824 825 /*------------------------------------------------------------*/ 826 /*--- Instrumentation ---*/ 827 /*------------------------------------------------------------*/ 828 829 #if defined(VG_BIGENDIAN) 830 # define CLGEndness Iend_BE 831 #elif defined(VG_LITTLEENDIAN) 832 # define CLGEndness Iend_LE 833 #else 834 # error "Unknown endianness" 835 #endif 836 837 static 838 Addr IRConst2Addr(IRConst* con) 839 { 840 Addr addr; 841 842 if (sizeof(Addr) == 4) { 843 CLG_ASSERT( con->tag == Ico_U32 ); 844 addr = con->Ico.U32; 845 } 846 else if (sizeof(Addr) == 8) { 847 CLG_ASSERT( con->tag == Ico_U64 ); 848 addr = con->Ico.U64; 849 } 850 else 851 VG_(tool_panic)("Callgrind: invalid Addr type"); 852 853 return addr; 854 } 855 856 /* First pass over a BB to instrument, counting instructions and jumps 857 * This is needed for the size of the BB struct to allocate 858 * 859 * Called from CLG_(get_bb) 860 */ 861 void CLG_(collectBlockInfo)(IRSB* sbIn, 862 /*INOUT*/ UInt* instrs, 863 /*INOUT*/ UInt* cjmps, 864 /*INOUT*/ Bool* cjmp_inverted) 865 { 866 Int i; 867 IRStmt* st; 868 Addr instrAddr =0, jumpDst; 869 UInt instrLen = 0; 870 Bool toNextInstr = False; 871 872 // Ist_Exit has to be ignored in preamble code, before first IMark: 873 // preamble code is added by VEX for self modifying code, and has 874 // nothing to do with client code 875 Bool inPreamble = True; 876 877 if (!sbIn) return; 878 879 for (i = 0; i < sbIn->stmts_used; i++) { 880 st = sbIn->stmts[i]; 881 if (Ist_IMark == st->tag) { 882 inPreamble = False; 883 884 instrAddr = st->Ist.IMark.addr; 885 instrLen = st->Ist.IMark.len; 886 887 (*instrs)++; 888 toNextInstr = False; 889 } 890 if (inPreamble) continue; 891 if (Ist_Exit == st->tag) { 892 jumpDst = IRConst2Addr(st->Ist.Exit.dst); 893 toNextInstr = (jumpDst == instrAddr + instrLen); 894 895 (*cjmps)++; 896 } 897 } 898 899 /* if the last instructions of BB conditionally jumps to next instruction 900 * (= first instruction of next BB in memory), this is a inverted by VEX. 901 */ 902 *cjmp_inverted = toNextInstr; 903 } 904 905 static 906 void addConstMemStoreStmt( IRSB* bbOut, UWord addr, UInt val, IRType hWordTy) 907 { 908 addStmtToIRSB( bbOut, 909 IRStmt_Store(CLGEndness, 910 IRExpr_Const(hWordTy == Ity_I32 ? 911 IRConst_U32( addr ) : 912 IRConst_U64( addr )), 913 IRExpr_Const(IRConst_U32(val)) )); 914 } 915 916 917 /* add helper call to setup_bbcc, with pointer to BB struct as argument 918 * 919 * precondition for setup_bbcc: 920 * - jmps_passed has number of cond.jumps passed in last executed BB 921 * - current_bbcc has a pointer to the BBCC of the last executed BB 922 * Thus, if bbcc_jmpkind is != -1 (JmpNone), 923 * current_bbcc->bb->jmp_addr 924 * gives the address of the jump source. 925 * 926 * the setup does 2 things: 927 * - trace call: 928 * * Unwind own call stack, i.e sync our ESP with real ESP 929 * This is for ESP manipulation (longjmps, C++ exec handling) and RET 930 * * For CALLs or JMPs crossing objects, record call arg + 931 * push are on own call stack 932 * 933 * - prepare for cache log functions: 934 * set current_bbcc to BBCC that gets the costs for this BB execution 935 * attached 936 */ 937 static 938 void addBBSetupCall(ClgState* clgs) 939 { 940 IRDirty* di; 941 IRExpr *arg1, **argv; 942 943 arg1 = mkIRExpr_HWord( (HWord)clgs->bb ); 944 argv = mkIRExprVec_1(arg1); 945 di = unsafeIRDirty_0_N( 1, "setup_bbcc", 946 VG_(fnptr_to_fnentry)( & CLG_(setup_bbcc) ), 947 argv); 948 addStmtToIRSB( clgs->sbOut, IRStmt_Dirty(di) ); 949 } 950 951 952 static 953 IRSB* CLG_(instrument)( VgCallbackClosure* closure, 954 IRSB* sbIn, 955 const VexGuestLayout* layout, 956 const VexGuestExtents* vge, 957 const VexArchInfo* archinfo_host, 958 IRType gWordTy, IRType hWordTy ) 959 { 960 Int i; 961 IRStmt* st; 962 Addr origAddr; 963 InstrInfo* curr_inode = NULL; 964 ClgState clgs; 965 UInt cJumps = 0; 966 IRTypeEnv* tyenv = sbIn->tyenv; 967 968 if (gWordTy != hWordTy) { 969 /* We don't currently support this case. */ 970 VG_(tool_panic)("host/guest word size mismatch"); 971 } 972 973 // No instrumentation if it is switched off 974 if (! CLG_(instrument_state)) { 975 CLG_DEBUG(5, "instrument(BB %#lx) [Instrumentation OFF]\n", 976 (Addr)closure->readdr); 977 return sbIn; 978 } 979 980 CLG_DEBUG(3, "+ instrument(BB %#lx)\n", (Addr)closure->readdr); 981 982 /* Set up SB for instrumented IR */ 983 clgs.sbOut = deepCopyIRSBExceptStmts(sbIn); 984 985 // Copy verbatim any IR preamble preceding the first IMark 986 i = 0; 987 while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) { 988 addStmtToIRSB( clgs.sbOut, sbIn->stmts[i] ); 989 i++; 990 } 991 992 // Get the first statement, and origAddr from it 993 CLG_ASSERT(sbIn->stmts_used >0); 994 CLG_ASSERT(i < sbIn->stmts_used); 995 st = sbIn->stmts[i]; 996 CLG_ASSERT(Ist_IMark == st->tag); 997 998 origAddr = st->Ist.IMark.addr + st->Ist.IMark.delta; 999 CLG_ASSERT(origAddr == st->Ist.IMark.addr 1000 + st->Ist.IMark.delta); // XXX: check no overflow 1001 1002 /* Get BB struct (creating if necessary). 1003 * JS: The hash table is keyed with orig_addr_noredir -- important! 1004 * JW: Why? If it is because of different chasing of the redirection, 1005 * this is not needed, as chasing is switched off in callgrind 1006 */ 1007 clgs.bb = CLG_(get_bb)(origAddr, sbIn, &(clgs.seen_before)); 1008 1009 addBBSetupCall(&clgs); 1010 1011 // Set up running state 1012 clgs.events_used = 0; 1013 clgs.ii_index = 0; 1014 clgs.instr_offset = 0; 1015 1016 for (/*use current i*/; i < sbIn->stmts_used; i++) { 1017 1018 st = sbIn->stmts[i]; 1019 CLG_ASSERT(isFlatIRStmt(st)); 1020 1021 switch (st->tag) { 1022 case Ist_NoOp: 1023 case Ist_AbiHint: 1024 case Ist_Put: 1025 case Ist_PutI: 1026 case Ist_MBE: 1027 break; 1028 1029 case Ist_IMark: { 1030 Addr cia = st->Ist.IMark.addr + st->Ist.IMark.delta; 1031 UInt isize = st->Ist.IMark.len; 1032 CLG_ASSERT(clgs.instr_offset == cia - origAddr); 1033 // If Vex fails to decode an instruction, the size will be zero. 1034 // Pretend otherwise. 1035 if (isize == 0) isize = VG_MIN_INSTR_SZB; 1036 1037 // Sanity-check size. 1038 tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB) 1039 || VG_CLREQ_SZB == isize ); 1040 1041 // Init the inode, record it as the current one. 1042 // Subsequent Dr/Dw/Dm events from the same instruction will 1043 // also use it. 1044 curr_inode = next_InstrInfo (&clgs, isize); 1045 1046 addEvent_Ir( &clgs, curr_inode ); 1047 break; 1048 } 1049 1050 case Ist_WrTmp: { 1051 IRExpr* data = st->Ist.WrTmp.data; 1052 if (data->tag == Iex_Load) { 1053 IRExpr* aexpr = data->Iex.Load.addr; 1054 // Note also, endianness info is ignored. I guess 1055 // that's not interesting. 1056 addEvent_Dr( &clgs, curr_inode, 1057 sizeofIRType(data->Iex.Load.ty), aexpr ); 1058 } 1059 break; 1060 } 1061 1062 case Ist_Store: { 1063 IRExpr* data = st->Ist.Store.data; 1064 IRExpr* aexpr = st->Ist.Store.addr; 1065 addEvent_Dw( &clgs, curr_inode, 1066 sizeofIRType(typeOfIRExpr(sbIn->tyenv, data)), aexpr ); 1067 break; 1068 } 1069 1070 case Ist_StoreG: { 1071 IRStoreG* sg = st->Ist.StoreG.details; 1072 IRExpr* data = sg->data; 1073 IRExpr* addr = sg->addr; 1074 IRType type = typeOfIRExpr(tyenv, data); 1075 tl_assert(type != Ity_INVALID); 1076 addEvent_D_guarded( &clgs, curr_inode, 1077 sizeofIRType(type), addr, sg->guard, 1078 True/*isWrite*/ ); 1079 break; 1080 } 1081 1082 case Ist_LoadG: { 1083 IRLoadG* lg = st->Ist.LoadG.details; 1084 IRType type = Ity_INVALID; /* loaded type */ 1085 IRType typeWide = Ity_INVALID; /* after implicit widening */ 1086 IRExpr* addr = lg->addr; 1087 typeOfIRLoadGOp(lg->cvt, &typeWide, &type); 1088 tl_assert(type != Ity_INVALID); 1089 addEvent_D_guarded( &clgs, curr_inode, 1090 sizeofIRType(type), addr, lg->guard, 1091 False/*!isWrite*/ ); 1092 break; 1093 } 1094 1095 case Ist_Dirty: { 1096 Int dataSize; 1097 IRDirty* d = st->Ist.Dirty.details; 1098 if (d->mFx != Ifx_None) { 1099 /* This dirty helper accesses memory. Collect the details. */ 1100 tl_assert(d->mAddr != NULL); 1101 tl_assert(d->mSize != 0); 1102 dataSize = d->mSize; 1103 // Large (eg. 28B, 108B, 512B on x86) data-sized 1104 // instructions will be done inaccurately, but they're 1105 // very rare and this avoids errors from hitting more 1106 // than two cache lines in the simulation. 1107 if (CLG_(clo).simulate_cache && dataSize > CLG_(min_line_size)) 1108 dataSize = CLG_(min_line_size); 1109 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) 1110 addEvent_Dr( &clgs, curr_inode, dataSize, d->mAddr ); 1111 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) 1112 addEvent_Dw( &clgs, curr_inode, dataSize, d->mAddr ); 1113 } else { 1114 tl_assert(d->mAddr == NULL); 1115 tl_assert(d->mSize == 0); 1116 } 1117 break; 1118 } 1119 1120 case Ist_CAS: { 1121 /* We treat it as a read and a write of the location. I 1122 think that is the same behaviour as it was before IRCAS 1123 was introduced, since prior to that point, the Vex 1124 front ends would translate a lock-prefixed instruction 1125 into a (normal) read followed by a (normal) write. */ 1126 Int dataSize; 1127 IRCAS* cas = st->Ist.CAS.details; 1128 CLG_ASSERT(cas->addr && isIRAtom(cas->addr)); 1129 CLG_ASSERT(cas->dataLo); 1130 dataSize = sizeofIRType(typeOfIRExpr(sbIn->tyenv, cas->dataLo)); 1131 if (cas->dataHi != NULL) 1132 dataSize *= 2; /* since this is a doubleword-cas */ 1133 addEvent_Dr( &clgs, curr_inode, dataSize, cas->addr ); 1134 addEvent_Dw( &clgs, curr_inode, dataSize, cas->addr ); 1135 addEvent_G( &clgs, curr_inode ); 1136 break; 1137 } 1138 1139 case Ist_LLSC: { 1140 IRType dataTy; 1141 if (st->Ist.LLSC.storedata == NULL) { 1142 /* LL */ 1143 dataTy = typeOfIRTemp(sbIn->tyenv, st->Ist.LLSC.result); 1144 addEvent_Dr( &clgs, curr_inode, 1145 sizeofIRType(dataTy), st->Ist.LLSC.addr ); 1146 /* flush events before LL, should help SC to succeed */ 1147 flushEvents( &clgs ); 1148 } else { 1149 /* SC */ 1150 dataTy = typeOfIRExpr(sbIn->tyenv, st->Ist.LLSC.storedata); 1151 addEvent_Dw( &clgs, curr_inode, 1152 sizeofIRType(dataTy), st->Ist.LLSC.addr ); 1153 /* I don't know whether the global-bus-lock cost should 1154 be attributed to the LL or the SC, but it doesn't 1155 really matter since they always have to be used in 1156 pairs anyway. Hence put it (quite arbitrarily) on 1157 the SC. */ 1158 addEvent_G( &clgs, curr_inode ); 1159 } 1160 break; 1161 } 1162 1163 case Ist_Exit: { 1164 Bool guest_exit, inverted; 1165 1166 /* VEX code generation sometimes inverts conditional branches. 1167 * As Callgrind counts (conditional) jumps, it has to correct 1168 * inversions. The heuristic is the following: 1169 * (1) Callgrind switches off SB chasing and unrolling, and 1170 * therefore it assumes that a candidate for inversion only is 1171 * the last conditional branch in an SB. 1172 * (2) inversion is assumed if the branch jumps to the address of 1173 * the next guest instruction in memory. 1174 * This heuristic is precalculated in CLG_(collectBlockInfo)(). 1175 * 1176 * Branching behavior is also used for branch prediction. Note that 1177 * above heuristic is different from what Cachegrind does. 1178 * Cachegrind uses (2) for all branches. 1179 */ 1180 if (cJumps+1 == clgs.bb->cjmp_count) 1181 inverted = clgs.bb->cjmp_inverted; 1182 else 1183 inverted = False; 1184 1185 // call branch predictor only if this is a branch in guest code 1186 guest_exit = (st->Ist.Exit.jk == Ijk_Boring) || 1187 (st->Ist.Exit.jk == Ijk_Call) || 1188 (st->Ist.Exit.jk == Ijk_Ret); 1189 1190 if (guest_exit) { 1191 /* Stuff to widen the guard expression to a host word, so 1192 we can pass it to the branch predictor simulation 1193 functions easily. */ 1194 IRType tyW = hWordTy; 1195 IROp widen = tyW==Ity_I32 ? Iop_1Uto32 : Iop_1Uto64; 1196 IROp opXOR = tyW==Ity_I32 ? Iop_Xor32 : Iop_Xor64; 1197 IRTemp guard1 = newIRTemp(clgs.sbOut->tyenv, Ity_I1); 1198 IRTemp guardW = newIRTemp(clgs.sbOut->tyenv, tyW); 1199 IRTemp guard = newIRTemp(clgs.sbOut->tyenv, tyW); 1200 IRExpr* one = tyW==Ity_I32 ? IRExpr_Const(IRConst_U32(1)) 1201 : IRExpr_Const(IRConst_U64(1)); 1202 1203 /* Widen the guard expression. */ 1204 addStmtToIRSB( clgs.sbOut, 1205 IRStmt_WrTmp( guard1, st->Ist.Exit.guard )); 1206 addStmtToIRSB( clgs.sbOut, 1207 IRStmt_WrTmp( guardW, 1208 IRExpr_Unop(widen, 1209 IRExpr_RdTmp(guard1))) ); 1210 /* If the exit is inverted, invert the sense of the guard. */ 1211 addStmtToIRSB( 1212 clgs.sbOut, 1213 IRStmt_WrTmp( 1214 guard, 1215 inverted ? IRExpr_Binop(opXOR, IRExpr_RdTmp(guardW), one) 1216 : IRExpr_RdTmp(guardW) 1217 )); 1218 /* And post the event. */ 1219 addEvent_Bc( &clgs, curr_inode, IRExpr_RdTmp(guard) ); 1220 } 1221 1222 /* We may never reach the next statement, so need to flush 1223 all outstanding transactions now. */ 1224 flushEvents( &clgs ); 1225 1226 CLG_ASSERT(clgs.ii_index>0); 1227 if (!clgs.seen_before) { 1228 ClgJumpKind jk; 1229 1230 if (st->Ist.Exit.jk == Ijk_Call) jk = jk_Call; 1231 else if (st->Ist.Exit.jk == Ijk_Ret) jk = jk_Return; 1232 else { 1233 if (IRConst2Addr(st->Ist.Exit.dst) == 1234 origAddr + curr_inode->instr_offset + curr_inode->instr_size) 1235 jk = jk_None; 1236 else 1237 jk = jk_Jump; 1238 } 1239 1240 clgs.bb->jmp[cJumps].instr = clgs.ii_index-1; 1241 clgs.bb->jmp[cJumps].jmpkind = jk; 1242 } 1243 1244 /* Update global variable jmps_passed before the jump 1245 * A correction is needed if VEX inverted the last jump condition 1246 */ 1247 UInt val = inverted ? cJumps+1 : cJumps; 1248 addConstMemStoreStmt( clgs.sbOut, 1249 (UWord) &CLG_(current_state).jmps_passed, 1250 val, hWordTy); 1251 cJumps++; 1252 1253 break; 1254 } 1255 1256 default: 1257 tl_assert(0); 1258 break; 1259 } 1260 1261 /* Copy the original statement */ 1262 addStmtToIRSB( clgs.sbOut, st ); 1263 1264 CLG_DEBUGIF(5) { 1265 VG_(printf)(" pass "); 1266 ppIRStmt(st); 1267 VG_(printf)("\n"); 1268 } 1269 } 1270 1271 /* Deal with branches to unknown destinations. Except ignore ones 1272 which are function returns as we assume the return stack 1273 predictor never mispredicts. */ 1274 if ((sbIn->jumpkind == Ijk_Boring) || (sbIn->jumpkind == Ijk_Call)) { 1275 if (0) { ppIRExpr( sbIn->next ); VG_(printf)("\n"); } 1276 switch (sbIn->next->tag) { 1277 case Iex_Const: 1278 break; /* boring - branch to known address */ 1279 case Iex_RdTmp: 1280 /* looks like an indirect branch (branch to unknown) */ 1281 addEvent_Bi( &clgs, curr_inode, sbIn->next ); 1282 break; 1283 default: 1284 /* shouldn't happen - if the incoming IR is properly 1285 flattened, should only have tmp and const cases to 1286 consider. */ 1287 tl_assert(0); 1288 } 1289 } 1290 1291 /* At the end of the bb. Flush outstandings. */ 1292 flushEvents( &clgs ); 1293 1294 /* Update global variable jmps_passed at end of SB. 1295 * As CLG_(current_state).jmps_passed is reset to 0 in setup_bbcc, 1296 * this can be omitted if there is no conditional jump in this SB. 1297 * A correction is needed if VEX inverted the last jump condition 1298 */ 1299 if (cJumps>0) { 1300 UInt jmps_passed = cJumps; 1301 if (clgs.bb->cjmp_inverted) jmps_passed--; 1302 addConstMemStoreStmt( clgs.sbOut, 1303 (UWord) &CLG_(current_state).jmps_passed, 1304 jmps_passed, hWordTy); 1305 } 1306 CLG_ASSERT(clgs.bb->cjmp_count == cJumps); 1307 CLG_ASSERT(clgs.bb->instr_count == clgs.ii_index); 1308 1309 /* Info for final exit from BB */ 1310 { 1311 ClgJumpKind jk; 1312 1313 if (sbIn->jumpkind == Ijk_Call) jk = jk_Call; 1314 else if (sbIn->jumpkind == Ijk_Ret) jk = jk_Return; 1315 else { 1316 jk = jk_Jump; 1317 if ((sbIn->next->tag == Iex_Const) && 1318 (IRConst2Addr(sbIn->next->Iex.Const.con) == 1319 origAddr + clgs.instr_offset)) 1320 jk = jk_None; 1321 } 1322 clgs.bb->jmp[cJumps].jmpkind = jk; 1323 /* Instruction index of the call/ret at BB end 1324 * (it is wrong for fall-through, but does not matter) */ 1325 clgs.bb->jmp[cJumps].instr = clgs.ii_index-1; 1326 } 1327 1328 /* swap information of last exit with final exit if inverted */ 1329 if (clgs.bb->cjmp_inverted) { 1330 ClgJumpKind jk; 1331 UInt instr; 1332 1333 jk = clgs.bb->jmp[cJumps].jmpkind; 1334 clgs.bb->jmp[cJumps].jmpkind = clgs.bb->jmp[cJumps-1].jmpkind; 1335 clgs.bb->jmp[cJumps-1].jmpkind = jk; 1336 instr = clgs.bb->jmp[cJumps].instr; 1337 clgs.bb->jmp[cJumps].instr = clgs.bb->jmp[cJumps-1].instr; 1338 clgs.bb->jmp[cJumps-1].instr = instr; 1339 } 1340 1341 if (clgs.seen_before) { 1342 CLG_ASSERT(clgs.bb->cost_count == update_cost_offsets(&clgs)); 1343 CLG_ASSERT(clgs.bb->instr_len == clgs.instr_offset); 1344 } 1345 else { 1346 clgs.bb->cost_count = update_cost_offsets(&clgs); 1347 clgs.bb->instr_len = clgs.instr_offset; 1348 } 1349 1350 CLG_DEBUG(3, "- instrument(BB %#lx): byteLen %u, CJumps %u, CostLen %u\n", 1351 origAddr, clgs.bb->instr_len, 1352 clgs.bb->cjmp_count, clgs.bb->cost_count); 1353 if (cJumps>0) { 1354 CLG_DEBUG(3, " [ "); 1355 for (i=0;i<cJumps;i++) 1356 CLG_DEBUG(3, "%u ", clgs.bb->jmp[i].instr); 1357 CLG_DEBUG(3, "], last inverted: %s \n", 1358 clgs.bb->cjmp_inverted ? "yes":"no"); 1359 } 1360 1361 return clgs.sbOut; 1362 } 1363 1364 /*--------------------------------------------------------------------*/ 1365 /*--- Discarding BB info ---*/ 1366 /*--------------------------------------------------------------------*/ 1367 1368 // Called when a translation is removed from the translation cache for 1369 // any reason at all: to free up space, because the guest code was 1370 // unmapped or modified, or for any arbitrary reason. 1371 static 1372 void clg_discard_superblock_info ( Addr orig_addr, VexGuestExtents vge ) 1373 { 1374 tl_assert(vge.n_used > 0); 1375 1376 if (0) 1377 VG_(printf)( "discard_superblock_info: %p, %p, %llu\n", 1378 (void*)orig_addr, 1379 (void*)vge.base[0], (ULong)vge.len[0]); 1380 1381 // Get BB info, remove from table, free BB info. Simple! 1382 // When created, the BB is keyed by the first instruction address, 1383 // (not orig_addr, but eventually redirected address). Thus, we 1384 // use the first instruction address in vge. 1385 CLG_(delete_bb)(vge.base[0]); 1386 } 1387 1388 1389 /*------------------------------------------------------------*/ 1390 /*--- CLG_(fini)() and related function ---*/ 1391 /*------------------------------------------------------------*/ 1392 1393 1394 1395 static void zero_thread_cost(thread_info* t) 1396 { 1397 Int i; 1398 1399 for(i = 0; i < CLG_(current_call_stack).sp; i++) { 1400 if (!CLG_(current_call_stack).entry[i].jcc) continue; 1401 1402 /* reset call counters to current for active calls */ 1403 CLG_(copy_cost)( CLG_(sets).full, 1404 CLG_(current_call_stack).entry[i].enter_cost, 1405 CLG_(current_state).cost ); 1406 CLG_(current_call_stack).entry[i].jcc->call_counter = 0; 1407 } 1408 1409 CLG_(forall_bbccs)(CLG_(zero_bbcc)); 1410 1411 /* set counter for last dump */ 1412 CLG_(copy_cost)( CLG_(sets).full, 1413 t->lastdump_cost, CLG_(current_state).cost ); 1414 } 1415 1416 void CLG_(zero_all_cost)(Bool only_current_thread) 1417 { 1418 if (VG_(clo_verbosity) > 1) 1419 VG_(message)(Vg_DebugMsg, " Zeroing costs...\n"); 1420 1421 if (only_current_thread) 1422 zero_thread_cost(CLG_(get_current_thread)()); 1423 else 1424 CLG_(forall_threads)(zero_thread_cost); 1425 1426 if (VG_(clo_verbosity) > 1) 1427 VG_(message)(Vg_DebugMsg, " ...done\n"); 1428 } 1429 1430 static 1431 void unwind_thread(thread_info* t) 1432 { 1433 /* unwind signal handlers */ 1434 while(CLG_(current_state).sig !=0) 1435 CLG_(post_signal)(CLG_(current_tid),CLG_(current_state).sig); 1436 1437 /* unwind regular call stack */ 1438 while(CLG_(current_call_stack).sp>0) 1439 CLG_(pop_call_stack)(); 1440 1441 /* reset context and function stack for context generation */ 1442 CLG_(init_exec_state)( &CLG_(current_state) ); 1443 CLG_(current_fn_stack).top = CLG_(current_fn_stack).bottom; 1444 } 1445 1446 static 1447 void zero_state_cost(thread_info* t) 1448 { 1449 CLG_(zero_cost)( CLG_(sets).full, CLG_(current_state).cost ); 1450 } 1451 1452 void CLG_(set_instrument_state)(const HChar* reason, Bool state) 1453 { 1454 if (CLG_(instrument_state) == state) { 1455 CLG_DEBUG(2, "%s: instrumentation already %s\n", 1456 reason, state ? "ON" : "OFF"); 1457 return; 1458 } 1459 CLG_(instrument_state) = state; 1460 CLG_DEBUG(2, "%s: Switching instrumentation %s ...\n", 1461 reason, state ? "ON" : "OFF"); 1462 1463 VG_(discard_translations_safely)( (Addr)0x1000, ~(SizeT)0xfff, "callgrind"); 1464 1465 /* reset internal state: call stacks, simulator */ 1466 CLG_(forall_threads)(unwind_thread); 1467 CLG_(forall_threads)(zero_state_cost); 1468 (*CLG_(cachesim).clear)(); 1469 1470 if (VG_(clo_verbosity) > 1) 1471 VG_(message)(Vg_DebugMsg, "%s: instrumentation switched %s\n", 1472 reason, state ? "ON" : "OFF"); 1473 } 1474 1475 /* helper for dump_state_togdb */ 1476 static void dump_state_of_thread_togdb(thread_info* ti) 1477 { 1478 static FullCost sum = 0, tmp = 0; 1479 Int t, i; 1480 BBCC *from, *to; 1481 call_entry* ce; 1482 HChar *mcost; 1483 1484 t = CLG_(current_tid); 1485 CLG_(init_cost_lz)( CLG_(sets).full, &sum ); 1486 CLG_(copy_cost_lz)( CLG_(sets).full, &tmp, ti->lastdump_cost ); 1487 CLG_(add_diff_cost)( CLG_(sets).full, sum, ti->lastdump_cost, 1488 ti->states.entry[0]->cost); 1489 CLG_(copy_cost)( CLG_(sets).full, ti->lastdump_cost, tmp ); 1490 mcost = CLG_(mappingcost_as_string)(CLG_(dumpmap), sum); 1491 VG_(gdb_printf)("events-%d: %s\n", t, mcost); 1492 VG_(free)(mcost); 1493 VG_(gdb_printf)("frames-%d: %d\n", t, CLG_(current_call_stack).sp); 1494 1495 ce = 0; 1496 for(i = 0; i < CLG_(current_call_stack).sp; i++) { 1497 ce = CLG_(get_call_entry)(i); 1498 /* if this frame is skipped, we don't have counters */ 1499 if (!ce->jcc) continue; 1500 1501 from = ce->jcc->from; 1502 VG_(gdb_printf)("function-%d-%d: %s\n",t, i, from->cxt->fn[0]->name); 1503 VG_(gdb_printf)("calls-%d-%d: %llu\n",t, i, ce->jcc->call_counter); 1504 1505 /* FIXME: EventSets! */ 1506 CLG_(copy_cost)( CLG_(sets).full, sum, ce->jcc->cost ); 1507 CLG_(copy_cost)( CLG_(sets).full, tmp, ce->enter_cost ); 1508 CLG_(add_diff_cost)( CLG_(sets).full, sum, 1509 ce->enter_cost, CLG_(current_state).cost ); 1510 CLG_(copy_cost)( CLG_(sets).full, ce->enter_cost, tmp ); 1511 1512 mcost = CLG_(mappingcost_as_string)(CLG_(dumpmap), sum); 1513 VG_(gdb_printf)("events-%d-%d: %s\n",t, i, mcost); 1514 VG_(free)(mcost); 1515 } 1516 if (ce && ce->jcc) { 1517 to = ce->jcc->to; 1518 VG_(gdb_printf)("function-%d-%d: %s\n",t, i, to->cxt->fn[0]->name ); 1519 } 1520 } 1521 1522 /* Dump current state */ 1523 static void dump_state_togdb(void) 1524 { 1525 thread_info** th; 1526 int t; 1527 Int orig_tid = CLG_(current_tid); 1528 1529 VG_(gdb_printf)("instrumentation: %s\n", 1530 CLG_(instrument_state) ? "on":"off"); 1531 if (!CLG_(instrument_state)) return; 1532 1533 VG_(gdb_printf)("executed-bbs: %llu\n", CLG_(stat).bb_executions); 1534 VG_(gdb_printf)("executed-calls: %llu\n", CLG_(stat).call_counter); 1535 VG_(gdb_printf)("distinct-bbs: %d\n", CLG_(stat).distinct_bbs); 1536 VG_(gdb_printf)("distinct-calls: %d\n", CLG_(stat).distinct_jccs); 1537 VG_(gdb_printf)("distinct-functions: %d\n", CLG_(stat).distinct_fns); 1538 VG_(gdb_printf)("distinct-contexts: %d\n", CLG_(stat).distinct_contexts); 1539 1540 /* "events:" line. Given here because it will be dynamic in the future */ 1541 HChar *evmap = CLG_(eventmapping_as_string)(CLG_(dumpmap)); 1542 VG_(gdb_printf)("events: %s\n", evmap); 1543 VG_(free)(evmap); 1544 /* "part:" line (number of last part. Is 0 at start */ 1545 VG_(gdb_printf)("part: %d\n", CLG_(get_dump_counter)()); 1546 1547 /* threads */ 1548 th = CLG_(get_threads)(); 1549 VG_(gdb_printf)("threads:"); 1550 for(t=1;t<VG_N_THREADS;t++) { 1551 if (!th[t]) continue; 1552 VG_(gdb_printf)(" %d", t); 1553 } 1554 VG_(gdb_printf)("\n"); 1555 VG_(gdb_printf)("current-tid: %d\n", orig_tid); 1556 CLG_(forall_threads)(dump_state_of_thread_togdb); 1557 } 1558 1559 1560 static void print_monitor_help ( void ) 1561 { 1562 VG_(gdb_printf) ("\n"); 1563 VG_(gdb_printf) ("callgrind monitor commands:\n"); 1564 VG_(gdb_printf) (" dump [<dump_hint>]\n"); 1565 VG_(gdb_printf) (" dump counters\n"); 1566 VG_(gdb_printf) (" zero\n"); 1567 VG_(gdb_printf) (" zero counters\n"); 1568 VG_(gdb_printf) (" status\n"); 1569 VG_(gdb_printf) (" print status\n"); 1570 VG_(gdb_printf) (" instrumentation [on|off]\n"); 1571 VG_(gdb_printf) (" get/set (if on/off given) instrumentation state\n"); 1572 VG_(gdb_printf) ("\n"); 1573 } 1574 1575 /* return True if request recognised, False otherwise */ 1576 static Bool handle_gdb_monitor_command (ThreadId tid, const HChar *req) 1577 { 1578 HChar* wcmd; 1579 HChar s[VG_(strlen(req)) + 1]; /* copy for strtok_r */ 1580 HChar *ssaveptr; 1581 1582 VG_(strcpy) (s, req); 1583 1584 wcmd = VG_(strtok_r) (s, " ", &ssaveptr); 1585 switch (VG_(keyword_id) ("help dump zero status instrumentation", 1586 wcmd, kwd_report_duplicated_matches)) { 1587 case -2: /* multiple matches */ 1588 return True; 1589 case -1: /* not found */ 1590 return False; 1591 case 0: /* help */ 1592 print_monitor_help(); 1593 return True; 1594 case 1: { /* dump */ 1595 CLG_(dump_profile)(req, False); 1596 return True; 1597 } 1598 case 2: { /* zero */ 1599 CLG_(zero_all_cost)(False); 1600 return True; 1601 } 1602 1603 case 3: { /* status */ 1604 HChar* arg = VG_(strtok_r) (0, " ", &ssaveptr); 1605 if (arg && (VG_(strcmp)(arg, "internal") == 0)) { 1606 /* internal interface to callgrind_control */ 1607 dump_state_togdb(); 1608 return True; 1609 } 1610 1611 if (!CLG_(instrument_state)) { 1612 VG_(gdb_printf)("No status available as instrumentation is switched off\n"); 1613 } else { 1614 // Status information to be improved ... 1615 thread_info** th = CLG_(get_threads)(); 1616 Int t, tcount = 0; 1617 for(t=1;t<VG_N_THREADS;t++) 1618 if (th[t]) tcount++; 1619 VG_(gdb_printf)("%d thread(s) running.\n", tcount); 1620 } 1621 return True; 1622 } 1623 1624 case 4: { /* instrumentation */ 1625 HChar* arg = VG_(strtok_r) (0, " ", &ssaveptr); 1626 if (!arg) { 1627 VG_(gdb_printf)("instrumentation: %s\n", 1628 CLG_(instrument_state) ? "on":"off"); 1629 } 1630 else 1631 CLG_(set_instrument_state)("Command", VG_(strcmp)(arg,"off")!=0); 1632 return True; 1633 } 1634 1635 default: 1636 tl_assert(0); 1637 return False; 1638 } 1639 } 1640 1641 static 1642 Bool CLG_(handle_client_request)(ThreadId tid, UWord *args, UWord *ret) 1643 { 1644 if (!VG_IS_TOOL_USERREQ('C','T',args[0]) 1645 && VG_USERREQ__GDB_MONITOR_COMMAND != args[0]) 1646 return False; 1647 1648 switch(args[0]) { 1649 case VG_USERREQ__DUMP_STATS: 1650 CLG_(dump_profile)("Client Request", True); 1651 *ret = 0; /* meaningless */ 1652 break; 1653 1654 case VG_USERREQ__DUMP_STATS_AT: 1655 { 1656 const HChar *arg = (HChar*)args[1]; 1657 HChar buf[30 + VG_(strlen)(arg)]; // large enough 1658 VG_(sprintf)(buf,"Client Request: %s", arg); 1659 CLG_(dump_profile)(buf, True); 1660 *ret = 0; /* meaningless */ 1661 } 1662 break; 1663 1664 case VG_USERREQ__ZERO_STATS: 1665 CLG_(zero_all_cost)(True); 1666 *ret = 0; /* meaningless */ 1667 break; 1668 1669 case VG_USERREQ__TOGGLE_COLLECT: 1670 CLG_(current_state).collect = !CLG_(current_state).collect; 1671 CLG_DEBUG(2, "Client Request: toggled collection state to %s\n", 1672 CLG_(current_state).collect ? "ON" : "OFF"); 1673 *ret = 0; /* meaningless */ 1674 break; 1675 1676 case VG_USERREQ__START_INSTRUMENTATION: 1677 CLG_(set_instrument_state)("Client Request", True); 1678 *ret = 0; /* meaningless */ 1679 break; 1680 1681 case VG_USERREQ__STOP_INSTRUMENTATION: 1682 CLG_(set_instrument_state)("Client Request", False); 1683 *ret = 0; /* meaningless */ 1684 break; 1685 1686 case VG_USERREQ__GDB_MONITOR_COMMAND: { 1687 Bool handled = handle_gdb_monitor_command (tid, (HChar*)args[1]); 1688 if (handled) 1689 *ret = 1; 1690 else 1691 *ret = 0; 1692 return handled; 1693 } 1694 default: 1695 return False; 1696 } 1697 1698 return True; 1699 } 1700 1701 1702 /* Syscall Timing */ 1703 1704 /* struct timeval syscalltime[VG_N_THREADS]; */ 1705 #if CLG_MICROSYSTIME 1706 ULong *syscalltime; 1707 #else 1708 UInt *syscalltime; 1709 #endif 1710 1711 static 1712 void CLG_(pre_syscalltime)(ThreadId tid, UInt syscallno, 1713 UWord* args, UInt nArgs) 1714 { 1715 if (CLG_(clo).collect_systime) { 1716 #if CLG_MICROSYSTIME 1717 struct vki_timeval tv_now; 1718 VG_(gettimeofday)(&tv_now, NULL); 1719 syscalltime[tid] = tv_now.tv_sec * 1000000ULL + tv_now.tv_usec; 1720 #else 1721 syscalltime[tid] = VG_(read_millisecond_timer)(); 1722 #endif 1723 } 1724 } 1725 1726 static 1727 void CLG_(post_syscalltime)(ThreadId tid, UInt syscallno, 1728 UWord* args, UInt nArgs, SysRes res) 1729 { 1730 if (CLG_(clo).collect_systime && 1731 CLG_(current_state).bbcc) { 1732 Int o; 1733 #if CLG_MICROSYSTIME 1734 struct vki_timeval tv_now; 1735 ULong diff; 1736 1737 VG_(gettimeofday)(&tv_now, NULL); 1738 diff = (tv_now.tv_sec * 1000000ULL + tv_now.tv_usec) - syscalltime[tid]; 1739 #else 1740 UInt diff = VG_(read_millisecond_timer)() - syscalltime[tid]; 1741 #endif 1742 1743 /* offset o is for "SysCount", o+1 for "SysTime" */ 1744 o = fullOffset(EG_SYS); 1745 CLG_ASSERT(o>=0); 1746 CLG_DEBUG(0," Time (Off %d) for Syscall %u: %llu\n", o, syscallno, 1747 (ULong)diff); 1748 1749 CLG_(current_state).cost[o] ++; 1750 CLG_(current_state).cost[o+1] += diff; 1751 if (!CLG_(current_state).bbcc->skipped) 1752 CLG_(init_cost_lz)(CLG_(sets).full, 1753 &(CLG_(current_state).bbcc->skipped)); 1754 CLG_(current_state).bbcc->skipped[o] ++; 1755 CLG_(current_state).bbcc->skipped[o+1] += diff; 1756 } 1757 } 1758 1759 static UInt ULong_width(ULong n) 1760 { 1761 UInt w = 0; 1762 while (n > 0) { 1763 n = n / 10; 1764 w++; 1765 } 1766 if (w == 0) w = 1; 1767 return w + (w-1)/3; // add space for commas 1768 } 1769 1770 static 1771 void branchsim_printstat(int l1, int l2, int l3) 1772 { 1773 static HChar fmt[128]; // large enough 1774 FullCost total; 1775 ULong Bc_total_b, Bc_total_mp, Bi_total_b, Bi_total_mp; 1776 ULong B_total_b, B_total_mp; 1777 1778 total = CLG_(total_cost); 1779 Bc_total_b = total[ fullOffset(EG_BC) ]; 1780 Bc_total_mp = total[ fullOffset(EG_BC)+1 ]; 1781 Bi_total_b = total[ fullOffset(EG_BI) ]; 1782 Bi_total_mp = total[ fullOffset(EG_BI)+1 ]; 1783 1784 /* Make format string, getting width right for numbers */ 1785 VG_(sprintf)(fmt, "%%s %%,%dllu (%%,%dllu cond + %%,%dllu ind)\n", 1786 l1, l2, l3); 1787 1788 if (0 == Bc_total_b) Bc_total_b = 1; 1789 if (0 == Bi_total_b) Bi_total_b = 1; 1790 B_total_b = Bc_total_b + Bi_total_b; 1791 B_total_mp = Bc_total_mp + Bi_total_mp; 1792 1793 VG_(umsg)("\n"); 1794 VG_(umsg)(fmt, "Branches: ", 1795 B_total_b, Bc_total_b, Bi_total_b); 1796 1797 VG_(umsg)(fmt, "Mispredicts: ", 1798 B_total_mp, Bc_total_mp, Bi_total_mp); 1799 1800 VG_(umsg)("Mispred rate: %*.1f%% (%*.1f%% + %*.1f%% )\n", 1801 l1, B_total_mp * 100.0 / B_total_b, 1802 l2, Bc_total_mp * 100.0 / Bc_total_b, 1803 l3, Bi_total_mp * 100.0 / Bi_total_b); 1804 } 1805 1806 static 1807 void clg_print_stats(void) 1808 { 1809 int BB_lookups = 1810 CLG_(stat).full_debug_BBs + 1811 CLG_(stat).fn_name_debug_BBs + 1812 CLG_(stat).file_line_debug_BBs + 1813 CLG_(stat).no_debug_BBs; 1814 1815 /* Hash table stats */ 1816 VG_(message)(Vg_DebugMsg, "Distinct objects: %d\n", 1817 CLG_(stat).distinct_objs); 1818 VG_(message)(Vg_DebugMsg, "Distinct files: %d\n", 1819 CLG_(stat).distinct_files); 1820 VG_(message)(Vg_DebugMsg, "Distinct fns: %d\n", 1821 CLG_(stat).distinct_fns); 1822 VG_(message)(Vg_DebugMsg, "Distinct contexts:%d\n", 1823 CLG_(stat).distinct_contexts); 1824 VG_(message)(Vg_DebugMsg, "Distinct BBs: %d\n", 1825 CLG_(stat).distinct_bbs); 1826 VG_(message)(Vg_DebugMsg, "Cost entries: %u (Chunks %u)\n", 1827 CLG_(costarray_entries), CLG_(costarray_chunks)); 1828 VG_(message)(Vg_DebugMsg, "Distinct BBCCs: %d\n", 1829 CLG_(stat).distinct_bbccs); 1830 VG_(message)(Vg_DebugMsg, "Distinct JCCs: %d\n", 1831 CLG_(stat).distinct_jccs); 1832 VG_(message)(Vg_DebugMsg, "Distinct skips: %d\n", 1833 CLG_(stat).distinct_skips); 1834 VG_(message)(Vg_DebugMsg, "BB lookups: %d\n", 1835 BB_lookups); 1836 if (BB_lookups>0) { 1837 VG_(message)(Vg_DebugMsg, "With full debug info:%3d%% (%d)\n", 1838 CLG_(stat).full_debug_BBs * 100 / BB_lookups, 1839 CLG_(stat).full_debug_BBs); 1840 VG_(message)(Vg_DebugMsg, "With file/line debug info:%3d%% (%d)\n", 1841 CLG_(stat).file_line_debug_BBs * 100 / BB_lookups, 1842 CLG_(stat).file_line_debug_BBs); 1843 VG_(message)(Vg_DebugMsg, "With fn name debug info:%3d%% (%d)\n", 1844 CLG_(stat).fn_name_debug_BBs * 100 / BB_lookups, 1845 CLG_(stat).fn_name_debug_BBs); 1846 VG_(message)(Vg_DebugMsg, "With no debug info:%3d%% (%d)\n", 1847 CLG_(stat).no_debug_BBs * 100 / BB_lookups, 1848 CLG_(stat).no_debug_BBs); 1849 } 1850 VG_(message)(Vg_DebugMsg, "BBCC Clones: %d\n", 1851 CLG_(stat).bbcc_clones); 1852 VG_(message)(Vg_DebugMsg, "BBs Retranslated: %d\n", 1853 CLG_(stat).bb_retranslations); 1854 VG_(message)(Vg_DebugMsg, "Distinct instrs: %d\n", 1855 CLG_(stat).distinct_instrs); 1856 1857 VG_(message)(Vg_DebugMsg, "LRU Contxt Misses: %d\n", 1858 CLG_(stat).cxt_lru_misses); 1859 VG_(message)(Vg_DebugMsg, "LRU BBCC Misses: %d\n", 1860 CLG_(stat).bbcc_lru_misses); 1861 VG_(message)(Vg_DebugMsg, "LRU JCC Misses: %d\n", 1862 CLG_(stat).jcc_lru_misses); 1863 VG_(message)(Vg_DebugMsg, "BBs Executed: %llu\n", 1864 CLG_(stat).bb_executions); 1865 VG_(message)(Vg_DebugMsg, "Calls: %llu\n", 1866 CLG_(stat).call_counter); 1867 VG_(message)(Vg_DebugMsg, "CondJMP followed: %llu\n", 1868 CLG_(stat).jcnd_counter); 1869 VG_(message)(Vg_DebugMsg, "Boring JMPs: %llu\n", 1870 CLG_(stat).jump_counter); 1871 VG_(message)(Vg_DebugMsg, "Recursive calls: %llu\n", 1872 CLG_(stat).rec_call_counter); 1873 VG_(message)(Vg_DebugMsg, "Returns: %llu\n", 1874 CLG_(stat).ret_counter); 1875 } 1876 1877 1878 static 1879 void finish(void) 1880 { 1881 HChar fmt[128]; // large enough 1882 Int l1, l2, l3; 1883 FullCost total; 1884 1885 CLG_DEBUG(0, "finish()\n"); 1886 1887 (*CLG_(cachesim).finish)(); 1888 1889 /* pop all remaining items from CallStack for correct sum 1890 */ 1891 CLG_(forall_threads)(unwind_thread); 1892 1893 CLG_(dump_profile)(0, False); 1894 1895 if (VG_(clo_verbosity) == 0) return; 1896 1897 if (VG_(clo_stats)) { 1898 VG_(message)(Vg_DebugMsg, "\n"); 1899 clg_print_stats(); 1900 VG_(message)(Vg_DebugMsg, "\n"); 1901 } 1902 1903 HChar *evmap = CLG_(eventmapping_as_string)(CLG_(dumpmap)); 1904 VG_(message)(Vg_UserMsg, "Events : %s\n", evmap); 1905 VG_(free)(evmap); 1906 HChar *mcost = CLG_(mappingcost_as_string)(CLG_(dumpmap), CLG_(total_cost)); 1907 VG_(message)(Vg_UserMsg, "Collected : %s\n", mcost); 1908 VG_(free)(mcost); 1909 VG_(message)(Vg_UserMsg, "\n"); 1910 1911 /* determine value widths for statistics */ 1912 total = CLG_(total_cost); 1913 l1 = ULong_width( total[fullOffset(EG_IR)] ); 1914 l2 = l3 = 0; 1915 if (CLG_(clo).simulate_cache) { 1916 l2 = ULong_width( total[fullOffset(EG_DR)] ); 1917 l3 = ULong_width( total[fullOffset(EG_DW)] ); 1918 } 1919 if (CLG_(clo).simulate_branch) { 1920 int l2b = ULong_width( total[fullOffset(EG_BC)] ); 1921 int l3b = ULong_width( total[fullOffset(EG_BI)] ); 1922 if (l2b > l2) l2 = l2b; 1923 if (l3b > l3) l3 = l3b; 1924 } 1925 1926 /* Make format string, getting width right for numbers */ 1927 VG_(sprintf)(fmt, "%%s %%,%dllu\n", l1); 1928 1929 /* Always print this */ 1930 VG_(umsg)(fmt, "I refs: ", total[fullOffset(EG_IR)] ); 1931 1932 if (CLG_(clo).simulate_cache) 1933 (*CLG_(cachesim).printstat)(l1, l2, l3); 1934 1935 if (CLG_(clo).simulate_branch) 1936 branchsim_printstat(l1, l2, l3); 1937 1938 } 1939 1940 1941 void CLG_(fini)(Int exitcode) 1942 { 1943 finish(); 1944 } 1945 1946 1947 /*--------------------------------------------------------------------*/ 1948 /*--- Setup ---*/ 1949 /*--------------------------------------------------------------------*/ 1950 1951 static void clg_start_client_code_callback ( ThreadId tid, ULong blocks_done ) 1952 { 1953 static ULong last_blocks_done = 0; 1954 1955 if (0) 1956 VG_(printf)("%d R %llu\n", (Int)tid, blocks_done); 1957 1958 /* throttle calls to CLG_(run_thread) by number of BBs executed */ 1959 if (blocks_done - last_blocks_done < 5000) return; 1960 last_blocks_done = blocks_done; 1961 1962 CLG_(run_thread)( tid ); 1963 } 1964 1965 static 1966 void CLG_(post_clo_init)(void) 1967 { 1968 if (VG_(clo_vex_control).iropt_register_updates_default 1969 != VexRegUpdSpAtMemAccess) { 1970 CLG_DEBUG(1, " Using user specified value for " 1971 "--vex-iropt-register-updates\n"); 1972 } else { 1973 CLG_DEBUG(1, 1974 " Using default --vex-iropt-register-updates=" 1975 "sp-at-mem-access\n"); 1976 } 1977 1978 if (VG_(clo_px_file_backed) != VexRegUpdSpAtMemAccess) { 1979 CLG_DEBUG(1, " Using user specified value for " 1980 "--px-file-backed\n"); 1981 } else { 1982 CLG_DEBUG(1, 1983 " Using default --px-file-backed=" 1984 "sp-at-mem-access\n"); 1985 } 1986 1987 if (VG_(clo_vex_control).iropt_unroll_thresh != 0) { 1988 VG_(message)(Vg_UserMsg, 1989 "callgrind only works with --vex-iropt-unroll-thresh=0\n" 1990 "=> resetting it back to 0\n"); 1991 VG_(clo_vex_control).iropt_unroll_thresh = 0; // cannot be overriden. 1992 } 1993 if (VG_(clo_vex_control).guest_chase_thresh != 0) { 1994 VG_(message)(Vg_UserMsg, 1995 "callgrind only works with --vex-guest-chase-thresh=0\n" 1996 "=> resetting it back to 0\n"); 1997 VG_(clo_vex_control).guest_chase_thresh = 0; // cannot be overriden. 1998 } 1999 2000 CLG_DEBUG(1, " dump threads: %s\n", CLG_(clo).separate_threads ? "Yes":"No"); 2001 CLG_DEBUG(1, " call sep. : %d\n", CLG_(clo).separate_callers); 2002 CLG_DEBUG(1, " rec. sep. : %d\n", CLG_(clo).separate_recursions); 2003 2004 if (!CLG_(clo).dump_line && !CLG_(clo).dump_instr && !CLG_(clo).dump_bb) { 2005 VG_(message)(Vg_UserMsg, "Using source line as position.\n"); 2006 CLG_(clo).dump_line = True; 2007 } 2008 2009 CLG_(init_dumps)(); 2010 2011 (*CLG_(cachesim).post_clo_init)(); 2012 2013 CLG_(init_eventsets)(); 2014 CLG_(init_statistics)(& CLG_(stat)); 2015 CLG_(init_cost_lz)( CLG_(sets).full, &CLG_(total_cost) ); 2016 2017 /* initialize hash tables */ 2018 CLG_(init_obj_table)(); 2019 CLG_(init_cxt_table)(); 2020 CLG_(init_bb_hash)(); 2021 2022 CLG_(init_threads)(); 2023 CLG_(run_thread)(1); 2024 2025 CLG_(instrument_state) = CLG_(clo).instrument_atstart; 2026 2027 if (VG_(clo_verbosity > 0)) { 2028 VG_(message)(Vg_UserMsg, 2029 "For interactive control, run 'callgrind_control%s%s -h'.\n", 2030 (VG_(arg_vgdb_prefix) ? " " : ""), 2031 (VG_(arg_vgdb_prefix) ? VG_(arg_vgdb_prefix) : "")); 2032 } 2033 } 2034 2035 static 2036 void CLG_(pre_clo_init)(void) 2037 { 2038 VG_(details_name) ("Callgrind"); 2039 VG_(details_version) (NULL); 2040 VG_(details_description) ("a call-graph generating cache profiler"); 2041 VG_(details_copyright_author)("Copyright (C) 2002-2015, and GNU GPL'd, " 2042 "by Josef Weidendorfer et al."); 2043 VG_(details_bug_reports_to) (VG_BUGS_TO); 2044 VG_(details_avg_translation_sizeB) ( 500 ); 2045 2046 VG_(clo_vex_control).iropt_register_updates_default 2047 = VG_(clo_px_file_backed) 2048 = VexRegUpdSpAtMemAccess; // overridable by the user. 2049 2050 VG_(clo_vex_control).iropt_unroll_thresh = 0; // cannot be overriden. 2051 VG_(clo_vex_control).guest_chase_thresh = 0; // cannot be overriden. 2052 2053 VG_(basic_tool_funcs) (CLG_(post_clo_init), 2054 CLG_(instrument), 2055 CLG_(fini)); 2056 2057 VG_(needs_superblock_discards)(clg_discard_superblock_info); 2058 2059 2060 VG_(needs_command_line_options)(CLG_(process_cmd_line_option), 2061 CLG_(print_usage), 2062 CLG_(print_debug_usage)); 2063 2064 VG_(needs_client_requests)(CLG_(handle_client_request)); 2065 VG_(needs_print_stats) (clg_print_stats); 2066 VG_(needs_syscall_wrapper)(CLG_(pre_syscalltime), 2067 CLG_(post_syscalltime)); 2068 2069 VG_(track_start_client_code) ( & clg_start_client_code_callback ); 2070 VG_(track_pre_deliver_signal) ( & CLG_(pre_signal) ); 2071 VG_(track_post_deliver_signal)( & CLG_(post_signal) ); 2072 2073 CLG_(set_clo_defaults)(); 2074 2075 syscalltime = CLG_MALLOC("cl.main.pci.1", 2076 VG_N_THREADS * sizeof syscalltime[0]); 2077 for (UInt i = 0; i < VG_N_THREADS; ++i) { 2078 syscalltime[i] = 0; 2079 } 2080 } 2081 2082 VG_DETERMINE_INTERFACE_VERSION(CLG_(pre_clo_init)) 2083 2084 /*--------------------------------------------------------------------*/ 2085 /*--- end main.c ---*/ 2086 /*--------------------------------------------------------------------*/ 2087