/*--------------------------------------------------------------------*/
/*--- Cachegrind: everything but the simulation itself.            ---*/
/*---                                                    cg_main.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Cachegrind, a Valgrind tool for cache
   profiling programs.

   Copyright (C) 2002-2012 Nicholas Nethercote
      njn (at) valgrind.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_tool_basics.h"
#include "pub_tool_vki.h"
#include "pub_tool_debuginfo.h"
#include "pub_tool_libcbase.h"
#include "pub_tool_libcassert.h"
#include "pub_tool_libcfile.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_libcproc.h"
#include "pub_tool_machine.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_options.h"
#include "pub_tool_oset.h"
#include "pub_tool_tooliface.h"
#include "pub_tool_xarray.h"
#include "pub_tool_clientstate.h"
// NOTE(review): pub_tool_machine.h is already included a few lines above;
// this second include is a harmless duplicate.
#include "pub_tool_machine.h"      // VG_(fnptr_to_fnentry)

// Note: the next two are .c files pulled in textually, not headers.
#include "cg_arch.h"
#include "cg_sim.c"
#include "cg_branchpred.c"

/*------------------------------------------------------------*/
/*--- Constants                                            ---*/
/*------------------------------------------------------------*/

/* Set to 1 for very verbose debugging */
/* (when non-zero, flushEvents() and cg_instrument() print each event and
   statement they process) */
#define DEBUG_CG 0

#define MIN_LINE_SIZE         16
/* Sizes of the file-name and function-name buffers that get_debug_info()
   and get_lineCC() work with. */
#define FILE_LEN              VKI_PATH_MAX
#define FN_LEN                256

/*------------------------------------------------------------*/
/*--- Options                                              ---*/
/*------------------------------------------------------------*/

/* When clo_cache_sim is False, addEvent_Dr()/addEvent_Dw() drop data events
   and flushEvents() selects the counter-only log_1I/log_2I/log_3I helpers.
   When clo_branch_sim is False, addEvent_Bc()/addEvent_Bi() drop branch
   events. */
static Bool  clo_cache_sim  = True;  /* do cache simulation? */
static Bool  clo_branch_sim = False; /* do branch simulation? */
static Char* clo_cachegrind_out_file = "cachegrind.out.%p";

/*------------------------------------------------------------*/
/*--- Cachesim configuration                               ---*/
/*------------------------------------------------------------*/

/* Used as the upper bound on the size of every data-access event (asserted
   in addEvent_Dr()/addEvent_Dw(); larger accesses are clamped to it in
   cg_instrument()).  Starts at 0 here; presumably assigned during cache
   configuration elsewhere in the file -- TODO confirm. */
static Int min_line_size = 0; /* min of L1 and LL cache line sizes */

/*------------------------------------------------------------*/
/*--- Types and Data Structures                            ---*/
/*------------------------------------------------------------*/

/* Counters for one kind of memory access (instruction read, data read or
   data write) attributed to one source line. */
typedef
   struct {
      ULong a;  /* total # memory accesses of this kind */
      ULong m1; /* misses in the first level cache */
      ULong mL; /* misses in the second level cache */
   }
   CacheCC;

/* Counters for one kind of branch (conditional or indirect) attributed to
   one source line. */
typedef
   struct {
      ULong b;  /* total # branches of this kind */
      ULong mp; /* number of branches mispredicted */
   }
   BranchCC;

//------------------------------------------------------------
// Primary data structure #1: CC table
// - Holds the per-source-line hit/miss stats, grouped by file/function/line.
// - an ordered set of CCs.  CC indexing done by file/function/line (as
//   determined from the instrAddr).
// - Traversed for dumping stats at end in file/func/line hierarchy.

// Lookup key for the CC table.  For LineCCs stored in the table, get_lineCC()
// fills 'file' and 'fn' with strings obtained from get_perm_string(); for a
// transient lookup key they point at the caller's stack buffers.
typedef struct {
   Char* file;
   Char* fn;
   Int   line;
}
CodeLoc;

// One CC-table element: a source location plus all the counters that the
// log_* helpers update for instructions mapped to that location.
typedef struct {
   CodeLoc  loc; /* Source location that these counts pertain to */
   CacheCC  Ir;  /* Insn read counts */
   CacheCC  Dr;  /* Data read counts */
   CacheCC  Dw;  /* Data write/modify counts */
   BranchCC Bc;  /* Conditional branch counts */
   BranchCC Bi;  /* Indirect branch counts */
} LineCC;

// First compare file, then fn, then line.
121 static Word cmp_CodeLoc_LineCC(const void *vloc, const void *vcc) 122 { 123 Word res; 124 CodeLoc* a = (CodeLoc*)vloc; 125 CodeLoc* b = &(((LineCC*)vcc)->loc); 126 127 res = VG_(strcmp)(a->file, b->file); 128 if (0 != res) 129 return res; 130 131 res = VG_(strcmp)(a->fn, b->fn); 132 if (0 != res) 133 return res; 134 135 return a->line - b->line; 136 } 137 138 static OSet* CC_table; 139 140 //------------------------------------------------------------ 141 // Primary data structure #2: InstrInfo table 142 // - Holds the cached info about each instr that is used for simulation. 143 // - table(SB_start_addr, list(InstrInfo)) 144 // - For each SB, each InstrInfo in the list holds info about the 145 // instruction (instrLen, instrAddr, etc), plus a pointer to its line 146 // CC. This node is what's passed to the simulation function. 147 // - When SBs are discarded the relevant list(instr_details) is freed. 148 149 typedef struct _InstrInfo InstrInfo; 150 struct _InstrInfo { 151 Addr instr_addr; 152 UChar instr_len; 153 LineCC* parent; // parent line-CC 154 }; 155 156 typedef struct _SB_info SB_info; 157 struct _SB_info { 158 Addr SB_addr; // key; MUST BE FIRST 159 Int n_instrs; 160 InstrInfo instrs[0]; 161 }; 162 163 static OSet* instrInfoTable; 164 165 //------------------------------------------------------------ 166 // Secondary data structure: string table 167 // - holds strings, avoiding dups 168 // - used for filenames and function names, each of which will be 169 // pointed to by one or more CCs. 170 // - it also allows equality checks just by pointer comparison, which 171 // is good when printing the output file at the end. 
172 173 static OSet* stringTable; 174 175 //------------------------------------------------------------ 176 // Stats 177 static Int distinct_files = 0; 178 static Int distinct_fns = 0; 179 static Int distinct_lines = 0; 180 static Int distinct_instrs = 0; 181 182 static Int full_debugs = 0; 183 static Int file_line_debugs = 0; 184 static Int fn_debugs = 0; 185 static Int no_debugs = 0; 186 187 /*------------------------------------------------------------*/ 188 /*--- String table operations ---*/ 189 /*------------------------------------------------------------*/ 190 191 static Word stringCmp( const void* key, const void* elem ) 192 { 193 return VG_(strcmp)(*(Char**)key, *(Char**)elem); 194 } 195 196 // Get a permanent string; either pull it out of the string table if it's 197 // been encountered before, or dup it and put it into the string table. 198 static Char* get_perm_string(Char* s) 199 { 200 Char** s_ptr = VG_(OSetGen_Lookup)(stringTable, &s); 201 if (s_ptr) { 202 return *s_ptr; 203 } else { 204 Char** s_node = VG_(OSetGen_AllocNode)(stringTable, sizeof(Char*)); 205 *s_node = VG_(strdup)("cg.main.gps.1", s); 206 VG_(OSetGen_Insert)(stringTable, s_node); 207 return *s_node; 208 } 209 } 210 211 /*------------------------------------------------------------*/ 212 /*--- CC table operations ---*/ 213 /*------------------------------------------------------------*/ 214 215 static void get_debug_info(Addr instr_addr, Char file[FILE_LEN], 216 Char fn[FN_LEN], Int* line) 217 { 218 Char dir[FILE_LEN]; 219 Bool found_dirname; 220 Bool found_file_line = VG_(get_filename_linenum)( 221 instr_addr, 222 file, FILE_LEN, 223 dir, FILE_LEN, &found_dirname, 224 line 225 ); 226 Bool found_fn = VG_(get_fnname)(instr_addr, fn, FN_LEN); 227 228 if (!found_file_line) { 229 VG_(strcpy)(file, "???"); 230 *line = 0; 231 } 232 if (!found_fn) { 233 VG_(strcpy)(fn, "???"); 234 } 235 236 if (found_dirname) { 237 // +1 for the '/'. 
238 tl_assert(VG_(strlen)(dir) + VG_(strlen)(file) + 1 < FILE_LEN); 239 VG_(strcat)(dir, "/"); // Append '/' 240 VG_(strcat)(dir, file); // Append file to dir 241 VG_(strcpy)(file, dir); // Move dir+file to file 242 } 243 244 if (found_file_line) { 245 if (found_fn) full_debugs++; 246 else file_line_debugs++; 247 } else { 248 if (found_fn) fn_debugs++; 249 else no_debugs++; 250 } 251 } 252 253 // Do a three step traversal: by file, then fn, then line. 254 // Returns a pointer to the line CC, creates a new one if necessary. 255 static LineCC* get_lineCC(Addr origAddr) 256 { 257 Char file[FILE_LEN], fn[FN_LEN]; 258 Int line; 259 CodeLoc loc; 260 LineCC* lineCC; 261 262 get_debug_info(origAddr, file, fn, &line); 263 264 loc.file = file; 265 loc.fn = fn; 266 loc.line = line; 267 268 lineCC = VG_(OSetGen_Lookup)(CC_table, &loc); 269 if (!lineCC) { 270 // Allocate and zero a new node. 271 lineCC = VG_(OSetGen_AllocNode)(CC_table, sizeof(LineCC)); 272 lineCC->loc.file = get_perm_string(loc.file); 273 lineCC->loc.fn = get_perm_string(loc.fn); 274 lineCC->loc.line = loc.line; 275 lineCC->Ir.a = 0; 276 lineCC->Ir.m1 = 0; 277 lineCC->Ir.mL = 0; 278 lineCC->Dr.a = 0; 279 lineCC->Dr.m1 = 0; 280 lineCC->Dr.mL = 0; 281 lineCC->Dw.a = 0; 282 lineCC->Dw.m1 = 0; 283 lineCC->Dw.mL = 0; 284 lineCC->Bc.b = 0; 285 lineCC->Bc.mp = 0; 286 lineCC->Bi.b = 0; 287 lineCC->Bi.mp = 0; 288 VG_(OSetGen_Insert)(CC_table, lineCC); 289 } 290 291 return lineCC; 292 } 293 294 /*------------------------------------------------------------*/ 295 /*--- Cache simulation functions ---*/ 296 /*------------------------------------------------------------*/ 297 298 // Only used with --cache-sim=no. 299 static VG_REGPARM(1) 300 void log_1I(InstrInfo* n) 301 { 302 n->parent->Ir.a++; 303 } 304 305 // Only used with --cache-sim=no. 306 static VG_REGPARM(2) 307 void log_2I(InstrInfo* n, InstrInfo* n2) 308 { 309 n->parent->Ir.a++; 310 n2->parent->Ir.a++; 311 } 312 313 // Only used with --cache-sim=no. 
314 static VG_REGPARM(3) 315 void log_3I(InstrInfo* n, InstrInfo* n2, InstrInfo* n3) 316 { 317 n->parent->Ir.a++; 318 n2->parent->Ir.a++; 319 n3->parent->Ir.a++; 320 } 321 322 static VG_REGPARM(1) 323 void log_1I_0D_cache_access(InstrInfo* n) 324 { 325 //VG_(printf)("1I_0D : CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n", 326 // n, n->instr_addr, n->instr_len); 327 cachesim_I1_doref(n->instr_addr, n->instr_len, 328 &n->parent->Ir.m1, &n->parent->Ir.mL); 329 n->parent->Ir.a++; 330 } 331 332 static VG_REGPARM(2) 333 void log_2I_0D_cache_access(InstrInfo* n, InstrInfo* n2) 334 { 335 //VG_(printf)("2I_0D : CC1addr=0x%010lx, i1addr=0x%010lx, i1size=%lu\n" 336 // " CC2addr=0x%010lx, i2addr=0x%010lx, i2size=%lu\n", 337 // n, n->instr_addr, n->instr_len, 338 // n2, n2->instr_addr, n2->instr_len); 339 cachesim_I1_doref(n->instr_addr, n->instr_len, 340 &n->parent->Ir.m1, &n->parent->Ir.mL); 341 n->parent->Ir.a++; 342 cachesim_I1_doref(n2->instr_addr, n2->instr_len, 343 &n2->parent->Ir.m1, &n2->parent->Ir.mL); 344 n2->parent->Ir.a++; 345 } 346 347 static VG_REGPARM(3) 348 void log_3I_0D_cache_access(InstrInfo* n, InstrInfo* n2, InstrInfo* n3) 349 { 350 //VG_(printf)("3I_0D : CC1addr=0x%010lx, i1addr=0x%010lx, i1size=%lu\n" 351 // " CC2addr=0x%010lx, i2addr=0x%010lx, i2size=%lu\n" 352 // " CC3addr=0x%010lx, i3addr=0x%010lx, i3size=%lu\n", 353 // n, n->instr_addr, n->instr_len, 354 // n2, n2->instr_addr, n2->instr_len, 355 // n3, n3->instr_addr, n3->instr_len); 356 cachesim_I1_doref(n->instr_addr, n->instr_len, 357 &n->parent->Ir.m1, &n->parent->Ir.mL); 358 n->parent->Ir.a++; 359 cachesim_I1_doref(n2->instr_addr, n2->instr_len, 360 &n2->parent->Ir.m1, &n2->parent->Ir.mL); 361 n2->parent->Ir.a++; 362 cachesim_I1_doref(n3->instr_addr, n3->instr_len, 363 &n3->parent->Ir.m1, &n3->parent->Ir.mL); 364 n3->parent->Ir.a++; 365 } 366 367 static VG_REGPARM(3) 368 void log_1I_1Dr_cache_access(InstrInfo* n, Addr data_addr, Word data_size) 369 { 370 //VG_(printf)("1I_1Dr: CCaddr=0x%010lx, 
iaddr=0x%010lx, isize=%lu\n" 371 // " daddr=0x%010lx, dsize=%lu\n", 372 // n, n->instr_addr, n->instr_len, data_addr, data_size); 373 cachesim_I1_doref(n->instr_addr, n->instr_len, 374 &n->parent->Ir.m1, &n->parent->Ir.mL); 375 n->parent->Ir.a++; 376 377 cachesim_D1_doref(data_addr, data_size, 378 &n->parent->Dr.m1, &n->parent->Dr.mL); 379 n->parent->Dr.a++; 380 } 381 382 static VG_REGPARM(3) 383 void log_1I_1Dw_cache_access(InstrInfo* n, Addr data_addr, Word data_size) 384 { 385 //VG_(printf)("1I_1Dw: CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n" 386 // " daddr=0x%010lx, dsize=%lu\n", 387 // n, n->instr_addr, n->instr_len, data_addr, data_size); 388 cachesim_I1_doref(n->instr_addr, n->instr_len, 389 &n->parent->Ir.m1, &n->parent->Ir.mL); 390 n->parent->Ir.a++; 391 392 cachesim_D1_doref(data_addr, data_size, 393 &n->parent->Dw.m1, &n->parent->Dw.mL); 394 n->parent->Dw.a++; 395 } 396 397 static VG_REGPARM(3) 398 void log_0I_1Dr_cache_access(InstrInfo* n, Addr data_addr, Word data_size) 399 { 400 //VG_(printf)("0I_1Dr: CCaddr=0x%010lx, daddr=0x%010lx, dsize=%lu\n", 401 // n, data_addr, data_size); 402 cachesim_D1_doref(data_addr, data_size, 403 &n->parent->Dr.m1, &n->parent->Dr.mL); 404 n->parent->Dr.a++; 405 } 406 407 static VG_REGPARM(3) 408 void log_0I_1Dw_cache_access(InstrInfo* n, Addr data_addr, Word data_size) 409 { 410 //VG_(printf)("0I_1Dw: CCaddr=0x%010lx, daddr=0x%010lx, dsize=%lu\n", 411 // n, data_addr, data_size); 412 cachesim_D1_doref(data_addr, data_size, 413 &n->parent->Dw.m1, &n->parent->Dw.mL); 414 n->parent->Dw.a++; 415 } 416 417 /* For branches, we consult two different predictors, one which 418 predicts taken/untaken for conditional branches, and the other 419 which predicts the branch target address for indirect branches 420 (jump-to-register style ones). 
*/ 421 422 static VG_REGPARM(2) 423 void log_cond_branch(InstrInfo* n, Word taken) 424 { 425 //VG_(printf)("cbrnch: CCaddr=0x%010lx, taken=0x%010lx\n", 426 // n, taken); 427 n->parent->Bc.b++; 428 n->parent->Bc.mp 429 += (1 & do_cond_branch_predict(n->instr_addr, taken)); 430 } 431 432 static VG_REGPARM(2) 433 void log_ind_branch(InstrInfo* n, UWord actual_dst) 434 { 435 //VG_(printf)("ibrnch: CCaddr=0x%010lx, dst=0x%010lx\n", 436 // n, actual_dst); 437 n->parent->Bi.b++; 438 n->parent->Bi.mp 439 += (1 & do_ind_branch_predict(n->instr_addr, actual_dst)); 440 } 441 442 443 /*------------------------------------------------------------*/ 444 /*--- Instrumentation types and structures ---*/ 445 /*------------------------------------------------------------*/ 446 447 /* Maintain an ordered list of memory events which are outstanding, in 448 the sense that no IR has yet been generated to do the relevant 449 helper calls. The BB is scanned top to bottom and memory events 450 are added to the end of the list, merging with the most recent 451 notified event where possible (Dw immediately following Dr and 452 having the same size and EA can be merged). 453 454 This merging is done so that for architectures which have 455 load-op-store instructions (x86, amd64), the insn is treated as if 456 it makes just one memory reference (a modify), rather than two (a 457 read followed by a write at the same address). 458 459 At various points the list will need to be flushed, that is, IR 460 generated from it. That must happen before any possible exit from 461 the block (the end, or an IRStmt_Exit). Flushing also takes place 462 when there is no space to add a new event. 463 464 If we require the simulation statistics to be up to date with 465 respect to possible memory exceptions, then the list would have to 466 be flushed before each memory reference. That would however lose 467 performance by inhibiting event-merging during flushing. 
468 469 Flushing the list consists of walking it start to end and emitting 470 instrumentation IR for each event, in the order in which they 471 appear. It may be possible to emit a single call for two adjacent 472 events in order to reduce the number of helper function calls made. 473 For example, it could well be profitable to handle two adjacent Ir 474 events with a single helper call. */ 475 476 typedef 477 IRExpr 478 IRAtom; 479 480 typedef 481 enum { 482 Ev_Ir, // Instruction read 483 Ev_Dr, // Data read 484 Ev_Dw, // Data write 485 Ev_Dm, // Data modify (read then write) 486 Ev_Bc, // branch conditional 487 Ev_Bi // branch indirect (to unknown destination) 488 } 489 EventTag; 490 491 typedef 492 struct { 493 EventTag tag; 494 InstrInfo* inode; 495 union { 496 struct { 497 } Ir; 498 struct { 499 IRAtom* ea; 500 Int szB; 501 } Dr; 502 struct { 503 IRAtom* ea; 504 Int szB; 505 } Dw; 506 struct { 507 IRAtom* ea; 508 Int szB; 509 } Dm; 510 struct { 511 IRAtom* taken; /* :: Ity_I1 */ 512 } Bc; 513 struct { 514 IRAtom* dst; 515 } Bi; 516 } Ev; 517 } 518 Event; 519 520 static void init_Event ( Event* ev ) { 521 VG_(memset)(ev, 0, sizeof(Event)); 522 } 523 524 static IRAtom* get_Event_dea ( Event* ev ) { 525 switch (ev->tag) { 526 case Ev_Dr: return ev->Ev.Dr.ea; 527 case Ev_Dw: return ev->Ev.Dw.ea; 528 case Ev_Dm: return ev->Ev.Dm.ea; 529 default: tl_assert(0); 530 } 531 } 532 533 static Int get_Event_dszB ( Event* ev ) { 534 switch (ev->tag) { 535 case Ev_Dr: return ev->Ev.Dr.szB; 536 case Ev_Dw: return ev->Ev.Dw.szB; 537 case Ev_Dm: return ev->Ev.Dm.szB; 538 default: tl_assert(0); 539 } 540 } 541 542 543 /* Up to this many unnotified events are allowed. Number is 544 arbitrary. Larger numbers allow more event merging to occur, but 545 potentially induce more spilling due to extending live ranges of 546 address temporaries. */ 547 #define N_EVENTS 16 548 549 550 /* A struct which holds all the running state during instrumentation. 
551 Mostly to avoid passing loads of parameters everywhere. */ 552 typedef 553 struct { 554 /* The current outstanding-memory-event list. */ 555 Event events[N_EVENTS]; 556 Int events_used; 557 558 /* The array of InstrInfo bins for the BB. */ 559 SB_info* sbInfo; 560 561 /* Number InstrInfo bins 'used' so far. */ 562 Int sbInfo_i; 563 564 /* The output SB being constructed. */ 565 IRSB* sbOut; 566 } 567 CgState; 568 569 570 /*------------------------------------------------------------*/ 571 /*--- Instrumentation main ---*/ 572 /*------------------------------------------------------------*/ 573 574 // Note that origAddr is the real origAddr, not the address of the first 575 // instruction in the block (they can be different due to redirection). 576 static 577 SB_info* get_SB_info(IRSB* sbIn, Addr origAddr) 578 { 579 Int i, n_instrs; 580 IRStmt* st; 581 SB_info* sbInfo; 582 583 // Count number of original instrs in SB 584 n_instrs = 0; 585 for (i = 0; i < sbIn->stmts_used; i++) { 586 st = sbIn->stmts[i]; 587 if (Ist_IMark == st->tag) n_instrs++; 588 } 589 590 // Check that we don't have an entry for this BB in the instr-info table. 591 // If this assertion fails, there has been some screwup: some 592 // translations must have been discarded but Cachegrind hasn't discarded 593 // the corresponding entries in the instr-info table. 
594 sbInfo = VG_(OSetGen_Lookup)(instrInfoTable, &origAddr); 595 tl_assert(NULL == sbInfo); 596 597 // BB never translated before (at this address, at least; could have 598 // been unloaded and then reloaded elsewhere in memory) 599 sbInfo = VG_(OSetGen_AllocNode)(instrInfoTable, 600 sizeof(SB_info) + n_instrs*sizeof(InstrInfo)); 601 sbInfo->SB_addr = origAddr; 602 sbInfo->n_instrs = n_instrs; 603 VG_(OSetGen_Insert)( instrInfoTable, sbInfo ); 604 distinct_instrs++; 605 606 return sbInfo; 607 } 608 609 610 static void showEvent ( Event* ev ) 611 { 612 switch (ev->tag) { 613 case Ev_Ir: 614 VG_(printf)("Ir %p\n", ev->inode); 615 break; 616 case Ev_Dr: 617 VG_(printf)("Dr %p %d EA=", ev->inode, ev->Ev.Dr.szB); 618 ppIRExpr(ev->Ev.Dr.ea); 619 VG_(printf)("\n"); 620 break; 621 case Ev_Dw: 622 VG_(printf)("Dw %p %d EA=", ev->inode, ev->Ev.Dw.szB); 623 ppIRExpr(ev->Ev.Dw.ea); 624 VG_(printf)("\n"); 625 break; 626 case Ev_Dm: 627 VG_(printf)("Dm %p %d EA=", ev->inode, ev->Ev.Dm.szB); 628 ppIRExpr(ev->Ev.Dm.ea); 629 VG_(printf)("\n"); 630 break; 631 case Ev_Bc: 632 VG_(printf)("Bc %p GA=", ev->inode); 633 ppIRExpr(ev->Ev.Bc.taken); 634 VG_(printf)("\n"); 635 break; 636 case Ev_Bi: 637 VG_(printf)("Bi %p DST=", ev->inode); 638 ppIRExpr(ev->Ev.Bi.dst); 639 VG_(printf)("\n"); 640 break; 641 default: 642 tl_assert(0); 643 break; 644 } 645 } 646 647 // Reserve and initialise an InstrInfo for the first mention of a new insn. 648 static 649 InstrInfo* setup_InstrInfo ( CgState* cgs, Addr instr_addr, UInt instr_len ) 650 { 651 InstrInfo* i_node; 652 tl_assert(cgs->sbInfo_i >= 0); 653 tl_assert(cgs->sbInfo_i < cgs->sbInfo->n_instrs); 654 i_node = &cgs->sbInfo->instrs[ cgs->sbInfo_i ]; 655 i_node->instr_addr = instr_addr; 656 i_node->instr_len = instr_len; 657 i_node->parent = get_lineCC(instr_addr); 658 cgs->sbInfo_i++; 659 return i_node; 660 } 661 662 663 /* Generate code for all outstanding memory events, and mark the queue 664 empty. 
Code is generated into cgs->bbOut, and this activity 665 'consumes' slots in cgs->sbInfo. */ 666 667 static void flushEvents ( CgState* cgs ) 668 { 669 Int i, regparms; 670 Char* helperName; 671 void* helperAddr; 672 IRExpr** argv; 673 IRExpr* i_node_expr; 674 IRDirty* di; 675 Event* ev; 676 Event* ev2; 677 Event* ev3; 678 679 i = 0; 680 while (i < cgs->events_used) { 681 682 helperName = NULL; 683 helperAddr = NULL; 684 argv = NULL; 685 regparms = 0; 686 687 /* generate IR to notify event i and possibly the ones 688 immediately following it. */ 689 tl_assert(i >= 0 && i < cgs->events_used); 690 691 ev = &cgs->events[i]; 692 ev2 = ( i < cgs->events_used-1 ? &cgs->events[i+1] : NULL ); 693 ev3 = ( i < cgs->events_used-2 ? &cgs->events[i+2] : NULL ); 694 695 if (DEBUG_CG) { 696 VG_(printf)(" flush "); 697 showEvent( ev ); 698 } 699 700 i_node_expr = mkIRExpr_HWord( (HWord)ev->inode ); 701 702 /* Decide on helper fn to call and args to pass it, and advance 703 i appropriately. */ 704 switch (ev->tag) { 705 case Ev_Ir: 706 /* Merge an Ir with a following Dr/Dm. */ 707 if (ev2 && (ev2->tag == Ev_Dr || ev2->tag == Ev_Dm)) { 708 /* Why is this true? It's because we're merging an Ir 709 with a following Dr or Dm. The Ir derives from the 710 instruction's IMark and the Dr/Dm from data 711 references which follow it. In short it holds 712 because each insn starts with an IMark, hence an 713 Ev_Ir, and so these Dr/Dm must pertain to the 714 immediately preceding Ir. Same applies to analogous 715 assertions in the subsequent cases. */ 716 tl_assert(ev2->inode == ev->inode); 717 helperName = "log_1I_1Dr_cache_access"; 718 helperAddr = &log_1I_1Dr_cache_access; 719 argv = mkIRExprVec_3( i_node_expr, 720 get_Event_dea(ev2), 721 mkIRExpr_HWord( get_Event_dszB(ev2) ) ); 722 regparms = 3; 723 i += 2; 724 } 725 /* Merge an Ir with a following Dw. 
*/ 726 else 727 if (ev2 && ev2->tag == Ev_Dw) { 728 tl_assert(ev2->inode == ev->inode); 729 helperName = "log_1I_1Dw_cache_access"; 730 helperAddr = &log_1I_1Dw_cache_access; 731 argv = mkIRExprVec_3( i_node_expr, 732 get_Event_dea(ev2), 733 mkIRExpr_HWord( get_Event_dszB(ev2) ) ); 734 regparms = 3; 735 i += 2; 736 } 737 /* Merge an Ir with two following Irs. */ 738 else 739 if (ev2 && ev3 && ev2->tag == Ev_Ir && ev3->tag == Ev_Ir) 740 { 741 if (clo_cache_sim) { 742 helperName = "log_3I_0D_cache_access"; 743 helperAddr = &log_3I_0D_cache_access; 744 } else { 745 helperName = "log_3I"; 746 helperAddr = &log_3I; 747 } 748 argv = mkIRExprVec_3( i_node_expr, 749 mkIRExpr_HWord( (HWord)ev2->inode ), 750 mkIRExpr_HWord( (HWord)ev3->inode ) ); 751 regparms = 3; 752 i += 3; 753 } 754 /* Merge an Ir with one following Ir. */ 755 else 756 if (ev2 && ev2->tag == Ev_Ir) { 757 if (clo_cache_sim) { 758 helperName = "log_2I_0D_cache_access"; 759 helperAddr = &log_2I_0D_cache_access; 760 } else { 761 helperName = "log_2I"; 762 helperAddr = &log_2I; 763 } 764 argv = mkIRExprVec_2( i_node_expr, 765 mkIRExpr_HWord( (HWord)ev2->inode ) ); 766 regparms = 2; 767 i += 2; 768 } 769 /* No merging possible; emit as-is. 
*/ 770 else { 771 if (clo_cache_sim) { 772 helperName = "log_1I_0D_cache_access"; 773 helperAddr = &log_1I_0D_cache_access; 774 } else { 775 helperName = "log_1I"; 776 helperAddr = &log_1I; 777 } 778 argv = mkIRExprVec_1( i_node_expr ); 779 regparms = 1; 780 i++; 781 } 782 break; 783 case Ev_Dr: 784 case Ev_Dm: 785 /* Data read or modify */ 786 helperName = "log_0I_1Dr_cache_access"; 787 helperAddr = &log_0I_1Dr_cache_access; 788 argv = mkIRExprVec_3( i_node_expr, 789 get_Event_dea(ev), 790 mkIRExpr_HWord( get_Event_dszB(ev) ) ); 791 regparms = 3; 792 i++; 793 break; 794 case Ev_Dw: 795 /* Data write */ 796 helperName = "log_0I_1Dw_cache_access"; 797 helperAddr = &log_0I_1Dw_cache_access; 798 argv = mkIRExprVec_3( i_node_expr, 799 get_Event_dea(ev), 800 mkIRExpr_HWord( get_Event_dszB(ev) ) ); 801 regparms = 3; 802 i++; 803 break; 804 case Ev_Bc: 805 /* Conditional branch */ 806 helperName = "log_cond_branch"; 807 helperAddr = &log_cond_branch; 808 argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bc.taken ); 809 regparms = 2; 810 i++; 811 break; 812 case Ev_Bi: 813 /* Branch to an unknown destination */ 814 helperName = "log_ind_branch"; 815 helperAddr = &log_ind_branch; 816 argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bi.dst ); 817 regparms = 2; 818 i++; 819 break; 820 default: 821 tl_assert(0); 822 } 823 824 /* Add the helper. 
*/ 825 tl_assert(helperName); 826 tl_assert(helperAddr); 827 tl_assert(argv); 828 di = unsafeIRDirty_0_N( regparms, 829 helperName, VG_(fnptr_to_fnentry)( helperAddr ), 830 argv ); 831 addStmtToIRSB( cgs->sbOut, IRStmt_Dirty(di) ); 832 } 833 834 cgs->events_used = 0; 835 } 836 837 static void addEvent_Ir ( CgState* cgs, InstrInfo* inode ) 838 { 839 Event* evt; 840 if (cgs->events_used == N_EVENTS) 841 flushEvents(cgs); 842 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS); 843 evt = &cgs->events[cgs->events_used]; 844 init_Event(evt); 845 evt->tag = Ev_Ir; 846 evt->inode = inode; 847 cgs->events_used++; 848 } 849 850 static 851 void addEvent_Dr ( CgState* cgs, InstrInfo* inode, Int datasize, IRAtom* ea ) 852 { 853 Event* evt; 854 tl_assert(isIRAtom(ea)); 855 tl_assert(datasize >= 1 && datasize <= min_line_size); 856 if (!clo_cache_sim) 857 return; 858 if (cgs->events_used == N_EVENTS) 859 flushEvents(cgs); 860 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS); 861 evt = &cgs->events[cgs->events_used]; 862 init_Event(evt); 863 evt->tag = Ev_Dr; 864 evt->inode = inode; 865 evt->Ev.Dr.szB = datasize; 866 evt->Ev.Dr.ea = ea; 867 cgs->events_used++; 868 } 869 870 static 871 void addEvent_Dw ( CgState* cgs, InstrInfo* inode, Int datasize, IRAtom* ea ) 872 { 873 Event* lastEvt; 874 Event* evt; 875 876 tl_assert(isIRAtom(ea)); 877 tl_assert(datasize >= 1 && datasize <= min_line_size); 878 879 if (!clo_cache_sim) 880 return; 881 882 /* Is it possible to merge this write with the preceding read? */ 883 lastEvt = &cgs->events[cgs->events_used-1]; 884 if (cgs->events_used > 0 885 && lastEvt->tag == Ev_Dr 886 && lastEvt->Ev.Dr.szB == datasize 887 && lastEvt->inode == inode 888 && eqIRAtom(lastEvt->Ev.Dr.ea, ea)) 889 { 890 lastEvt->tag = Ev_Dm; 891 return; 892 } 893 894 /* No. Add as normal. 
*/ 895 if (cgs->events_used == N_EVENTS) 896 flushEvents(cgs); 897 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS); 898 evt = &cgs->events[cgs->events_used]; 899 init_Event(evt); 900 evt->tag = Ev_Dw; 901 evt->inode = inode; 902 evt->Ev.Dw.szB = datasize; 903 evt->Ev.Dw.ea = ea; 904 cgs->events_used++; 905 } 906 907 static 908 void addEvent_Bc ( CgState* cgs, InstrInfo* inode, IRAtom* guard ) 909 { 910 Event* evt; 911 tl_assert(isIRAtom(guard)); 912 tl_assert(typeOfIRExpr(cgs->sbOut->tyenv, guard) 913 == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64)); 914 if (!clo_branch_sim) 915 return; 916 if (cgs->events_used == N_EVENTS) 917 flushEvents(cgs); 918 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS); 919 evt = &cgs->events[cgs->events_used]; 920 init_Event(evt); 921 evt->tag = Ev_Bc; 922 evt->inode = inode; 923 evt->Ev.Bc.taken = guard; 924 cgs->events_used++; 925 } 926 927 static 928 void addEvent_Bi ( CgState* cgs, InstrInfo* inode, IRAtom* whereTo ) 929 { 930 Event* evt; 931 tl_assert(isIRAtom(whereTo)); 932 tl_assert(typeOfIRExpr(cgs->sbOut->tyenv, whereTo) 933 == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64)); 934 if (!clo_branch_sim) 935 return; 936 if (cgs->events_used == N_EVENTS) 937 flushEvents(cgs); 938 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS); 939 evt = &cgs->events[cgs->events_used]; 940 init_Event(evt); 941 evt->tag = Ev_Bi; 942 evt->inode = inode; 943 evt->Ev.Bi.dst = whereTo; 944 cgs->events_used++; 945 } 946 947 //////////////////////////////////////////////////////////// 948 949 950 static 951 IRSB* cg_instrument ( VgCallbackClosure* closure, 952 IRSB* sbIn, 953 VexGuestLayout* layout, 954 VexGuestExtents* vge, 955 IRType gWordTy, IRType hWordTy ) 956 { 957 Int i, isize; 958 IRStmt* st; 959 Addr64 cia; /* address of current insn */ 960 CgState cgs; 961 IRTypeEnv* tyenv = sbIn->tyenv; 962 InstrInfo* curr_inode = NULL; 963 964 if (gWordTy != hWordTy) { 965 /* We don't currently support this case. 
*/ 966 VG_(tool_panic)("host/guest word size mismatch"); 967 } 968 969 // Set up new SB 970 cgs.sbOut = deepCopyIRSBExceptStmts(sbIn); 971 972 // Copy verbatim any IR preamble preceding the first IMark 973 i = 0; 974 while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) { 975 addStmtToIRSB( cgs.sbOut, sbIn->stmts[i] ); 976 i++; 977 } 978 979 // Get the first statement, and initial cia from it 980 tl_assert(sbIn->stmts_used > 0); 981 tl_assert(i < sbIn->stmts_used); 982 st = sbIn->stmts[i]; 983 tl_assert(Ist_IMark == st->tag); 984 985 cia = st->Ist.IMark.addr; 986 isize = st->Ist.IMark.len; 987 // If Vex fails to decode an instruction, the size will be zero. 988 // Pretend otherwise. 989 if (isize == 0) isize = VG_MIN_INSTR_SZB; 990 991 // Set up running state and get block info 992 tl_assert(closure->readdr == vge->base[0]); 993 cgs.events_used = 0; 994 cgs.sbInfo = get_SB_info(sbIn, (Addr)closure->readdr); 995 cgs.sbInfo_i = 0; 996 997 if (DEBUG_CG) 998 VG_(printf)("\n\n---------- cg_instrument ----------\n"); 999 1000 // Traverse the block, initialising inodes, adding events and flushing as 1001 // necessary. 1002 for (/*use current i*/; i < sbIn->stmts_used; i++) { 1003 1004 st = sbIn->stmts[i]; 1005 tl_assert(isFlatIRStmt(st)); 1006 1007 switch (st->tag) { 1008 case Ist_NoOp: 1009 case Ist_AbiHint: 1010 case Ist_Put: 1011 case Ist_PutI: 1012 case Ist_MBE: 1013 break; 1014 1015 case Ist_IMark: 1016 cia = st->Ist.IMark.addr; 1017 isize = st->Ist.IMark.len; 1018 1019 // If Vex fails to decode an instruction, the size will be zero. 1020 // Pretend otherwise. 1021 if (isize == 0) isize = VG_MIN_INSTR_SZB; 1022 1023 // Sanity-check size. 1024 tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB) 1025 || VG_CLREQ_SZB == isize ); 1026 1027 // Get space for and init the inode, record it as the current one. 1028 // Subsequent Dr/Dw/Dm events from the same instruction will 1029 // also use it. 
1030 curr_inode = setup_InstrInfo(&cgs, cia, isize); 1031 1032 addEvent_Ir( &cgs, curr_inode ); 1033 break; 1034 1035 case Ist_WrTmp: { 1036 IRExpr* data = st->Ist.WrTmp.data; 1037 if (data->tag == Iex_Load) { 1038 IRExpr* aexpr = data->Iex.Load.addr; 1039 // Note also, endianness info is ignored. I guess 1040 // that's not interesting. 1041 addEvent_Dr( &cgs, curr_inode, sizeofIRType(data->Iex.Load.ty), 1042 aexpr ); 1043 } 1044 break; 1045 } 1046 1047 case Ist_Store: { 1048 IRExpr* data = st->Ist.Store.data; 1049 IRExpr* aexpr = st->Ist.Store.addr; 1050 addEvent_Dw( &cgs, curr_inode, 1051 sizeofIRType(typeOfIRExpr(tyenv, data)), aexpr ); 1052 break; 1053 } 1054 1055 case Ist_Dirty: { 1056 Int dataSize; 1057 IRDirty* d = st->Ist.Dirty.details; 1058 if (d->mFx != Ifx_None) { 1059 /* This dirty helper accesses memory. Collect the details. */ 1060 tl_assert(d->mAddr != NULL); 1061 tl_assert(d->mSize != 0); 1062 dataSize = d->mSize; 1063 // Large (eg. 28B, 108B, 512B on x86) data-sized 1064 // instructions will be done inaccurately, but they're 1065 // very rare and this avoids errors from hitting more 1066 // than two cache lines in the simulation. 1067 if (dataSize > min_line_size) 1068 dataSize = min_line_size; 1069 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) 1070 addEvent_Dr( &cgs, curr_inode, dataSize, d->mAddr ); 1071 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) 1072 addEvent_Dw( &cgs, curr_inode, dataSize, d->mAddr ); 1073 } else { 1074 tl_assert(d->mAddr == NULL); 1075 tl_assert(d->mSize == 0); 1076 } 1077 break; 1078 } 1079 1080 case Ist_CAS: { 1081 /* We treat it as a read and a write of the location. I 1082 think that is the same behaviour as it was before IRCAS 1083 was introduced, since prior to that point, the Vex 1084 front ends would translate a lock-prefixed instruction 1085 into a (normal) read followed by a (normal) write. 
*/ 1086 Int dataSize; 1087 IRCAS* cas = st->Ist.CAS.details; 1088 tl_assert(cas->addr != NULL); 1089 tl_assert(cas->dataLo != NULL); 1090 dataSize = sizeofIRType(typeOfIRExpr(tyenv, cas->dataLo)); 1091 if (cas->dataHi != NULL) 1092 dataSize *= 2; /* since it's a doubleword-CAS */ 1093 /* I don't think this can ever happen, but play safe. */ 1094 if (dataSize > min_line_size) 1095 dataSize = min_line_size; 1096 addEvent_Dr( &cgs, curr_inode, dataSize, cas->addr ); 1097 addEvent_Dw( &cgs, curr_inode, dataSize, cas->addr ); 1098 break; 1099 } 1100 1101 case Ist_LLSC: { 1102 IRType dataTy; 1103 if (st->Ist.LLSC.storedata == NULL) { 1104 /* LL */ 1105 dataTy = typeOfIRTemp(tyenv, st->Ist.LLSC.result); 1106 addEvent_Dr( &cgs, curr_inode, 1107 sizeofIRType(dataTy), st->Ist.LLSC.addr ); 1108 } else { 1109 /* SC */ 1110 dataTy = typeOfIRExpr(tyenv, st->Ist.LLSC.storedata); 1111 addEvent_Dw( &cgs, curr_inode, 1112 sizeofIRType(dataTy), st->Ist.LLSC.addr ); 1113 } 1114 break; 1115 } 1116 1117 case Ist_Exit: { 1118 // call branch predictor only if this is a branch in guest code 1119 if ( (st->Ist.Exit.jk == Ijk_Boring) || 1120 (st->Ist.Exit.jk == Ijk_Call) || 1121 (st->Ist.Exit.jk == Ijk_Ret) ) 1122 { 1123 /* Stuff to widen the guard expression to a host word, so 1124 we can pass it to the branch predictor simulation 1125 functions easily. */ 1126 Bool inverted; 1127 Addr64 nia, sea; 1128 IRConst* dst; 1129 IRType tyW = hWordTy; 1130 IROp widen = tyW==Ity_I32 ? Iop_1Uto32 : Iop_1Uto64; 1131 IROp opXOR = tyW==Ity_I32 ? Iop_Xor32 : Iop_Xor64; 1132 IRTemp guard1 = newIRTemp(cgs.sbOut->tyenv, Ity_I1); 1133 IRTemp guardW = newIRTemp(cgs.sbOut->tyenv, tyW); 1134 IRTemp guard = newIRTemp(cgs.sbOut->tyenv, tyW); 1135 IRExpr* one = tyW==Ity_I32 ? IRExpr_Const(IRConst_U32(1)) 1136 : IRExpr_Const(IRConst_U64(1)); 1137 1138 /* First we need to figure out whether the side exit got 1139 inverted by the ir optimiser. 
To do that, figure out 1140 the next (fallthrough) instruction's address and the 1141 side exit address and see if they are the same. */ 1142 nia = cia + (Addr64)isize; 1143 if (tyW == Ity_I32) 1144 nia &= 0xFFFFFFFFULL; 1145 1146 /* Side exit address */ 1147 dst = st->Ist.Exit.dst; 1148 if (tyW == Ity_I32) { 1149 tl_assert(dst->tag == Ico_U32); 1150 sea = (Addr64)(UInt)dst->Ico.U32; 1151 } else { 1152 tl_assert(tyW == Ity_I64); 1153 tl_assert(dst->tag == Ico_U64); 1154 sea = dst->Ico.U64; 1155 } 1156 1157 inverted = nia == sea; 1158 1159 /* Widen the guard expression. */ 1160 addStmtToIRSB( cgs.sbOut, 1161 IRStmt_WrTmp( guard1, st->Ist.Exit.guard )); 1162 addStmtToIRSB( cgs.sbOut, 1163 IRStmt_WrTmp( guardW, 1164 IRExpr_Unop(widen, 1165 IRExpr_RdTmp(guard1))) ); 1166 /* If the exit is inverted, invert the sense of the guard. */ 1167 addStmtToIRSB( 1168 cgs.sbOut, 1169 IRStmt_WrTmp( 1170 guard, 1171 inverted ? IRExpr_Binop(opXOR, IRExpr_RdTmp(guardW), one) 1172 : IRExpr_RdTmp(guardW) 1173 )); 1174 /* And post the event. */ 1175 addEvent_Bc( &cgs, curr_inode, IRExpr_RdTmp(guard) ); 1176 } 1177 1178 /* We may never reach the next statement, so need to flush 1179 all outstanding transactions now. */ 1180 flushEvents( &cgs ); 1181 break; 1182 } 1183 1184 default: 1185 tl_assert(0); 1186 break; 1187 } 1188 1189 /* Copy the original statement */ 1190 addStmtToIRSB( cgs.sbOut, st ); 1191 1192 if (DEBUG_CG) { 1193 ppIRStmt(st); 1194 VG_(printf)("\n"); 1195 } 1196 } 1197 1198 /* Deal with branches to unknown destinations. Except ignore ones 1199 which are function returns as we assume the return stack 1200 predictor never mispredicts. 
*/ 1201 if ((sbIn->jumpkind == Ijk_Boring) || (sbIn->jumpkind == Ijk_Call)) { 1202 if (0) { ppIRExpr( sbIn->next ); VG_(printf)("\n"); } 1203 switch (sbIn->next->tag) { 1204 case Iex_Const: 1205 break; /* boring - branch to known address */ 1206 case Iex_RdTmp: 1207 /* looks like an indirect branch (branch to unknown) */ 1208 addEvent_Bi( &cgs, curr_inode, sbIn->next ); 1209 break; 1210 default: 1211 /* shouldn't happen - if the incoming IR is properly 1212 flattened, should only have tmp and const cases to 1213 consider. */ 1214 tl_assert(0); 1215 } 1216 } 1217 1218 /* At the end of the bb. Flush outstandings. */ 1219 flushEvents( &cgs ); 1220 1221 /* done. stay sane ... */ 1222 tl_assert(cgs.sbInfo_i == cgs.sbInfo->n_instrs); 1223 1224 if (DEBUG_CG) { 1225 VG_(printf)( "goto {"); 1226 ppIRJumpKind(sbIn->jumpkind); 1227 VG_(printf)( "} "); 1228 ppIRExpr( sbIn->next ); 1229 VG_(printf)( "}\n"); 1230 } 1231 1232 return cgs.sbOut; 1233 } 1234 1235 /*------------------------------------------------------------*/ 1236 /*--- Cache configuration ---*/ 1237 /*------------------------------------------------------------*/ 1238 1239 #define UNDEFINED_CACHE { -1, -1, -1 } 1240 1241 static cache_t clo_I1_cache = UNDEFINED_CACHE; 1242 static cache_t clo_D1_cache = UNDEFINED_CACHE; 1243 static cache_t clo_LL_cache = UNDEFINED_CACHE; 1244 1245 /*------------------------------------------------------------*/ 1246 /*--- cg_fini() and related function ---*/ 1247 /*------------------------------------------------------------*/ 1248 1249 // Total reads/writes/misses. Calculated during CC traversal at the end. 1250 // All auto-zeroed. 
1251 static CacheCC Ir_total; 1252 static CacheCC Dr_total; 1253 static CacheCC Dw_total; 1254 static BranchCC Bc_total; 1255 static BranchCC Bi_total; 1256 1257 static void fprint_CC_table_and_calc_totals(void) 1258 { 1259 Int i, fd; 1260 SysRes sres; 1261 Char buf[512], *currFile = NULL, *currFn = NULL; 1262 LineCC* lineCC; 1263 1264 // Setup output filename. Nb: it's important to do this now, ie. as late 1265 // as possible. If we do it at start-up and the program forks and the 1266 // output file format string contains a %p (pid) specifier, both the 1267 // parent and child will incorrectly write to the same file; this 1268 // happened in 3.3.0. 1269 Char* cachegrind_out_file = 1270 VG_(expand_file_name)("--cachegrind-out-file", clo_cachegrind_out_file); 1271 1272 sres = VG_(open)(cachegrind_out_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY, 1273 VKI_S_IRUSR|VKI_S_IWUSR); 1274 if (sr_isError(sres)) { 1275 // If the file can't be opened for whatever reason (conflict 1276 // between multiple cachegrinded processes?), give up now. 1277 VG_(umsg)("error: can't open cache simulation output file '%s'\n", 1278 cachegrind_out_file ); 1279 VG_(umsg)(" ... so simulation results will be missing.\n"); 1280 VG_(free)(cachegrind_out_file); 1281 return; 1282 } else { 1283 fd = sr_Res(sres); 1284 VG_(free)(cachegrind_out_file); 1285 } 1286 1287 // "desc:" lines (giving I1/D1/LL cache configuration). The spaces after 1288 // the 2nd colon makes cg_annotate's output look nicer. 
1289 VG_(sprintf)(buf, "desc: I1 cache: %s\n" 1290 "desc: D1 cache: %s\n" 1291 "desc: LL cache: %s\n", 1292 I1.desc_line, D1.desc_line, LL.desc_line); 1293 VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); 1294 1295 // "cmd:" line 1296 VG_(strcpy)(buf, "cmd:"); 1297 VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); 1298 if (VG_(args_the_exename)) { 1299 VG_(write)(fd, " ", 1); 1300 VG_(write)(fd, VG_(args_the_exename), 1301 VG_(strlen)( VG_(args_the_exename) )); 1302 } 1303 for (i = 0; i < VG_(sizeXA)( VG_(args_for_client) ); i++) { 1304 HChar* arg = * (HChar**) VG_(indexXA)( VG_(args_for_client), i ); 1305 if (arg) { 1306 VG_(write)(fd, " ", 1); 1307 VG_(write)(fd, arg, VG_(strlen)( arg )); 1308 } 1309 } 1310 // "events:" line 1311 if (clo_cache_sim && clo_branch_sim) { 1312 VG_(sprintf)(buf, "\nevents: Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw " 1313 "Bc Bcm Bi Bim\n"); 1314 } 1315 else if (clo_cache_sim && !clo_branch_sim) { 1316 VG_(sprintf)(buf, "\nevents: Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw " 1317 "\n"); 1318 } 1319 else if (!clo_cache_sim && clo_branch_sim) { 1320 VG_(sprintf)(buf, "\nevents: Ir " 1321 "Bc Bcm Bi Bim\n"); 1322 } 1323 else { 1324 VG_(sprintf)(buf, "\nevents: Ir\n"); 1325 } 1326 1327 VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); 1328 1329 // Traverse every lineCC 1330 VG_(OSetGen_ResetIter)(CC_table); 1331 while ( (lineCC = VG_(OSetGen_Next)(CC_table)) ) { 1332 Bool just_hit_a_new_file = False; 1333 // If we've hit a new file, print a "fl=" line. Note that because 1334 // each string is stored exactly once in the string table, we can use 1335 // pointer comparison rather than strcmp() to test for equality, which 1336 // is good because most of the time the comparisons are equal and so 1337 // the whole strings would have to be checked. 
1338 if ( lineCC->loc.file != currFile ) { 1339 currFile = lineCC->loc.file; 1340 VG_(sprintf)(buf, "fl=%s\n", currFile); 1341 VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); 1342 distinct_files++; 1343 just_hit_a_new_file = True; 1344 } 1345 // If we've hit a new function, print a "fn=" line. We know to do 1346 // this when the function name changes, and also every time we hit a 1347 // new file (in which case the new function name might be the same as 1348 // in the old file, hence the just_hit_a_new_file test). 1349 if ( just_hit_a_new_file || lineCC->loc.fn != currFn ) { 1350 currFn = lineCC->loc.fn; 1351 VG_(sprintf)(buf, "fn=%s\n", currFn); 1352 VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); 1353 distinct_fns++; 1354 } 1355 1356 // Print the LineCC 1357 if (clo_cache_sim && clo_branch_sim) { 1358 VG_(sprintf)(buf, "%u %llu %llu %llu" 1359 " %llu %llu %llu" 1360 " %llu %llu %llu" 1361 " %llu %llu %llu %llu\n", 1362 lineCC->loc.line, 1363 lineCC->Ir.a, lineCC->Ir.m1, lineCC->Ir.mL, 1364 lineCC->Dr.a, lineCC->Dr.m1, lineCC->Dr.mL, 1365 lineCC->Dw.a, lineCC->Dw.m1, lineCC->Dw.mL, 1366 lineCC->Bc.b, lineCC->Bc.mp, 1367 lineCC->Bi.b, lineCC->Bi.mp); 1368 } 1369 else if (clo_cache_sim && !clo_branch_sim) { 1370 VG_(sprintf)(buf, "%u %llu %llu %llu" 1371 " %llu %llu %llu" 1372 " %llu %llu %llu\n", 1373 lineCC->loc.line, 1374 lineCC->Ir.a, lineCC->Ir.m1, lineCC->Ir.mL, 1375 lineCC->Dr.a, lineCC->Dr.m1, lineCC->Dr.mL, 1376 lineCC->Dw.a, lineCC->Dw.m1, lineCC->Dw.mL); 1377 } 1378 else if (!clo_cache_sim && clo_branch_sim) { 1379 VG_(sprintf)(buf, "%u %llu" 1380 " %llu %llu %llu %llu\n", 1381 lineCC->loc.line, 1382 lineCC->Ir.a, 1383 lineCC->Bc.b, lineCC->Bc.mp, 1384 lineCC->Bi.b, lineCC->Bi.mp); 1385 } 1386 else { 1387 VG_(sprintf)(buf, "%u %llu\n", 1388 lineCC->loc.line, 1389 lineCC->Ir.a); 1390 } 1391 1392 VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); 1393 1394 // Update summary stats 1395 Ir_total.a += lineCC->Ir.a; 1396 Ir_total.m1 += lineCC->Ir.m1; 1397 Ir_total.mL += 
lineCC->Ir.mL; 1398 Dr_total.a += lineCC->Dr.a; 1399 Dr_total.m1 += lineCC->Dr.m1; 1400 Dr_total.mL += lineCC->Dr.mL; 1401 Dw_total.a += lineCC->Dw.a; 1402 Dw_total.m1 += lineCC->Dw.m1; 1403 Dw_total.mL += lineCC->Dw.mL; 1404 Bc_total.b += lineCC->Bc.b; 1405 Bc_total.mp += lineCC->Bc.mp; 1406 Bi_total.b += lineCC->Bi.b; 1407 Bi_total.mp += lineCC->Bi.mp; 1408 1409 distinct_lines++; 1410 } 1411 1412 // Summary stats must come after rest of table, since we calculate them 1413 // during traversal. */ 1414 if (clo_cache_sim && clo_branch_sim) { 1415 VG_(sprintf)(buf, "summary:" 1416 " %llu %llu %llu" 1417 " %llu %llu %llu" 1418 " %llu %llu %llu" 1419 " %llu %llu %llu %llu\n", 1420 Ir_total.a, Ir_total.m1, Ir_total.mL, 1421 Dr_total.a, Dr_total.m1, Dr_total.mL, 1422 Dw_total.a, Dw_total.m1, Dw_total.mL, 1423 Bc_total.b, Bc_total.mp, 1424 Bi_total.b, Bi_total.mp); 1425 } 1426 else if (clo_cache_sim && !clo_branch_sim) { 1427 VG_(sprintf)(buf, "summary:" 1428 " %llu %llu %llu" 1429 " %llu %llu %llu" 1430 " %llu %llu %llu\n", 1431 Ir_total.a, Ir_total.m1, Ir_total.mL, 1432 Dr_total.a, Dr_total.m1, Dr_total.mL, 1433 Dw_total.a, Dw_total.m1, Dw_total.mL); 1434 } 1435 else if (!clo_cache_sim && clo_branch_sim) { 1436 VG_(sprintf)(buf, "summary:" 1437 " %llu" 1438 " %llu %llu %llu %llu\n", 1439 Ir_total.a, 1440 Bc_total.b, Bc_total.mp, 1441 Bi_total.b, Bi_total.mp); 1442 } 1443 else { 1444 VG_(sprintf)(buf, "summary:" 1445 " %llu\n", 1446 Ir_total.a); 1447 } 1448 1449 VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); 1450 VG_(close)(fd); 1451 } 1452 1453 static UInt ULong_width(ULong n) 1454 { 1455 UInt w = 0; 1456 while (n > 0) { 1457 n = n / 10; 1458 w++; 1459 } 1460 if (w == 0) w = 1; 1461 return w + (w-1)/3; // add space for commas 1462 } 1463 1464 static void cg_fini(Int exitcode) 1465 { 1466 static Char buf1[128], buf2[128], buf3[128], buf4[123], fmt[128]; 1467 1468 CacheCC D_total; 1469 BranchCC B_total; 1470 ULong LL_total_m, LL_total_mr, LL_total_mw, 1471 LL_total, 
LL_total_r, LL_total_w; 1472 Int l1, l2, l3; 1473 1474 fprint_CC_table_and_calc_totals(); 1475 1476 if (VG_(clo_verbosity) == 0) 1477 return; 1478 1479 // Nb: this isn't called "MAX" because that overshadows a global on Darwin. 1480 #define CG_MAX(a, b) ((a) >= (b) ? (a) : (b)) 1481 1482 /* I cache results. Use the I_refs value to determine the first column 1483 * width. */ 1484 l1 = ULong_width(Ir_total.a); 1485 l2 = ULong_width(CG_MAX(Dr_total.a, Bc_total.b)); 1486 l3 = ULong_width(CG_MAX(Dw_total.a, Bi_total.b)); 1487 1488 /* Make format string, getting width right for numbers */ 1489 VG_(sprintf)(fmt, "%%s %%,%dllu\n", l1); 1490 1491 /* Always print this */ 1492 VG_(umsg)(fmt, "I refs: ", Ir_total.a); 1493 1494 /* If cache profiling is enabled, show D access numbers and all 1495 miss numbers */ 1496 if (clo_cache_sim) { 1497 VG_(umsg)(fmt, "I1 misses: ", Ir_total.m1); 1498 VG_(umsg)(fmt, "LLi misses: ", Ir_total.mL); 1499 1500 if (0 == Ir_total.a) Ir_total.a = 1; 1501 VG_(percentify)(Ir_total.m1, Ir_total.a, 2, l1+1, buf1); 1502 VG_(umsg)("I1 miss rate: %s\n", buf1); 1503 1504 VG_(percentify)(Ir_total.mL, Ir_total.a, 2, l1+1, buf1); 1505 VG_(umsg)("LLi miss rate: %s\n", buf1); 1506 VG_(umsg)("\n"); 1507 1508 /* D cache results. Use the D_refs.rd and D_refs.wr values to 1509 * determine the width of columns 2 & 3. 
*/ 1510 D_total.a = Dr_total.a + Dw_total.a; 1511 D_total.m1 = Dr_total.m1 + Dw_total.m1; 1512 D_total.mL = Dr_total.mL + Dw_total.mL; 1513 1514 /* Make format string, getting width right for numbers */ 1515 VG_(sprintf)(fmt, "%%s %%,%dllu (%%,%dllu rd + %%,%dllu wr)\n", 1516 l1, l2, l3); 1517 1518 VG_(umsg)(fmt, "D refs: ", 1519 D_total.a, Dr_total.a, Dw_total.a); 1520 VG_(umsg)(fmt, "D1 misses: ", 1521 D_total.m1, Dr_total.m1, Dw_total.m1); 1522 VG_(umsg)(fmt, "LLd misses: ", 1523 D_total.mL, Dr_total.mL, Dw_total.mL); 1524 1525 if (0 == D_total.a) D_total.a = 1; 1526 if (0 == Dr_total.a) Dr_total.a = 1; 1527 if (0 == Dw_total.a) Dw_total.a = 1; 1528 VG_(percentify)( D_total.m1, D_total.a, 1, l1+1, buf1); 1529 VG_(percentify)(Dr_total.m1, Dr_total.a, 1, l2+1, buf2); 1530 VG_(percentify)(Dw_total.m1, Dw_total.a, 1, l3+1, buf3); 1531 VG_(umsg)("D1 miss rate: %s (%s + %s )\n", buf1, buf2,buf3); 1532 1533 VG_(percentify)( D_total.mL, D_total.a, 1, l1+1, buf1); 1534 VG_(percentify)(Dr_total.mL, Dr_total.a, 1, l2+1, buf2); 1535 VG_(percentify)(Dw_total.mL, Dw_total.a, 1, l3+1, buf3); 1536 VG_(umsg)("LLd miss rate: %s (%s + %s )\n", buf1, buf2,buf3); 1537 VG_(umsg)("\n"); 1538 1539 /* LL overall results */ 1540 1541 LL_total = Dr_total.m1 + Dw_total.m1 + Ir_total.m1; 1542 LL_total_r = Dr_total.m1 + Ir_total.m1; 1543 LL_total_w = Dw_total.m1; 1544 VG_(umsg)(fmt, "LL refs: ", 1545 LL_total, LL_total_r, LL_total_w); 1546 1547 LL_total_m = Dr_total.mL + Dw_total.mL + Ir_total.mL; 1548 LL_total_mr = Dr_total.mL + Ir_total.mL; 1549 LL_total_mw = Dw_total.mL; 1550 VG_(umsg)(fmt, "LL misses: ", 1551 LL_total_m, LL_total_mr, LL_total_mw); 1552 1553 VG_(percentify)(LL_total_m, (Ir_total.a + D_total.a), 1, l1+1, buf1); 1554 VG_(percentify)(LL_total_mr, (Ir_total.a + Dr_total.a), 1, l2+1, buf2); 1555 VG_(percentify)(LL_total_mw, Dw_total.a, 1, l3+1, buf3); 1556 VG_(umsg)("LL miss rate: %s (%s + %s )\n", buf1, buf2,buf3); 1557 } 1558 1559 /* If branch profiling is enabled, show 
branch overall results. */ 1560 if (clo_branch_sim) { 1561 /* Make format string, getting width right for numbers */ 1562 VG_(sprintf)(fmt, "%%s %%,%dllu (%%,%dllu cond + %%,%dllu ind)\n", 1563 l1, l2, l3); 1564 1565 if (0 == Bc_total.b) Bc_total.b = 1; 1566 if (0 == Bi_total.b) Bi_total.b = 1; 1567 B_total.b = Bc_total.b + Bi_total.b; 1568 B_total.mp = Bc_total.mp + Bi_total.mp; 1569 1570 VG_(umsg)("\n"); 1571 VG_(umsg)(fmt, "Branches: ", 1572 B_total.b, Bc_total.b, Bi_total.b); 1573 1574 VG_(umsg)(fmt, "Mispredicts: ", 1575 B_total.mp, Bc_total.mp, Bi_total.mp); 1576 1577 VG_(percentify)(B_total.mp, B_total.b, 1, l1+1, buf1); 1578 VG_(percentify)(Bc_total.mp, Bc_total.b, 1, l2+1, buf2); 1579 VG_(percentify)(Bi_total.mp, Bi_total.b, 1, l3+1, buf3); 1580 1581 VG_(umsg)("Mispred rate: %s (%s + %s )\n", buf1, buf2,buf3); 1582 } 1583 1584 // Various stats 1585 if (VG_(clo_stats)) { 1586 Int debug_lookups = full_debugs + fn_debugs + 1587 file_line_debugs + no_debugs; 1588 1589 VG_(dmsg)("\n"); 1590 VG_(dmsg)("cachegrind: distinct files: %d\n", distinct_files); 1591 VG_(dmsg)("cachegrind: distinct fns: %d\n", distinct_fns); 1592 VG_(dmsg)("cachegrind: distinct lines: %d\n", distinct_lines); 1593 VG_(dmsg)("cachegrind: distinct instrs:%d\n", distinct_instrs); 1594 VG_(dmsg)("cachegrind: debug lookups : %d\n", debug_lookups); 1595 1596 VG_(percentify)(full_debugs, debug_lookups, 1, 6, buf1); 1597 VG_(percentify)(file_line_debugs, debug_lookups, 1, 6, buf2); 1598 VG_(percentify)(fn_debugs, debug_lookups, 1, 6, buf3); 1599 VG_(percentify)(no_debugs, debug_lookups, 1, 6, buf4); 1600 VG_(dmsg)("cachegrind: with full info:%s (%d)\n", 1601 buf1, full_debugs); 1602 VG_(dmsg)("cachegrind: with file/line info:%s (%d)\n", 1603 buf2, file_line_debugs); 1604 VG_(dmsg)("cachegrind: with fn name info:%s (%d)\n", 1605 buf3, fn_debugs); 1606 VG_(dmsg)("cachegrind: with zero info:%s (%d)\n", 1607 buf4, no_debugs); 1608 1609 VG_(dmsg)("cachegrind: string table size: %lu\n", 1610 
VG_(OSetGen_Size)(stringTable)); 1611 VG_(dmsg)("cachegrind: CC table size: %lu\n", 1612 VG_(OSetGen_Size)(CC_table)); 1613 VG_(dmsg)("cachegrind: InstrInfo table size: %lu\n", 1614 VG_(OSetGen_Size)(instrInfoTable)); 1615 } 1616 } 1617 1618 /*--------------------------------------------------------------------*/ 1619 /*--- Discarding BB info ---*/ 1620 /*--------------------------------------------------------------------*/ 1621 1622 // Called when a translation is removed from the translation cache for 1623 // any reason at all: to free up space, because the guest code was 1624 // unmapped or modified, or for any arbitrary reason. 1625 static 1626 void cg_discard_superblock_info ( Addr64 orig_addr64, VexGuestExtents vge ) 1627 { 1628 SB_info* sbInfo; 1629 Addr orig_addr = (Addr)vge.base[0]; 1630 1631 tl_assert(vge.n_used > 0); 1632 1633 if (DEBUG_CG) 1634 VG_(printf)( "discard_basic_block_info: %p, %p, %llu\n", 1635 (void*)(Addr)orig_addr, 1636 (void*)(Addr)vge.base[0], (ULong)vge.len[0]); 1637 1638 // Get BB info, remove from table, free BB info. Simple! Note that we 1639 // use orig_addr, not the first instruction address in vge. 
1640 sbInfo = VG_(OSetGen_Remove)(instrInfoTable, &orig_addr); 1641 tl_assert(NULL != sbInfo); 1642 VG_(OSetGen_FreeNode)(instrInfoTable, sbInfo); 1643 } 1644 1645 /*--------------------------------------------------------------------*/ 1646 /*--- Command line processing ---*/ 1647 /*--------------------------------------------------------------------*/ 1648 1649 static Bool cg_process_cmd_line_option(Char* arg) 1650 { 1651 if (VG_(str_clo_cache_opt)(arg, 1652 &clo_I1_cache, 1653 &clo_D1_cache, 1654 &clo_LL_cache)) {} 1655 1656 else if VG_STR_CLO( arg, "--cachegrind-out-file", clo_cachegrind_out_file) {} 1657 else if VG_BOOL_CLO(arg, "--cache-sim", clo_cache_sim) {} 1658 else if VG_BOOL_CLO(arg, "--branch-sim", clo_branch_sim) {} 1659 else 1660 return False; 1661 1662 return True; 1663 } 1664 1665 static void cg_print_usage(void) 1666 { 1667 VG_(print_cache_clo_opts)(); 1668 VG_(printf)( 1669 " --cache-sim=yes|no [yes] collect cache stats?\n" 1670 " --branch-sim=yes|no [no] collect branch prediction stats?\n" 1671 " --cachegrind-out-file=<file> output file name [cachegrind.out.%%p]\n" 1672 ); 1673 } 1674 1675 static void cg_print_debug_usage(void) 1676 { 1677 VG_(printf)( 1678 " (none)\n" 1679 ); 1680 } 1681 1682 /*--------------------------------------------------------------------*/ 1683 /*--- Setup ---*/ 1684 /*--------------------------------------------------------------------*/ 1685 1686 static void cg_post_clo_init(void); /* just below */ 1687 1688 static void cg_pre_clo_init(void) 1689 { 1690 VG_(details_name) ("Cachegrind"); 1691 VG_(details_version) (NULL); 1692 VG_(details_description) ("a cache and branch-prediction profiler"); 1693 VG_(details_copyright_author)( 1694 "Copyright (C) 2002-2012, and GNU GPL'd, by Nicholas Nethercote et al."); 1695 VG_(details_bug_reports_to) (VG_BUGS_TO); 1696 VG_(details_avg_translation_sizeB) ( 500 ); 1697 1698 VG_(basic_tool_funcs) (cg_post_clo_init, 1699 cg_instrument, 1700 cg_fini); 1701 1702 
VG_(needs_superblock_discards)(cg_discard_superblock_info); 1703 VG_(needs_command_line_options)(cg_process_cmd_line_option, 1704 cg_print_usage, 1705 cg_print_debug_usage); 1706 } 1707 1708 static void cg_post_clo_init(void) 1709 { 1710 cache_t I1c, D1c, LLc; 1711 1712 CC_table = 1713 VG_(OSetGen_Create)(offsetof(LineCC, loc), 1714 cmp_CodeLoc_LineCC, 1715 VG_(malloc), "cg.main.cpci.1", 1716 VG_(free)); 1717 instrInfoTable = 1718 VG_(OSetGen_Create)(/*keyOff*/0, 1719 NULL, 1720 VG_(malloc), "cg.main.cpci.2", 1721 VG_(free)); 1722 stringTable = 1723 VG_(OSetGen_Create)(/*keyOff*/0, 1724 stringCmp, 1725 VG_(malloc), "cg.main.cpci.3", 1726 VG_(free)); 1727 1728 VG_(post_clo_init_configure_caches)(&I1c, &D1c, &LLc, 1729 &clo_I1_cache, 1730 &clo_D1_cache, 1731 &clo_LL_cache); 1732 1733 // min_line_size is used to make sure that we never feed 1734 // accesses to the simulator straddling more than two 1735 // cache lines at any cache level 1736 min_line_size = (I1c.line_size < D1c.line_size) ? I1c.line_size : D1c.line_size; 1737 min_line_size = (LLc.line_size < min_line_size) ? LLc.line_size : min_line_size; 1738 1739 Int largest_load_or_store_size 1740 = VG_(machine_get_size_of_largest_guest_register)(); 1741 if (min_line_size < largest_load_or_store_size) { 1742 /* We can't continue, because the cache simulation might 1743 straddle more than 2 lines, and it will assert. So let's 1744 just stop before we start. */ 1745 VG_(umsg)("Cachegrind: cannot continue: the minimum line size (%d)\n", 1746 (Int)min_line_size); 1747 VG_(umsg)(" must be equal to or larger than the maximum register size (%d)\n", 1748 largest_load_or_store_size ); 1749 VG_(umsg)(" but it is not. 
Exiting now.\n"); 1750 VG_(exit)(1); 1751 } 1752 1753 cachesim_I1_initcache(I1c); 1754 cachesim_D1_initcache(D1c); 1755 cachesim_LL_initcache(LLc); 1756 } 1757 1758 VG_DETERMINE_INTERFACE_VERSION(cg_pre_clo_init) 1759 1760 /*--------------------------------------------------------------------*/ 1761 /*--- end ---*/ 1762 /*--------------------------------------------------------------------*/ 1763 1764