/*--------------------------------------------------------------------*/
/*--- Cachegrind: everything but the simulation itself.            ---*/
/*---                                                    cg_main.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Cachegrind, a Valgrind tool for cache
   profiling programs.

   Copyright (C) 2002-2013 Nicholas Nethercote
      njn@valgrind.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_tool_basics.h"
#include "pub_tool_vki.h"
#include "pub_tool_debuginfo.h"
#include "pub_tool_libcbase.h"
#include "pub_tool_libcassert.h"
#include "pub_tool_libcfile.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_libcproc.h"
#include "pub_tool_machine.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_options.h"
#include "pub_tool_oset.h"
#include "pub_tool_tooliface.h"
#include "pub_tool_xarray.h"
#include "pub_tool_clientstate.h"
#include "pub_tool_machine.h"      // VG_(fnptr_to_fnentry)

#include "cg_arch.h"
#include "cg_sim.c"
#include "cg_branchpred.c"

/*------------------------------------------------------------*/
/*--- Constants                                            ---*/
/*------------------------------------------------------------*/

/* Set to 1 for very verbose debugging */
#define DEBUG_CG 0

#define MIN_LINE_SIZE         16
#define FILE_LEN              VKI_PATH_MAX
#define FN_LEN                256

/*------------------------------------------------------------*/
/*--- Options                                              ---*/
/*------------------------------------------------------------*/

static Bool  clo_cache_sim  = True;  /* do cache simulation? */
static Bool  clo_branch_sim = False; /* do branch simulation? */
static const HChar* clo_cachegrind_out_file = "cachegrind.out.%p";

/*------------------------------------------------------------*/
/*--- Cachesim configuration                               ---*/
/*------------------------------------------------------------*/

static Int min_line_size = 0; /* min of L1 and LL cache line sizes */

/*------------------------------------------------------------*/
/*--- Types and Data Structures                            ---*/
/*------------------------------------------------------------*/

typedef
   struct {
      ULong a;  /* total # memory accesses of this kind */
      ULong m1; /* misses in the first level cache */
      ULong mL; /* misses in the second level cache */
   }
   CacheCC;

typedef
   struct {
      ULong b;  /* total # branches of this kind */
      ULong mp; /* number of branches mispredicted */
   }
   BranchCC;
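// Nb: in a CacheCC, 'm1' counts misses in the first-level cache and 'mL'
// counts the subset of those accesses that also miss in the last-level
// cache (which is only consulted on a first-level miss), so 'mL' can
// never exceed 'm1'.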
//------------------------------------------------------------
// Primary data structure #1: CC table
// - Holds the per-source-line hit/miss stats, grouped by file/function/line.
// - An ordered set of CCs.  CC indexing done by file/function/line (as
//   determined from the instrAddr).
// - Traversed for dumping stats at end in file/func/line hierarchy.

typedef struct {
   HChar* file;
   HChar* fn;
   Int    line;
}
CodeLoc;

typedef struct {
   CodeLoc  loc; /* Source location that these counts pertain to */
   CacheCC  Ir;  /* Insn read counts */
   CacheCC  Dr;  /* Data read counts */
   CacheCC  Dw;  /* Data write/modify counts */
   BranchCC Bc;  /* Conditional branch counts */
   BranchCC Bi;  /* Indirect branch counts */
} LineCC;

// First compare file, then fn, then line.
static Word cmp_CodeLoc_LineCC(const void *vloc, const void *vcc)
{
   Word res;
   const CodeLoc* a = (const CodeLoc*)vloc;
   const CodeLoc* b = &(((const LineCC*)vcc)->loc);

   res = VG_(strcmp)(a->file, b->file);
   if (0 != res)
      return res;

   res = VG_(strcmp)(a->fn, b->fn);
   if (0 != res)
      return res;

   return a->line - b->line;
}

static OSet* CC_table;

//------------------------------------------------------------
// Primary data structure #2: InstrInfo table
// - Holds the cached info about each instr that is used for simulation.
// - table(SB_start_addr, list(InstrInfo))
// - For each SB, each InstrInfo in the list holds info about the
//   instruction (instrLen, instrAddr, etc), plus a pointer to its line
//   CC.  This node is what's passed to the simulation function.
// - When SBs are discarded the relevant list(instr_details) is freed.

typedef struct _InstrInfo InstrInfo;
struct _InstrInfo {
   Addr    instr_addr;
   UChar   instr_len;
   LineCC* parent;       // parent line-CC
};

typedef struct _SB_info SB_info;
struct _SB_info {
   Addr      SB_addr;    // key;  MUST BE FIRST
   Int       n_instrs;
   InstrInfo instrs[0];
};

static OSet* instrInfoTable;

//------------------------------------------------------------
// Secondary data structure: string table
// - Holds strings, avoiding dups.
// - Used for filenames and function names, each of which will be
//   pointed to by one or more CCs.
// - It also allows equality checks just by pointer comparison, which
//   is good when printing the output file at the end.

static OSet* stringTable;

//------------------------------------------------------------
// Stats
static Int  distinct_files     = 0;
static Int  distinct_fns       = 0;
static Int  distinct_lines     = 0;
static Int  distinct_instrsGen = 0;
static Int  distinct_instrsNoX = 0;

static Int  full_debugs        = 0;
static Int  file_line_debugs   = 0;
static Int  fn_debugs          = 0;
static Int  no_debugs          = 0;

/*------------------------------------------------------------*/
/*--- String table operations                              ---*/
/*------------------------------------------------------------*/

static Word stringCmp( const void* key, const void* elem )
{
   return VG_(strcmp)(*(const HChar *const *)key, *(const HChar *const *)elem);
}

// Get a permanent string; either pull it out of the string table if it's
// been encountered before, or dup it and put it into the string table.
static HChar* get_perm_string(HChar* s)
{
   HChar** s_ptr = VG_(OSetGen_Lookup)(stringTable, &s);
   if (s_ptr) {
      return *s_ptr;
   } else {
      HChar** s_node = VG_(OSetGen_AllocNode)(stringTable, sizeof(HChar*));
      *s_node = VG_(strdup)("cg.main.gps.1", s);
      VG_(OSetGen_Insert)(stringTable, s_node);
      return *s_node;
   }
}

/*------------------------------------------------------------*/
/*--- CC table operations                                  ---*/
/*------------------------------------------------------------*/

static void get_debug_info(Addr instr_addr, HChar file[FILE_LEN],
                           HChar fn[FN_LEN], UInt* line)
{
   HChar dir[FILE_LEN];
   Bool found_dirname;
   Bool found_file_line = VG_(get_filename_linenum)(
                             instr_addr,
                             file, FILE_LEN,
                             dir,  FILE_LEN, &found_dirname,
                             line
                          );
   Bool found_fn = VG_(get_fnname)(instr_addr, fn, FN_LEN);

   if (!found_file_line) {
      VG_(strcpy)(file, "???");
      *line = 0;
   }
   if (!found_fn) {
      VG_(strcpy)(fn, "???");
   }

   if (found_dirname) {
      // +1 for the '/'.
      tl_assert(VG_(strlen)(dir) + VG_(strlen)(file) + 1 < FILE_LEN);
      VG_(strcat)(dir, "/");     // Append '/'
      VG_(strcat)(dir, file);    // Append file to dir
      VG_(strcpy)(file, dir);    // Move dir+file to file
   }

   if (found_file_line) {
      if (found_fn) full_debugs++;
      else          file_line_debugs++;
   } else {
      if (found_fn) fn_debugs++;
      else          no_debugs++;
   }
}

// Do a three step traversal: by file, then fn, then line.
// Returns a pointer to the line CC, creates a new one if necessary.
static LineCC* get_lineCC(Addr origAddr)
{
   HChar   file[FILE_LEN], fn[FN_LEN];
   UInt    line;
   CodeLoc loc;
   LineCC* lineCC;

   get_debug_info(origAddr, file, fn, &line);

   loc.file = file;
   loc.fn   = fn;
   loc.line = line;

   lineCC = VG_(OSetGen_Lookup)(CC_table, &loc);
   if (!lineCC) {
      // Allocate and zero a new node.
      lineCC           = VG_(OSetGen_AllocNode)(CC_table, sizeof(LineCC));
      lineCC->loc.file = get_perm_string(loc.file);
      lineCC->loc.fn   = get_perm_string(loc.fn);
      lineCC->loc.line = loc.line;
      lineCC->Ir.a     = 0;
      lineCC->Ir.m1    = 0;
      lineCC->Ir.mL    = 0;
      lineCC->Dr.a     = 0;
      lineCC->Dr.m1    = 0;
      lineCC->Dr.mL    = 0;
      lineCC->Dw.a     = 0;
      lineCC->Dw.m1    = 0;
      lineCC->Dw.mL    = 0;
      lineCC->Bc.b     = 0;
      lineCC->Bc.mp    = 0;
      lineCC->Bi.b     = 0;
      lineCC->Bi.mp    = 0;
      VG_(OSetGen_Insert)(CC_table, lineCC);
   }

   return lineCC;
}

/*------------------------------------------------------------*/
/*--- Cache simulation functions                           ---*/
/*------------------------------------------------------------*/

/* A common case for an instruction read event is that the
 * bytes read belong to the same cache line in both L1I and LL
 * (if cache line sizes of L1 and LL are the same).
 * As this can be detected at instrumentation time, and results
 * in faster simulation, special-casing is beneficial.
 *
 * Abbreviations used in var/function names:
 *  IrNoX - instruction read does not cross cache lines
 *  IrGen - generic instruction read; not detected as IrNoX
 *  Ir    - not known / not important whether it is an IrNoX
 */
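/* Illustrative example (assuming 64-byte lines in both I1 and LL): an
 * 8-byte instruction starting at 0x1003C occupies 0x1003C..0x10043 and
 * straddles the line boundary at 0x10040, so it must be handled as
 * IrGen; the same instruction at 0x10010 stays within one line and can
 * use the faster IrNoX path. */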
// Only used with --cache-sim=no.
static VG_REGPARM(1)
void log_1Ir(InstrInfo* n)
{
   n->parent->Ir.a++;
}

// Only used with --cache-sim=no.
static VG_REGPARM(2)
void log_2Ir(InstrInfo* n, InstrInfo* n2)
{
   n->parent->Ir.a++;
   n2->parent->Ir.a++;
}

// Only used with --cache-sim=no.
static VG_REGPARM(3)
void log_3Ir(InstrInfo* n, InstrInfo* n2, InstrInfo* n3)
{
   n->parent->Ir.a++;
   n2->parent->Ir.a++;
   n3->parent->Ir.a++;
}

// Generic case for instruction reads: may cross cache lines.
// All other Ir handlers expect IrNoX instruction reads.
static VG_REGPARM(1)
void log_1IrGen_0D_cache_access(InstrInfo* n)
{
   //VG_(printf)("1IrGen_0D : CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n",
   //            n, n->instr_addr, n->instr_len);
   cachesim_I1_doref_Gen(n->instr_addr, n->instr_len,
                         &n->parent->Ir.m1, &n->parent->Ir.mL);
   n->parent->Ir.a++;
}

static VG_REGPARM(1)
void log_1IrNoX_0D_cache_access(InstrInfo* n)
{
   //VG_(printf)("1IrNoX_0D : CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n",
   //            n, n->instr_addr, n->instr_len);
   cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
                         &n->parent->Ir.m1, &n->parent->Ir.mL);
   n->parent->Ir.a++;
}

static VG_REGPARM(2)
void log_2IrNoX_0D_cache_access(InstrInfo* n, InstrInfo* n2)
{
   //VG_(printf)("2IrNoX_0D : CC1addr=0x%010lx, i1addr=0x%010lx, i1size=%lu\n"
   //            "            CC2addr=0x%010lx, i2addr=0x%010lx, i2size=%lu\n",
   //            n,  n->instr_addr,  n->instr_len,
   //            n2, n2->instr_addr, n2->instr_len);
   cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
                         &n->parent->Ir.m1, &n->parent->Ir.mL);
   n->parent->Ir.a++;
   cachesim_I1_doref_NoX(n2->instr_addr, n2->instr_len,
                         &n2->parent->Ir.m1, &n2->parent->Ir.mL);
   n2->parent->Ir.a++;
}

static VG_REGPARM(3)
void log_3IrNoX_0D_cache_access(InstrInfo* n, InstrInfo* n2, InstrInfo* n3)
{
   //VG_(printf)("3IrNoX_0D : CC1addr=0x%010lx, i1addr=0x%010lx, i1size=%lu\n"
   //            "            CC2addr=0x%010lx, i2addr=0x%010lx, i2size=%lu\n"
   //            "            CC3addr=0x%010lx, i3addr=0x%010lx, i3size=%lu\n",
   //            n,  n->instr_addr,  n->instr_len,
   //            n2, n2->instr_addr, n2->instr_len,
   //            n3, n3->instr_addr, n3->instr_len);
   cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
                         &n->parent->Ir.m1, &n->parent->Ir.mL);
   n->parent->Ir.a++;
   cachesim_I1_doref_NoX(n2->instr_addr, n2->instr_len,
                         &n2->parent->Ir.m1, &n2->parent->Ir.mL);
   n2->parent->Ir.a++;
   cachesim_I1_doref_NoX(n3->instr_addr, n3->instr_len,
                         &n3->parent->Ir.m1, &n3->parent->Ir.mL);
   n3->parent->Ir.a++;
}
static VG_REGPARM(3)
void log_1IrNoX_1Dr_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
{
   //VG_(printf)("1IrNoX_1Dr: CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n"
   //            "            daddr=0x%010lx,  dsize=%lu\n",
   //            n, n->instr_addr, n->instr_len, data_addr, data_size);
   cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
                         &n->parent->Ir.m1, &n->parent->Ir.mL);
   n->parent->Ir.a++;

   cachesim_D1_doref(data_addr, data_size,
                     &n->parent->Dr.m1, &n->parent->Dr.mL);
   n->parent->Dr.a++;
}

static VG_REGPARM(3)
void log_1IrNoX_1Dw_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
{
   //VG_(printf)("1IrNoX_1Dw: CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n"
   //            "            daddr=0x%010lx,  dsize=%lu\n",
   //            n, n->instr_addr, n->instr_len, data_addr, data_size);
   cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
                         &n->parent->Ir.m1, &n->parent->Ir.mL);
   n->parent->Ir.a++;

   cachesim_D1_doref(data_addr, data_size,
                     &n->parent->Dw.m1, &n->parent->Dw.mL);
   n->parent->Dw.a++;
}

/* Note that addEvent_D_guarded assumes that log_0Ir_1Dr_cache_access
   and log_0Ir_1Dw_cache_access have exactly the same prototype.  If
   you change them, you must change addEvent_D_guarded too. */
static VG_REGPARM(3)
void log_0Ir_1Dr_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
{
   //VG_(printf)("0Ir_1Dr:  CCaddr=0x%010lx, daddr=0x%010lx, dsize=%lu\n",
   //            n, data_addr, data_size);
   cachesim_D1_doref(data_addr, data_size,
                     &n->parent->Dr.m1, &n->parent->Dr.mL);
   n->parent->Dr.a++;
}

/* See comment on log_0Ir_1Dr_cache_access. */
static VG_REGPARM(3)
void log_0Ir_1Dw_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
{
   //VG_(printf)("0Ir_1Dw:  CCaddr=0x%010lx, daddr=0x%010lx, dsize=%lu\n",
   //            n, data_addr, data_size);
   cachesim_D1_doref(data_addr, data_size,
                     &n->parent->Dw.m1, &n->parent->Dw.mL);
   n->parent->Dw.a++;
}

/* For branches, we consult two different predictors, one which
   predicts taken/untaken for conditional branches, and the other
   which predicts the branch target address for indirect branches
   (jump-to-register style ones). */

static VG_REGPARM(2)
void log_cond_branch(InstrInfo* n, Word taken)
{
   //VG_(printf)("cbrnch:  CCaddr=0x%010lx, taken=0x%010lx\n",
   //            n, taken);
   n->parent->Bc.b++;
   n->parent->Bc.mp
      += (1 & do_cond_branch_predict(n->instr_addr, taken));
}

static VG_REGPARM(2)
void log_ind_branch(InstrInfo* n, UWord actual_dst)
{
   //VG_(printf)("ibrnch:  CCaddr=0x%010lx, dst=0x%010lx\n",
   //            n, actual_dst);
   n->parent->Bi.b++;
   n->parent->Bi.mp
      += (1 & do_ind_branch_predict(n->instr_addr, actual_dst));
}


/*------------------------------------------------------------*/
/*--- Instrumentation types and structures                 ---*/
/*------------------------------------------------------------*/

/* Maintain an ordered list of memory events which are outstanding, in
   the sense that no IR has yet been generated to do the relevant
   helper calls.  The BB is scanned top to bottom and memory events
   are added to the end of the list, merging with the most recent
   notified event where possible (Dw immediately following Dr and
   having the same size and EA can be merged).

   This merging is done so that for architectures which have
   load-op-store instructions (x86, amd64), the insn is treated as if
   it makes just one memory reference (a modify), rather than two (a
   read followed by a write at the same address).

   At various points the list will need to be flushed, that is, IR
   generated from it.  That must happen before any possible exit from
   the block (the end, or an IRStmt_Exit).  Flushing also takes place
   when there is no space to add a new event.

   If we require the simulation statistics to be up to date with
   respect to possible memory exceptions, then the list would have to
   be flushed before each memory reference.  That would however lose
   performance by inhibiting event-merging during flushing.

   Flushing the list consists of walking it start to end and emitting
   instrumentation IR for each event, in the order in which they
   appear.  It may be possible to emit a single call for two adjacent
   events in order to reduce the number of helper function calls made.
   For example, it could well be profitable to handle two adjacent Ir
   events with a single helper call. */
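/* A concrete (hypothetical) example of the merging described above: an
   x86 load-op-store insn such as "addl %eax,(%ebx)" yields one Ev_IrNoX
   (from its IMark), one Ev_Dr of 4 bytes at some EA, and one Ev_Dw of 4
   bytes at the same EA.  addEvent_Dw spots that the Dw matches the
   pending Dr and rewrites it in place to a single Ev_Dm, and flushEvents
   then folds the IrNoX+Dm pair into one helper call. */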
typedef
   IRExpr
   IRAtom;

typedef
   enum {
      Ev_IrNoX,  // Instruction read not crossing cache lines
      Ev_IrGen,  // Generic Ir, not being detected as IrNoX
      Ev_Dr,     // Data read
      Ev_Dw,     // Data write
      Ev_Dm,     // Data modify (read then write)
      Ev_Bc,     // branch conditional
      Ev_Bi      // branch indirect (to unknown destination)
   }
   EventTag;

typedef
   struct {
      EventTag   tag;
      InstrInfo* inode;
      union {
         struct {
         } IrGen;
         struct {
         } IrNoX;
         struct {
            IRAtom* ea;
            Int     szB;
         } Dr;
         struct {
            IRAtom* ea;
            Int     szB;
         } Dw;
         struct {
            IRAtom* ea;
            Int     szB;
         } Dm;
         struct {
            IRAtom* taken; /* :: Ity_I1 */
         } Bc;
         struct {
            IRAtom* dst;
         } Bi;
      } Ev;
   }
   Event;

static void init_Event ( Event* ev ) {
   VG_(memset)(ev, 0, sizeof(Event));
}

static IRAtom* get_Event_dea ( Event* ev ) {
   switch (ev->tag) {
      case Ev_Dr: return ev->Ev.Dr.ea;
      case Ev_Dw: return ev->Ev.Dw.ea;
      case Ev_Dm: return ev->Ev.Dm.ea;
      default:    tl_assert(0);
   }
}

static Int get_Event_dszB ( Event* ev ) {
   switch (ev->tag) {
      case Ev_Dr: return ev->Ev.Dr.szB;
      case Ev_Dw: return ev->Ev.Dw.szB;
      case Ev_Dm: return ev->Ev.Dm.szB;
      default:    tl_assert(0);
   }
}


/* Up to this many unnotified events are allowed.  Number is
   arbitrary.  Larger numbers allow more event merging to occur, but
   potentially induce more spilling due to extending live ranges of
   address temporaries. */
#define N_EVENTS 16


/* A struct which holds all the running state during instrumentation.
   Mostly to avoid passing loads of parameters everywhere. */
typedef
   struct {
      /* The current outstanding-memory-event list. */
      Event events[N_EVENTS];
      Int   events_used;

      /* The array of InstrInfo bins for the BB. */
      SB_info* sbInfo;

      /* Number of InstrInfo bins 'used' so far. */
      Int sbInfo_i;

      /* The output SB being constructed. */
      IRSB* sbOut;
   }
   CgState;


/*------------------------------------------------------------*/
/*--- Instrumentation main                                 ---*/
/*------------------------------------------------------------*/

// Note that origAddr is the real origAddr, not the address of the first
// instruction in the block (they can be different due to redirection).
static
SB_info* get_SB_info(IRSB* sbIn, Addr origAddr)
{
   Int      i, n_instrs;
   IRStmt*  st;
   SB_info* sbInfo;

   // Count number of original instrs in SB
   n_instrs = 0;
   for (i = 0; i < sbIn->stmts_used; i++) {
      st = sbIn->stmts[i];
      if (Ist_IMark == st->tag) n_instrs++;
   }

   // Check that we don't have an entry for this BB in the instr-info table.
   // If this assertion fails, there has been some screwup: some
   // translations must have been discarded but Cachegrind hasn't discarded
   // the corresponding entries in the instr-info table.
   sbInfo = VG_(OSetGen_Lookup)(instrInfoTable, &origAddr);
   tl_assert(NULL == sbInfo);

   // BB never translated before (at this address, at least; could have
   // been unloaded and then reloaded elsewhere in memory)
   sbInfo = VG_(OSetGen_AllocNode)(instrInfoTable,
                                   sizeof(SB_info) + n_instrs*sizeof(InstrInfo));
   sbInfo->SB_addr  = origAddr;
   sbInfo->n_instrs = n_instrs;
   VG_(OSetGen_Insert)( instrInfoTable, sbInfo );

   return sbInfo;
}


static void showEvent ( Event* ev )
{
   switch (ev->tag) {
      case Ev_IrGen:
         VG_(printf)("IrGen %p\n", ev->inode);
         break;
      case Ev_IrNoX:
         VG_(printf)("IrNoX %p\n", ev->inode);
         break;
      case Ev_Dr:
         VG_(printf)("Dr %p %d EA=", ev->inode, ev->Ev.Dr.szB);
         ppIRExpr(ev->Ev.Dr.ea);
         VG_(printf)("\n");
         break;
      case Ev_Dw:
         VG_(printf)("Dw %p %d EA=", ev->inode, ev->Ev.Dw.szB);
         ppIRExpr(ev->Ev.Dw.ea);
         VG_(printf)("\n");
         break;
      case Ev_Dm:
         VG_(printf)("Dm %p %d EA=", ev->inode, ev->Ev.Dm.szB);
         ppIRExpr(ev->Ev.Dm.ea);
         VG_(printf)("\n");
         break;
      case Ev_Bc:
         VG_(printf)("Bc %p  GA=", ev->inode);
         ppIRExpr(ev->Ev.Bc.taken);
         VG_(printf)("\n");
         break;
      case Ev_Bi:
         VG_(printf)("Bi %p  DST=", ev->inode);
         ppIRExpr(ev->Ev.Bi.dst);
         VG_(printf)("\n");
         break;
      default:
         tl_assert(0);
         break;
   }
}

// Reserve and initialise an InstrInfo for the first mention of a new insn.
static
InstrInfo* setup_InstrInfo ( CgState* cgs, Addr instr_addr, UInt instr_len )
{
   InstrInfo* i_node;
   tl_assert(cgs->sbInfo_i >= 0);
   tl_assert(cgs->sbInfo_i < cgs->sbInfo->n_instrs);
   i_node = &cgs->sbInfo->instrs[ cgs->sbInfo_i ];
   i_node->instr_addr = instr_addr;
   i_node->instr_len  = instr_len;
   i_node->parent     = get_lineCC(instr_addr);
   cgs->sbInfo_i++;
   return i_node;
}


/* Generate code for all outstanding memory events, and mark the queue
   empty.  Code is generated into cgs->sbOut, and this activity
   'consumes' slots in cgs->sbInfo. */

static void flushEvents ( CgState* cgs )
{
   Int          i, regparms;
   const HChar* helperName;
   void*        helperAddr;
   IRExpr**     argv;
   IRExpr*      i_node_expr;
   IRDirty*     di;
   Event*       ev;
   Event*       ev2;
   Event*       ev3;

   i = 0;
   while (i < cgs->events_used) {

      helperName = NULL;
      helperAddr = NULL;
      argv       = NULL;
      regparms   = 0;

      /* generate IR to notify event i and possibly the ones
         immediately following it. */
      tl_assert(i >= 0 && i < cgs->events_used);

      ev  = &cgs->events[i];
      ev2 = ( i < cgs->events_used-1 ? &cgs->events[i+1] : NULL );
      ev3 = ( i < cgs->events_used-2 ? &cgs->events[i+2] : NULL );

      if (DEBUG_CG) {
         VG_(printf)("   flush ");
         showEvent( ev );
      }

      i_node_expr = mkIRExpr_HWord( (HWord)ev->inode );

      /* Decide on helper fn to call and args to pass it, and advance
         i appropriately. */
      switch (ev->tag) {
         case Ev_IrNoX:
            /* Merge an IrNoX with a following Dr/Dm. */
            if (ev2 && (ev2->tag == Ev_Dr || ev2->tag == Ev_Dm)) {
               /* Why is this true?  It's because we're merging an Ir
                  with a following Dr or Dm.  The Ir derives from the
                  instruction's IMark and the Dr/Dm from data
                  references which follow it.  In short it holds
                  because each insn starts with an IMark, hence an
                  Ev_Ir, and so these Dr/Dm must pertain to the
                  immediately preceding Ir.
                  Same applies to analogous
                  assertions in the subsequent cases. */
               tl_assert(ev2->inode == ev->inode);
               helperName = "log_1IrNoX_1Dr_cache_access";
               helperAddr = &log_1IrNoX_1Dr_cache_access;
               argv = mkIRExprVec_3( i_node_expr,
                                     get_Event_dea(ev2),
                                     mkIRExpr_HWord( get_Event_dszB(ev2) ) );
               regparms = 3;
               i += 2;
            }
            /* Merge an IrNoX with a following Dw. */
            else
            if (ev2 && ev2->tag == Ev_Dw) {
               tl_assert(ev2->inode == ev->inode);
               helperName = "log_1IrNoX_1Dw_cache_access";
               helperAddr = &log_1IrNoX_1Dw_cache_access;
               argv = mkIRExprVec_3( i_node_expr,
                                     get_Event_dea(ev2),
                                     mkIRExpr_HWord( get_Event_dszB(ev2) ) );
               regparms = 3;
               i += 2;
            }
            /* Merge an IrNoX with two following IrNoX's. */
            else
            if (ev2 && ev3 && ev2->tag == Ev_IrNoX && ev3->tag == Ev_IrNoX)
            {
               if (clo_cache_sim) {
                  helperName = "log_3IrNoX_0D_cache_access";
                  helperAddr = &log_3IrNoX_0D_cache_access;
               } else {
                  helperName = "log_3Ir";
                  helperAddr = &log_3Ir;
               }
               argv = mkIRExprVec_3( i_node_expr,
                                     mkIRExpr_HWord( (HWord)ev2->inode ),
                                     mkIRExpr_HWord( (HWord)ev3->inode ) );
               regparms = 3;
               i += 3;
            }
            /* Merge an IrNoX with one following IrNoX. */
            else
            if (ev2 && ev2->tag == Ev_IrNoX) {
               if (clo_cache_sim) {
                  helperName = "log_2IrNoX_0D_cache_access";
                  helperAddr = &log_2IrNoX_0D_cache_access;
               } else {
                  helperName = "log_2Ir";
                  helperAddr = &log_2Ir;
               }
               argv = mkIRExprVec_2( i_node_expr,
                                     mkIRExpr_HWord( (HWord)ev2->inode ) );
               regparms = 2;
               i += 2;
            }
            /* No merging possible; emit as-is. */
            else {
               if (clo_cache_sim) {
                  helperName = "log_1IrNoX_0D_cache_access";
                  helperAddr = &log_1IrNoX_0D_cache_access;
               } else {
                  helperName = "log_1Ir";
                  helperAddr = &log_1Ir;
               }
               argv = mkIRExprVec_1( i_node_expr );
               regparms = 1;
               i++;
            }
            break;
         case Ev_IrGen:
            if (clo_cache_sim) {
               helperName = "log_1IrGen_0D_cache_access";
               helperAddr = &log_1IrGen_0D_cache_access;
            } else {
               helperName = "log_1Ir";
               helperAddr = &log_1Ir;
            }
            argv = mkIRExprVec_1( i_node_expr );
            regparms = 1;
            i++;
            break;
         case Ev_Dr:
         case Ev_Dm:
            /* Data read or modify */
            helperName = "log_0Ir_1Dr_cache_access";
            helperAddr = &log_0Ir_1Dr_cache_access;
            argv = mkIRExprVec_3( i_node_expr,
                                  get_Event_dea(ev),
                                  mkIRExpr_HWord( get_Event_dszB(ev) ) );
            regparms = 3;
            i++;
            break;
         case Ev_Dw:
            /* Data write */
            helperName = "log_0Ir_1Dw_cache_access";
            helperAddr = &log_0Ir_1Dw_cache_access;
            argv = mkIRExprVec_3( i_node_expr,
                                  get_Event_dea(ev),
                                  mkIRExpr_HWord( get_Event_dszB(ev) ) );
            regparms = 3;
            i++;
            break;
         case Ev_Bc:
            /* Conditional branch */
            helperName = "log_cond_branch";
            helperAddr = &log_cond_branch;
            argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bc.taken );
            regparms = 2;
            i++;
            break;
         case Ev_Bi:
            /* Branch to an unknown destination */
            helperName = "log_ind_branch";
            helperAddr = &log_ind_branch;
            argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bi.dst );
            regparms = 2;
            i++;
            break;
         default:
            tl_assert(0);
      }

      /* Add the helper. */
      tl_assert(helperName);
      tl_assert(helperAddr);
      tl_assert(argv);
      di = unsafeIRDirty_0_N( regparms,
                              helperName, VG_(fnptr_to_fnentry)( helperAddr ),
                              argv );
      addStmtToIRSB( cgs->sbOut, IRStmt_Dirty(di) );
   }

   cgs->events_used = 0;
}
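/* Summary of the helper selection above, with --cache-sim=yes:
      IrNoX + Dr/Dm   -> log_1IrNoX_1Dr_cache_access
      IrNoX + Dw      -> log_1IrNoX_1Dw_cache_access
      IrNoX x3        -> log_3IrNoX_0D_cache_access
      IrNoX x2        -> log_2IrNoX_0D_cache_access
      IrNoX           -> log_1IrNoX_0D_cache_access
      IrGen           -> log_1IrGen_0D_cache_access
      Dr/Dm alone     -> log_0Ir_1Dr_cache_access
      Dw alone        -> log_0Ir_1Dw_cache_access
      Bc / Bi         -> log_cond_branch / log_ind_branch
   With --cache-sim=no the Ir cases fall back to log_1Ir/log_2Ir/log_3Ir. */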
static void addEvent_Ir ( CgState* cgs, InstrInfo* inode )
{
   Event* evt;
   if (cgs->events_used == N_EVENTS)
      flushEvents(cgs);
   tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
   evt = &cgs->events[cgs->events_used];
   init_Event(evt);
   evt->inode = inode;
   if (cachesim_is_IrNoX(inode->instr_addr, inode->instr_len)) {
      evt->tag = Ev_IrNoX;
      distinct_instrsNoX++;
   } else {
      evt->tag = Ev_IrGen;
      distinct_instrsGen++;
   }
   cgs->events_used++;
}

static
void addEvent_Dr ( CgState* cgs, InstrInfo* inode, Int datasize, IRAtom* ea )
{
   Event* evt;
   tl_assert(isIRAtom(ea));
   tl_assert(datasize >= 1 && datasize <= min_line_size);
   if (!clo_cache_sim)
      return;
   if (cgs->events_used == N_EVENTS)
      flushEvents(cgs);
   tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
   evt = &cgs->events[cgs->events_used];
   init_Event(evt);
   evt->tag       = Ev_Dr;
   evt->inode     = inode;
   evt->Ev.Dr.szB = datasize;
   evt->Ev.Dr.ea  = ea;
   cgs->events_used++;
}

static
void addEvent_Dw ( CgState* cgs, InstrInfo* inode, Int datasize, IRAtom* ea )
{
   Event* lastEvt;
   Event* evt;

   tl_assert(isIRAtom(ea));
   tl_assert(datasize >= 1 && datasize <= min_line_size);

   if (!clo_cache_sim)
      return;

   /* Is it possible to merge this write with the preceding read? */
   lastEvt = &cgs->events[cgs->events_used-1];
   if (cgs->events_used > 0
       && lastEvt->tag       == Ev_Dr
       && lastEvt->Ev.Dr.szB == datasize
       && lastEvt->inode     == inode
       && eqIRAtom(lastEvt->Ev.Dr.ea, ea))
   {
      lastEvt->tag = Ev_Dm;
      return;
   }

   /* No.  Add as normal. */
   if (cgs->events_used == N_EVENTS)
      flushEvents(cgs);
   tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
   evt = &cgs->events[cgs->events_used];
   init_Event(evt);
   evt->tag       = Ev_Dw;
   evt->inode     = inode;
   evt->Ev.Dw.szB = datasize;
   evt->Ev.Dw.ea  = ea;
   cgs->events_used++;
}

static
void addEvent_D_guarded ( CgState* cgs, InstrInfo* inode,
                          Int datasize, IRAtom* ea, IRAtom* guard,
                          Bool isWrite )
{
   tl_assert(isIRAtom(ea));
   tl_assert(guard);
   tl_assert(isIRAtom(guard));
   tl_assert(datasize >= 1 && datasize <= min_line_size);

   if (!clo_cache_sim)
      return;

   /* Adding guarded memory actions and merging them with the existing
      queue is too complex.  Simply flush the queue and add this
      action immediately.  Since guarded loads and stores are pretty
      rare, this is not thought likely to cause any noticeable
      performance loss as a result of the loss of event-merging
      opportunities. */
   tl_assert(cgs->events_used >= 0);
   flushEvents(cgs);
   tl_assert(cgs->events_used == 0);
   /* Same as case Ev_Dw / case Ev_Dr in flushEvents, except with guard */
   IRExpr*      i_node_expr;
   const HChar* helperName;
   void*        helperAddr;
   IRExpr**     argv;
   Int          regparms;
   IRDirty*     di;
   i_node_expr = mkIRExpr_HWord( (HWord)inode );
   helperName  = isWrite ? "log_0Ir_1Dw_cache_access"
                         : "log_0Ir_1Dr_cache_access";
   helperAddr  = isWrite ? &log_0Ir_1Dw_cache_access
                         : &log_0Ir_1Dr_cache_access;
   argv        = mkIRExprVec_3( i_node_expr,
                                ea, mkIRExpr_HWord( datasize ) );
   regparms    = 3;
   di          = unsafeIRDirty_0_N(
                    regparms,
                    helperName, VG_(fnptr_to_fnentry)( helperAddr ),
                    argv );
   di->guard = guard;
   addStmtToIRSB( cgs->sbOut, IRStmt_Dirty(di) );
}


static
void addEvent_Bc ( CgState* cgs, InstrInfo* inode, IRAtom* guard )
{
   Event* evt;
   tl_assert(isIRAtom(guard));
   tl_assert(typeOfIRExpr(cgs->sbOut->tyenv, guard)
             == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64));
   if (!clo_branch_sim)
      return;
   if (cgs->events_used == N_EVENTS)
      flushEvents(cgs);
   tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
   evt = &cgs->events[cgs->events_used];
   init_Event(evt);
   evt->tag         = Ev_Bc;
   evt->inode       = inode;
   evt->Ev.Bc.taken = guard;
   cgs->events_used++;
}

static
void addEvent_Bi ( CgState* cgs, InstrInfo* inode, IRAtom* whereTo )
{
   Event* evt;
   tl_assert(isIRAtom(whereTo));
   tl_assert(typeOfIRExpr(cgs->sbOut->tyenv, whereTo)
             == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64));
   if (!clo_branch_sim)
      return;
   if (cgs->events_used == N_EVENTS)
      flushEvents(cgs);
   tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
   evt = &cgs->events[cgs->events_used];
   init_Event(evt);
   evt->tag       = Ev_Bi;
   evt->inode     = inode;
   evt->Ev.Bi.dst = whereTo;
   cgs->events_used++;
}

////////////////////////////////////////////////////////////


static
IRSB* cg_instrument ( VgCallbackClosure* closure,
                      IRSB* sbIn,
                      VexGuestLayout* layout,
                      VexGuestExtents* vge,
                      VexArchInfo* archinfo_host,
                      IRType gWordTy, IRType hWordTy )
{
   Int        i, isize;
   IRStmt*    st;
   Addr64     cia; /* address of current insn */
   CgState    cgs;
   IRTypeEnv* tyenv = sbIn->tyenv;
   InstrInfo* curr_inode = NULL;

   if (gWordTy != hWordTy) {
      /* We don't currently support this case. */
      VG_(tool_panic)("host/guest word size mismatch");
   }

   // Set up new SB
   cgs.sbOut = deepCopyIRSBExceptStmts(sbIn);

   // Copy verbatim any IR preamble preceding the first IMark
   i = 0;
   while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
      addStmtToIRSB( cgs.sbOut, sbIn->stmts[i] );
      i++;
   }

   // Get the first statement, and initial cia from it
   tl_assert(sbIn->stmts_used > 0);
   tl_assert(i < sbIn->stmts_used);
   st = sbIn->stmts[i];
   tl_assert(Ist_IMark == st->tag);

   cia   = st->Ist.IMark.addr;
   isize = st->Ist.IMark.len;
   // If Vex fails to decode an instruction, the size will be zero.
   // Pretend otherwise.
   if (isize == 0) isize = VG_MIN_INSTR_SZB;

   // Set up running state and get block info
   tl_assert(closure->readdr == vge->base[0]);
   cgs.events_used = 0;
   cgs.sbInfo      = get_SB_info(sbIn, (Addr)closure->readdr);
   cgs.sbInfo_i    = 0;

   if (DEBUG_CG)
      VG_(printf)("\n\n---------- cg_instrument ----------\n");

   // Traverse the block, initialising inodes, adding events and flushing as
   // necessary.
   for (/*use current i*/; i < sbIn->stmts_used; i++) {

      st = sbIn->stmts[i];
      tl_assert(isFlatIRStmt(st));

      switch (st->tag) {
         case Ist_NoOp:
         case Ist_AbiHint:
         case Ist_Put:
         case Ist_PutI:
         case Ist_MBE:
            break;

         case Ist_IMark:
            cia   = st->Ist.IMark.addr;
            isize = st->Ist.IMark.len;

            // If Vex fails to decode an instruction, the size will be zero.
            // Pretend otherwise.
            if (isize == 0) isize = VG_MIN_INSTR_SZB;

            // Sanity-check size.
            tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB)
                     || VG_CLREQ_SZB == isize );

            // Get space for and init the inode, record it as the current one.
            // Subsequent Dr/Dw/Dm events from the same instruction will
            // also use it.
            curr_inode = setup_InstrInfo(&cgs, cia, isize);

            addEvent_Ir( &cgs, curr_inode );
            break;

         case Ist_WrTmp: {
            IRExpr* data = st->Ist.WrTmp.data;
            if (data->tag == Iex_Load) {
               IRExpr* aexpr = data->Iex.Load.addr;
               // Note also, endianness info is ignored.  I guess
               // that's not interesting.
               addEvent_Dr( &cgs, curr_inode, sizeofIRType(data->Iex.Load.ty),
                            aexpr );
            }
            break;
         }

         case Ist_Store: {
            IRExpr* data  = st->Ist.Store.data;
            IRExpr* aexpr = st->Ist.Store.addr;
            addEvent_Dw( &cgs, curr_inode,
                         sizeofIRType(typeOfIRExpr(tyenv, data)), aexpr );
            break;
         }

         case Ist_StoreG: {
            IRStoreG* sg   = st->Ist.StoreG.details;
            IRExpr*   data = sg->data;
            IRExpr*   addr = sg->addr;
            IRType    type = typeOfIRExpr(tyenv, data);
            tl_assert(type != Ity_INVALID);
            addEvent_D_guarded( &cgs, curr_inode,
                                sizeofIRType(type), addr, sg->guard,
                                True/*isWrite*/ );
            break;
         }

         case Ist_LoadG: {
            IRLoadG* lg       = st->Ist.LoadG.details;
            IRType   type     = Ity_INVALID; /* loaded type */
            IRType   typeWide = Ity_INVALID; /* after implicit widening */
            IRExpr*  addr     = lg->addr;
            typeOfIRLoadGOp(lg->cvt, &typeWide, &type);
            tl_assert(type != Ity_INVALID);
            addEvent_D_guarded( &cgs, curr_inode,
                                sizeofIRType(type), addr, lg->guard,
                                False/*!isWrite*/ );
            break;
         }

         case Ist_Dirty: {
            Int      dataSize;
            IRDirty* d = st->Ist.Dirty.details;
            if (d->mFx != Ifx_None) {
               /* This dirty helper accesses memory.  Collect the details. */
               tl_assert(d->mAddr != NULL);
               tl_assert(d->mSize != 0);
               dataSize = d->mSize;
               // Large (eg. 28B, 108B, 512B on x86) data-sized
               // instructions will be done inaccurately, but they're
               // very rare and this avoids errors from hitting more
               // than two cache lines in the simulation.
               if (dataSize > min_line_size)
                  dataSize = min_line_size;
               if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
                  addEvent_Dr( &cgs, curr_inode, dataSize, d->mAddr );
               if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
                  addEvent_Dw( &cgs, curr_inode, dataSize, d->mAddr );
            } else {
               tl_assert(d->mAddr == NULL);
               tl_assert(d->mSize == 0);
            }
            break;
         }
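         /* Illustrative case of the clamping above: an x86 FSAVE dirty
            helper reports mSize == 108; with (say) 64-byte lines the
            access is recorded as min_line_size bytes instead, so the
            simulator never sees a single access straddling more than
            two cache lines. */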
         case Ist_CAS: {
            /* We treat it as a read and a write of the location.  I
               think that is the same behaviour as it was before IRCAS
               was introduced, since prior to that point, the Vex
               front ends would translate a lock-prefixed instruction
               into a (normal) read followed by a (normal) write. */
            Int    dataSize;
            IRCAS* cas = st->Ist.CAS.details;
            tl_assert(cas->addr != NULL);
            tl_assert(cas->dataLo != NULL);
            dataSize = sizeofIRType(typeOfIRExpr(tyenv, cas->dataLo));
            if (cas->dataHi != NULL)
               dataSize *= 2; /* since it's a doubleword-CAS */
            /* I don't think this can ever happen, but play safe. */
            if (dataSize > min_line_size)
               dataSize = min_line_size;
            addEvent_Dr( &cgs, curr_inode, dataSize, cas->addr );
            addEvent_Dw( &cgs, curr_inode, dataSize, cas->addr );
            break;
         }

         case Ist_LLSC: {
            IRType dataTy;
            if (st->Ist.LLSC.storedata == NULL) {
               /* LL */
               dataTy = typeOfIRTemp(tyenv, st->Ist.LLSC.result);
               addEvent_Dr( &cgs, curr_inode,
                            sizeofIRType(dataTy), st->Ist.LLSC.addr );
               /* flush events before LL, should help SC to succeed */
               flushEvents( &cgs );
            } else {
               /* SC */
               dataTy = typeOfIRExpr(tyenv, st->Ist.LLSC.storedata);
               addEvent_Dw( &cgs, curr_inode,
                            sizeofIRType(dataTy), st->Ist.LLSC.addr );
            }
            break;
         }

         case Ist_Exit: {
            // call branch predictor only if this is a branch in guest code
            if ( (st->Ist.Exit.jk == Ijk_Boring) ||
                 (st->Ist.Exit.jk == Ijk_Call) ||
                 (st->Ist.Exit.jk == Ijk_Ret) )
            {
               /* Stuff to widen the guard expression to a host word, so
                  we can pass it to the branch predictor simulation
                  functions easily. */
               Bool     inverted;
               Addr64   nia, sea;
               IRConst* dst;
               IRType   tyW    = hWordTy;
               IROp     widen  = tyW==Ity_I32  ? Iop_1Uto32  : Iop_1Uto64;
               IROp     opXOR  = tyW==Ity_I32  ? Iop_Xor32   : Iop_Xor64;
               IRTemp   guard1 = newIRTemp(cgs.sbOut->tyenv, Ity_I1);
               IRTemp   guardW = newIRTemp(cgs.sbOut->tyenv, tyW);
               IRTemp   guard  = newIRTemp(cgs.sbOut->tyenv, tyW);
               IRExpr*  one    = tyW==Ity_I32 ? IRExpr_Const(IRConst_U32(1))
                                              : IRExpr_Const(IRConst_U64(1));

               /* First we need to figure out whether the side exit got
                  inverted by the ir optimiser.  To do that, figure out
                  the next (fallthrough) instruction's address and the
                  side exit address and see if they are the same. */
               nia = cia + (Addr64)isize;
               if (tyW == Ity_I32)
                  nia &= 0xFFFFFFFFULL;

               /* Side exit address */
               dst = st->Ist.Exit.dst;
               if (tyW == Ity_I32) {
                  tl_assert(dst->tag == Ico_U32);
                  sea = (Addr64)(UInt)dst->Ico.U32;
               } else {
                  tl_assert(tyW == Ity_I64);
                  tl_assert(dst->tag == Ico_U64);
                  sea = dst->Ico.U64;
               }

               inverted = nia == sea;

               /* Widen the guard expression. */
               addStmtToIRSB( cgs.sbOut,
                              IRStmt_WrTmp( guard1, st->Ist.Exit.guard ));
               addStmtToIRSB( cgs.sbOut,
                              IRStmt_WrTmp( guardW,
                                            IRExpr_Unop(widen,
                                                        IRExpr_RdTmp(guard1))) );
               /* If the exit is inverted, invert the sense of the guard. */
               addStmtToIRSB(
                  cgs.sbOut,
                  IRStmt_WrTmp(
                     guard,
                     inverted ? IRExpr_Binop(opXOR, IRExpr_RdTmp(guardW), one)
                              : IRExpr_RdTmp(guardW)
                  ));
               /* And post the event. */
               addEvent_Bc( &cgs, curr_inode, IRExpr_RdTmp(guard) );
            }

            /* We may never reach the next statement, so need to flush
               all outstanding transactions now. */
            flushEvents( &cgs );
            break;
         }
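         /* Worked example of the inversion check above (addresses are
            hypothetical): a 2-byte conditional branch at cia 0x8048000
            has fallthrough nia 0x8048002.  If the optimiser inverted the
            exit, the side-exit destination sea equals that fallthrough
            address, so the guard is XORed with 1 to recover the original
            taken/not-taken sense before it reaches log_cond_branch. */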
         default:
            ppIRStmt(st);
            tl_assert(0);
            break;
      }

      /* Copy the original statement */
      addStmtToIRSB( cgs.sbOut, st );

      if (DEBUG_CG) {
         ppIRStmt(st);
         VG_(printf)("\n");
      }
   }

   /* Deal with branches to unknown destinations.  Except ignore ones
      which are function returns as we assume the return stack
      predictor never mispredicts. */
   if ((sbIn->jumpkind == Ijk_Boring) || (sbIn->jumpkind == Ijk_Call)) {
      if (0) { ppIRExpr( sbIn->next ); VG_(printf)("\n"); }
      switch (sbIn->next->tag) {
         case Iex_Const:
            break; /* boring - branch to known address */
         case Iex_RdTmp:
            /* looks like an indirect branch (branch to unknown) */
            addEvent_Bi( &cgs, curr_inode, sbIn->next );
            break;
         default:
            /* shouldn't happen - if the incoming IR is properly
               flattened, should only have tmp and const cases to
               consider. */
            tl_assert(0);
      }
   }

   /* At the end of the bb.  Flush outstandings. */
   flushEvents( &cgs );

   /* done.  stay sane ... */
   tl_assert(cgs.sbInfo_i == cgs.sbInfo->n_instrs);

   if (DEBUG_CG) {
      VG_(printf)( "goto {");
      ppIRJumpKind(sbIn->jumpkind);
      VG_(printf)( "} ");
      ppIRExpr( sbIn->next );
      VG_(printf)( "}\n");
   }

   return cgs.sbOut;
}

/*------------------------------------------------------------*/
/*--- Cache configuration                                  ---*/
/*------------------------------------------------------------*/

#define UNDEFINED_CACHE     { -1, -1, -1 }

static cache_t clo_I1_cache = UNDEFINED_CACHE;
static cache_t clo_D1_cache = UNDEFINED_CACHE;
static cache_t clo_LL_cache = UNDEFINED_CACHE;

/*------------------------------------------------------------*/
/*--- cg_fini() and related function                       ---*/
/*------------------------------------------------------------*/

// Total reads/writes/misses.  Calculated during CC traversal at the end.
// All auto-zeroed.
static CacheCC  Ir_total;
static CacheCC  Dr_total;
static CacheCC  Dw_total;
static BranchCC Bc_total;
static BranchCC Bi_total;

static void fprint_CC_table_and_calc_totals(void)
{
   Int     i, fd;
   SysRes  sres;
   HChar   buf[512];
   HChar   *currFile = NULL, *currFn = NULL;
   LineCC* lineCC;

   // Setup output filename.  Nb: it's important to do this now, ie. as late
   // as possible.  If we do it at start-up and the program forks and the
   // output file format string contains a %p (pid) specifier, both the
   // parent and child will incorrectly write to the same file; this
   // happened in 3.3.0.
   HChar* cachegrind_out_file =
      VG_(expand_file_name)("--cachegrind-out-file", clo_cachegrind_out_file);

   sres = VG_(open)(cachegrind_out_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
                                         VKI_S_IRUSR|VKI_S_IWUSR);
   if (sr_isError(sres)) {
      // If the file can't be opened for whatever reason (conflict
      // between multiple cachegrinded processes?), give up now.
      VG_(umsg)("error: can't open cache simulation output file '%s'\n",
                cachegrind_out_file );
      VG_(umsg)("       ... so simulation results will be missing.\n");
      VG_(free)(cachegrind_out_file);
      return;
   } else {
      fd = sr_Res(sres);
      VG_(free)(cachegrind_out_file);
   }
   // "desc:" lines (giving I1/D1/LL cache configuration).  The spaces after
   // the 2nd colon make cg_annotate's output look nicer.
   VG_(sprintf)(buf, "desc: I1 cache:         %s\n"
                     "desc: D1 cache:         %s\n"
                     "desc: LL cache:         %s\n",
                     I1.desc_line, D1.desc_line, LL.desc_line);
   VG_(write)(fd, (void*)buf, VG_(strlen)(buf));

   // "cmd:" line
   VG_(strcpy)(buf, "cmd:");
   VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
   if (VG_(args_the_exename)) {
      VG_(write)(fd, " ", 1);
      VG_(write)(fd, VG_(args_the_exename),
                 VG_(strlen)( VG_(args_the_exename) ));
   }
   for (i = 0; i < VG_(sizeXA)( VG_(args_for_client) ); i++) {
      HChar* arg = * (HChar**) VG_(indexXA)( VG_(args_for_client), i );
      if (arg) {
         VG_(write)(fd, " ", 1);
         VG_(write)(fd, arg, VG_(strlen)( arg ));
      }
   }
   // "events:" line
   if (clo_cache_sim && clo_branch_sim) {
      VG_(sprintf)(buf, "\nevents: Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw "
                                  "Bc Bcm Bi Bim\n");
   }
   else if (clo_cache_sim && !clo_branch_sim) {
      VG_(sprintf)(buf, "\nevents: Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw "
                                  "\n");
   }
   else if (!clo_cache_sim && clo_branch_sim) {
      VG_(sprintf)(buf, "\nevents: Ir "
                                  "Bc Bcm Bi Bim\n");
   }
   else {
      VG_(sprintf)(buf, "\nevents: Ir\n");
   }

   VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
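   // For reference, the "events:" column names map onto the CC fields as
   // follows: Ir/Dr/Dw are the .a access counts, I1mr/D1mr/D1mw the .m1
   // first-level misses, ILmr/DLmr/DLmw the .mL last-level misses, and
   // Bc/Bcm/Bi/Bim the branch counts and mispredicts (.b/.mp).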
   // Traverse every lineCC
   VG_(OSetGen_ResetIter)(CC_table);
   while ( (lineCC = VG_(OSetGen_Next)(CC_table)) ) {
      Bool just_hit_a_new_file = False;
      // If we've hit a new file, print a "fl=" line.  Note that because
      // each string is stored exactly once in the string table, we can use
      // pointer comparison rather than strcmp() to test for equality, which
      // is good because most of the time the comparisons are equal and so
      // the whole strings would have to be checked.
      if ( lineCC->loc.file != currFile ) {
         currFile = lineCC->loc.file;
         VG_(sprintf)(buf, "fl=%s\n", currFile);
         VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
         distinct_files++;
         just_hit_a_new_file = True;
      }
      // If we've hit a new function, print a "fn=" line.  We know to do
      // this when the function name changes, and also every time we hit a
      // new file (in which case the new function name might be the same as
      // in the old file, hence the just_hit_a_new_file test).
      if ( just_hit_a_new_file || lineCC->loc.fn != currFn ) {
         currFn = lineCC->loc.fn;
         VG_(sprintf)(buf, "fn=%s\n", currFn);
         VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
         distinct_fns++;
      }

      // Print the LineCC
      if (clo_cache_sim && clo_branch_sim) {
         VG_(sprintf)(buf, "%u %llu %llu %llu"
                             " %llu %llu %llu"
                             " %llu %llu %llu"
                             " %llu %llu %llu %llu\n",
                            lineCC->loc.line,
                            lineCC->Ir.a, lineCC->Ir.m1, lineCC->Ir.mL,
                            lineCC->Dr.a, lineCC->Dr.m1, lineCC->Dr.mL,
                            lineCC->Dw.a, lineCC->Dw.m1, lineCC->Dw.mL,
                            lineCC->Bc.b, lineCC->Bc.mp,
                            lineCC->Bi.b, lineCC->Bi.mp);
      }
      else if (clo_cache_sim && !clo_branch_sim) {
         VG_(sprintf)(buf, "%u %llu %llu %llu"
                             " %llu %llu %llu"
                             " %llu %llu %llu\n",
                            lineCC->loc.line,
                            lineCC->Ir.a, lineCC->Ir.m1, lineCC->Ir.mL,
                            lineCC->Dr.a, lineCC->Dr.m1, lineCC->Dr.mL,
                            lineCC->Dw.a, lineCC->Dw.m1, lineCC->Dw.mL);
      }
      else if (!clo_cache_sim && clo_branch_sim) {
         VG_(sprintf)(buf, "%u %llu"
                             " %llu %llu %llu %llu\n",
                            lineCC->loc.line,
                            lineCC->Ir.a,
                            lineCC->Bc.b, lineCC->Bc.mp,
                            lineCC->Bi.b, lineCC->Bi.mp);
      }
      else {
         VG_(sprintf)(buf, "%u %llu\n",
                            lineCC->loc.line,
                            lineCC->Ir.a);
      }

      VG_(write)(fd, (void*)buf, VG_(strlen)(buf));

      // Update summary stats
      Ir_total.a  += lineCC->Ir.a;
      Ir_total.m1 += lineCC->Ir.m1;
      Ir_total.mL += lineCC->Ir.mL;
      Dr_total.a  += lineCC->Dr.a;
      Dr_total.m1 += lineCC->Dr.m1;
      Dr_total.mL += lineCC->Dr.mL;
      Dw_total.a  += lineCC->Dw.a;
      Dw_total.m1 += lineCC->Dw.m1;
      Dw_total.mL += lineCC->Dw.mL;
      Bc_total.b  += lineCC->Bc.b;
      Bc_total.mp += lineCC->Bc.mp;
      Bi_total.b  += lineCC->Bi.b;
      Bi_total.mp += lineCC->Bi.mp;

      distinct_lines++;
   }
   // Summary stats must come after rest of table, since we calculate them
   // during traversal.
   if (clo_cache_sim && clo_branch_sim) {
      VG_(sprintf)(buf, "summary:"
                        " %llu %llu %llu"
                        " %llu %llu %llu"
                        " %llu %llu %llu"
                        " %llu %llu %llu %llu\n",
                        Ir_total.a, Ir_total.m1, Ir_total.mL,
                        Dr_total.a, Dr_total.m1, Dr_total.mL,
                        Dw_total.a, Dw_total.m1, Dw_total.mL,
                        Bc_total.b, Bc_total.mp,
                        Bi_total.b, Bi_total.mp);
   }
   else if (clo_cache_sim && !clo_branch_sim) {
      VG_(sprintf)(buf, "summary:"
                        " %llu %llu %llu"
                        " %llu %llu %llu"
                        " %llu %llu %llu\n",
                        Ir_total.a, Ir_total.m1, Ir_total.mL,
                        Dr_total.a, Dr_total.m1, Dr_total.mL,
                        Dw_total.a, Dw_total.m1, Dw_total.mL);
   }
   else if (!clo_cache_sim && clo_branch_sim) {
      VG_(sprintf)(buf, "summary:"
                        " %llu"
                        " %llu %llu %llu %llu\n",
                        Ir_total.a,
                        Bc_total.b, Bc_total.mp,
                        Bi_total.b, Bi_total.mp);
   }
   else {
      VG_(sprintf)(buf, "summary:"
                        " %llu\n",
                        Ir_total.a);
   }

   VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
   VG_(close)(fd);
}
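/* For reference, a sketch of the resulting file (with made-up numbers;
   cache-sim on, branch-sim off):

      desc: I1 cache:         32768 B, 64 B, 8-way associative
      desc: D1 cache:         32768 B, 64 B, 8-way associative
      desc: LL cache:         8388608 B, 64 B, 16-way associative
      cmd: ./myprog arg1
      events: Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw
      fl=myprog.c
      fn=main
      42 7 1 1 2 0 0 1 0 0
      summary: 7 1 1 2 0 0 1 0 0

   This is the format that cg_annotate reads back in. */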
static UInt ULong_width(ULong n)
{
   UInt w = 0;
   while (n > 0) {
      n = n / 10;
      w++;
   }
   if (w == 0) w = 1;
   return w + (w-1)/3;   // add space for commas
}

static void cg_fini(Int exitcode)
{
   static HChar buf1[128], buf2[128], buf3[128], buf4[128];
   static HChar fmt[128];

   CacheCC  D_total;
   BranchCC B_total;
   ULong LL_total_m, LL_total_mr, LL_total_mw,
         LL_total, LL_total_r, LL_total_w;
   Int l1, l2, l3;

   fprint_CC_table_and_calc_totals();

   if (VG_(clo_verbosity) == 0)
      return;

   // Nb: this isn't called "MAX" because that overshadows a global on Darwin.
   #define CG_MAX(a, b)  ((a) >= (b) ? (a) : (b))

   /* I cache results.  Use the I_refs value to determine the first column
    * width. */
   l1 = ULong_width(Ir_total.a);
   l2 = ULong_width(CG_MAX(Dr_total.a, Bc_total.b));
   l3 = ULong_width(CG_MAX(Dw_total.a, Bi_total.b));

   /* Make format string, getting width right for numbers */
   VG_(sprintf)(fmt, "%%s %%,%dllu\n", l1);

   /* Always print this */
   VG_(umsg)(fmt, "I   refs:     ", Ir_total.a);

   /* If cache profiling is enabled, show D access numbers and all
      miss numbers */
   if (clo_cache_sim) {
      VG_(umsg)(fmt, "I1  misses:   ", Ir_total.m1);
      VG_(umsg)(fmt, "LLi misses:   ", Ir_total.mL);

      if (0 == Ir_total.a) Ir_total.a = 1;
      VG_(percentify)(Ir_total.m1, Ir_total.a, 2, l1+1, buf1);
      VG_(umsg)("I1  miss rate: %s\n", buf1);

      VG_(percentify)(Ir_total.mL, Ir_total.a, 2, l1+1, buf1);
      VG_(umsg)("LLi miss rate: %s\n", buf1);
      VG_(umsg)("\n");

      /* D cache results.  Use the D_refs.rd and D_refs.wr values to
       * determine the width of columns 2 & 3. */
      D_total.a  = Dr_total.a  + Dw_total.a;
      D_total.m1 = Dr_total.m1 + Dw_total.m1;
      D_total.mL = Dr_total.mL + Dw_total.mL;

      /* Make format string, getting width right for numbers */
      VG_(sprintf)(fmt, "%%s %%,%dllu  (%%,%dllu rd   + %%,%dllu wr)\n",
                        l1, l2, l3);

      VG_(umsg)(fmt, "D   refs:     ",
                D_total.a, Dr_total.a, Dw_total.a);
      VG_(umsg)(fmt, "D1  misses:   ",
                D_total.m1, Dr_total.m1, Dw_total.m1);
      VG_(umsg)(fmt, "LLd misses:   ",
                D_total.mL, Dr_total.mL, Dw_total.mL);

      if (0 == D_total.a)  D_total.a = 1;
      if (0 == Dr_total.a) Dr_total.a = 1;
      if (0 == Dw_total.a) Dw_total.a = 1;
      VG_(percentify)( D_total.m1,  D_total.a, 1, l1+1, buf1);
      VG_(percentify)(Dr_total.m1, Dr_total.a, 1, l2+1, buf2);
      VG_(percentify)(Dw_total.m1, Dw_total.a, 1, l3+1, buf3);
      VG_(umsg)("D1  miss rate: %s (%s     + %s  )\n", buf1, buf2,buf3);

      VG_(percentify)( D_total.mL,  D_total.a, 1, l1+1, buf1);
      VG_(percentify)(Dr_total.mL, Dr_total.a, 1, l2+1, buf2);
      VG_(percentify)(Dw_total.mL, Dw_total.a, 1, l3+1, buf3);
      VG_(umsg)("LLd miss rate: %s (%s     + %s  )\n", buf1, buf2,buf3);
      VG_(umsg)("\n");

      /* LL overall results */

      LL_total   = Dr_total.m1 + Dw_total.m1 + Ir_total.m1;
      LL_total_r = Dr_total.m1 + Ir_total.m1;
      LL_total_w = Dw_total.m1;
      VG_(umsg)(fmt, "LL refs:      ",
                LL_total, LL_total_r, LL_total_w);

      LL_total_m  = Dr_total.mL + Dw_total.mL + Ir_total.mL;
      LL_total_mr = Dr_total.mL + Ir_total.mL;
      LL_total_mw = Dw_total.mL;
      VG_(umsg)(fmt, "LL misses:    ",
                LL_total_m, LL_total_mr, LL_total_mw);

      VG_(percentify)(LL_total_m,  (Ir_total.a + D_total.a),  1, l1+1, buf1);
      VG_(percentify)(LL_total_mr, (Ir_total.a + Dr_total.a), 1, l2+1, buf2);
      VG_(percentify)(LL_total_mw, Dw_total.a,                1, l3+1, buf3);
      VG_(umsg)("LL miss rate:  %s (%s     + %s  )\n", buf1, buf2,buf3);
   }
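   /* Nb: in the LL figures above, "LL refs" are exactly the first-level
      misses (I1 + D1), since the LL cache is only consulted when a
      first-level cache misses; "LL misses" are the subset of those that
      miss in LL as well. */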

   /* If branch profiling is enabled, show branch overall results. */
   if (clo_branch_sim) {
      /* Make format string, getting width right for numbers */
      VG_(sprintf)(fmt, "%%s %%,%dllu  (%%,%dllu cond + %%,%dllu ind)\n",
                        l1, l2, l3);

      if (0 == Bc_total.b)  Bc_total.b = 1;
      if (0 == Bi_total.b)  Bi_total.b = 1;
      B_total.b  = Bc_total.b  + Bi_total.b;
      B_total.mp = Bc_total.mp + Bi_total.mp;

      VG_(umsg)("\n");
      VG_(umsg)(fmt, "Branches:     ",
                B_total.b, Bc_total.b, Bi_total.b);

      VG_(umsg)(fmt, "Mispredicts:  ",
                B_total.mp, Bc_total.mp, Bi_total.mp);

      VG_(percentify)(B_total.mp,  B_total.b,  1, l1+1, buf1);
      VG_(percentify)(Bc_total.mp, Bc_total.b, 1, l2+1, buf2);
      VG_(percentify)(Bi_total.mp, Bi_total.b, 1, l3+1, buf3);

      VG_(umsg)("Mispred rate:  %s (%s     + %s   )\n", buf1, buf2,buf3);
   }

   // Various stats
   if (VG_(clo_stats)) {
      Int debug_lookups = full_debugs      + fn_debugs +
                          file_line_debugs + no_debugs;

      VG_(dmsg)("\n");
      VG_(dmsg)("cachegrind: distinct files     : %d\n", distinct_files);
      VG_(dmsg)("cachegrind: distinct functions : %d\n", distinct_fns);
      VG_(dmsg)("cachegrind: distinct lines     : %d\n", distinct_lines);
      VG_(dmsg)("cachegrind: distinct instrs NoX: %d\n", distinct_instrsNoX);
      VG_(dmsg)("cachegrind: distinct instrs Gen: %d\n", distinct_instrsGen);
      VG_(dmsg)("cachegrind: debug lookups      : %d\n", debug_lookups);

      VG_(percentify)(full_debugs,      debug_lookups, 1, 6, buf1);
      VG_(percentify)(file_line_debugs, debug_lookups, 1, 6, buf2);
      VG_(percentify)(fn_debugs,        debug_lookups, 1, 6, buf3);
      VG_(percentify)(no_debugs,        debug_lookups, 1, 6, buf4);
      VG_(dmsg)("cachegrind: with full      info:%s (%d)\n",
                buf1, full_debugs);
      VG_(dmsg)("cachegrind: with file/line info:%s (%d)\n",
                buf2, file_line_debugs);
      VG_(dmsg)("cachegrind: with fn name   info:%s (%d)\n",
                buf3, fn_debugs);
      VG_(dmsg)("cachegrind: with zero      info:%s (%d)\n",
                buf4, no_debugs);

      VG_(dmsg)("cachegrind: string table size: %lu\n",
                VG_(OSetGen_Size)(stringTable));
      VG_(dmsg)("cachegrind: CC table size: %lu\n",
                VG_(OSetGen_Size)(CC_table));
      VG_(dmsg)("cachegrind: InstrInfo table size: %lu\n",
                VG_(OSetGen_Size)(instrInfoTable));
   }
}

/*--------------------------------------------------------------------*/
/*--- Discarding BB info                                           ---*/
/*--------------------------------------------------------------------*/

// Called when a translation is removed from the translation cache for
// any reason at all: to free up space, because the guest code was
// unmapped or modified, or for any arbitrary reason.
static
void cg_discard_superblock_info ( Addr64 orig_addr64, VexGuestExtents vge )
{
   SB_info* sbInfo;
   Addr     orig_addr = (Addr)vge.base[0];

   tl_assert(vge.n_used > 0);

   if (DEBUG_CG)
      VG_(printf)( "discard_basic_block_info: %p, %p, %llu\n",
                   (void*)(Addr)orig_addr,
                   (void*)(Addr)vge.base[0], (ULong)vge.len[0]);

   // Get BB info, remove from table, free BB info.  Simple!  Note that we
   // use orig_addr, not the first instruction address in vge.
   sbInfo = VG_(OSetGen_Remove)(instrInfoTable, &orig_addr);
   tl_assert(NULL != sbInfo);
   VG_(OSetGen_FreeNode)(instrInfoTable, sbInfo);
}

/*--------------------------------------------------------------------*/
/*--- Command line processing                                      ---*/
/*--------------------------------------------------------------------*/

static Bool cg_process_cmd_line_option(const HChar* arg)
{
   if (VG_(str_clo_cache_opt)(arg,
                              &clo_I1_cache,
                              &clo_D1_cache,
                              &clo_LL_cache)) {}

   else if VG_STR_CLO( arg, "--cachegrind-out-file", clo_cachegrind_out_file) {}
   else if VG_BOOL_CLO(arg, "--cache-sim",  clo_cache_sim)  {}
   else if VG_BOOL_CLO(arg, "--branch-sim", clo_branch_sim) {}
   else
      return False;

   return True;
}

static void cg_print_usage(void)
{
   VG_(print_cache_clo_opts)();
   VG_(printf)(
"    --cache-sim=yes|no  [yes]        collect cache stats?\n"
"    --branch-sim=yes|no [no]         collect branch prediction stats?\n"
"    --cachegrind-out-file=<file>     output file name [cachegrind.out.%%p]\n"
   );
}

static void cg_print_debug_usage(void)
{
   VG_(printf)(
"    (none)\n"
   );
}

/*--------------------------------------------------------------------*/
/*--- Setup                                                        ---*/
/*--------------------------------------------------------------------*/

static void cg_post_clo_init(void); /* just below */

static void cg_pre_clo_init(void)
{
   VG_(details_name)            ("Cachegrind");
   VG_(details_version)         (NULL);
   VG_(details_description)     ("a cache and branch-prediction profiler");
   VG_(details_copyright_author)(
      "Copyright (C) 2002-2013, and GNU GPL'd, by Nicholas Nethercote et al.");
   VG_(details_bug_reports_to)  (VG_BUGS_TO);
   VG_(details_avg_translation_sizeB) ( 500 );

   VG_(clo_vex_control).iropt_register_updates
      = VexRegUpdSpAtMemAccess; // overridable by the user.
   VG_(basic_tool_funcs)          (cg_post_clo_init,
                                   cg_instrument,
                                   cg_fini);

   VG_(needs_superblock_discards)(cg_discard_superblock_info);
   VG_(needs_command_line_options)(cg_process_cmd_line_option,
                                   cg_print_usage,
                                   cg_print_debug_usage);
}

static void cg_post_clo_init(void)
{
   cache_t I1c, D1c, LLc;

   CC_table =
      VG_(OSetGen_Create)(offsetof(LineCC, loc),
                          cmp_CodeLoc_LineCC,
                          VG_(malloc), "cg.main.cpci.1",
                          VG_(free));
   instrInfoTable =
      VG_(OSetGen_Create)(/*keyOff*/0,
                          NULL,
                          VG_(malloc), "cg.main.cpci.2",
                          VG_(free));
   stringTable =
      VG_(OSetGen_Create)(/*keyOff*/0,
                          stringCmp,
                          VG_(malloc), "cg.main.cpci.3",
                          VG_(free));

   VG_(post_clo_init_configure_caches)(&I1c, &D1c, &LLc,
                                       &clo_I1_cache,
                                       &clo_D1_cache,
                                       &clo_LL_cache);

   // min_line_size is used to make sure that we never feed
   // accesses to the simulator straddling more than two
   // cache lines at any cache level
   min_line_size = (I1c.line_size < D1c.line_size) ? I1c.line_size
                                                   : D1c.line_size;
   min_line_size = (LLc.line_size < min_line_size) ? LLc.line_size
                                                   : min_line_size;
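   // For example (illustrative sizes): with 64-byte I1/D1 lines and
   // 64-byte LL lines, min_line_size is 64.  addEvent_Dr/Dw assert that
   // no event exceeds this, and oversized dirty-helper and CAS accesses
   // are clamped to it beforehand (see cg_instrument).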

   Int largest_load_or_store_size
      = VG_(machine_get_size_of_largest_guest_register)();
   if (min_line_size < largest_load_or_store_size) {
      /* We can't continue, because the cache simulation might
         straddle more than 2 lines, and it will assert.  So let's
         just stop before we start. */
      VG_(umsg)("Cachegrind: cannot continue: the minimum line size (%d)\n",
                (Int)min_line_size);
      VG_(umsg)("  must be equal to or larger than the maximum register size (%d)\n",
                largest_load_or_store_size );
      VG_(umsg)("  but it is not.  Exiting now.\n");
      VG_(exit)(1);
   }

   cachesim_initcaches(I1c, D1c, LLc);
}

VG_DETERMINE_INTERFACE_VERSION(cg_pre_clo_init)

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/