/*--------------------------------------------------------------------*/
/*--- Cache simulation.                                            ---*/
/*---                                                        sim.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Callgrind, a Valgrind tool for call graph
   profiling programs.

   Copyright (C) 2003-2012, Josef Weidendorfer (Josef.Weidendorfer (at) gmx.de)

   This tool is derived from and contains code from Cachegrind
   Copyright (C) 2002-2012 Nicholas Nethercote (njn (at) valgrind.org)

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "global.h"


/* Notes:
  - simulates a write-allocate cache
  - (block --> set) hash function uses simple bit selection
  - handling of references straddling two cache blocks:
      - counts as only one cache access (not two)
      - both blocks hit --> one hit
      - one block hits, the other misses --> one miss
      - both blocks miss --> one miss (not two)
*/

/* Cache configuration */
#include "cg_arch.h"

/* additional structures for cache use info, separated
 * according to usage frequency:
 * - line_loaded : pointer to the cost center of the instruction
 *                 which loaded the line into cache.
 *                 Needed to increment counters when the line is evicted.
 * - line_use    : updated on every access
 */
typedef struct {
  UInt count;
  UInt mask; /* e.g. for 64 byte line size, 1 bit per 2 bytes */
} line_use;

typedef struct {
  Addr memline, iaddr;
  line_use* dep_use; /* points to the higher-level cache block for this memline */
  ULong* use_base;
} line_loaded;

/* Cache state */
typedef struct {
   char*        name;
   int          size;        /* bytes */
   int          assoc;
   int          line_size;   /* bytes */
   Bool         sectored;    /* prefetch nearside cacheline on read */
   int          sets;
   int          sets_min_1;
   int          line_size_bits;
   int          tag_shift;
   UWord        tag_mask;
   char         desc_line[128];
   UWord*       tags;

   /* for cache use */
   int          line_size_mask;
   int*         line_start_mask;
   int*         line_end_mask;
   line_loaded* loaded;
   line_use*    use;
} cache_t2;
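/* Worked example of the (block --> set) bit selection above, for a
 * hypothetical 32 KB, 8-way cache with 64 byte lines (purely
 * illustrative, not a required configuration):
 *   sets           = 32768 / 64 / 8 = 64,  sets_min_1 = 63
 *   line_size_bits = 6,  tag_shift = 6 + log2(64) = 12
 * For the address a = 0x12345678:
 *   set = (a >> line_size_bits) & sets_min_1 = 0x19
 *   tag =  a >> tag_shift                    = 0x12345
 * (the write-back and cache-use variants below keep the untruncated
 *  "a & tag_mask" form instead, so the low tag bits stay free for
 *  flags and use-info pointers)
 */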
/*
 * States of flat caches in our model.
 * We use a 2-level hierarchy.
 */
static cache_t2 I1, D1, LL;

/* Lower bits of cache tags are used as flags for a cache line */
#define CACHELINE_FLAGMASK (MIN_LINE_SIZE-1)
#define CACHELINE_DIRTY    1


/* Cache simulator options */
static Bool clo_simulate_writeback = False;
static Bool clo_simulate_hwpref = False;
static Bool clo_simulate_sectors = False;
static Bool clo_collect_cacheuse = False;

/* The following global variables are set up beforehand by setup_bbcc():
 *
 * - Addr   CLG_(bb_base)   (instruction start address of original BB)
 * - ULong* CLG_(cost_base) (start of cost array for BB)
 */

Addr   CLG_(bb_base);
ULong* CLG_(cost_base);

static InstrInfo* current_ii;

/* Cache use offsets */
/* The offsets are only correct because all per-instruction event sets get
 * the "Use" set added first!
 */
static Int off_I1_AcCost = 0;
static Int off_I1_SpLoss = 1;
static Int off_D1_AcCost = 0;
static Int off_D1_SpLoss = 1;
static Int off_LL_AcCost = 2;
static Int off_LL_SpLoss = 3;

/* Cache access types */
typedef enum { Read = 0, Write = CACHELINE_DIRTY } RefType;

/* Result of a reference into a flat cache */
typedef enum { Hit = 0, Miss, MissDirty } CacheResult;

/* Result of a reference into a hierarchical cache model */
typedef enum {
    L1_Hit,
    LL_Hit,
    MemAccess,
    WriteBackMemAccess } CacheModelResult;

typedef CacheModelResult (*simcall_type)(Addr, UChar);

static struct {
    simcall_type I1_Read;
    simcall_type D1_Read;
    simcall_type D1_Write;
} simulator;

/*------------------------------------------------------------*/
/*--- Cache Simulator Initialization                       ---*/
/*------------------------------------------------------------*/

static void cachesim_clearcache(cache_t2* c)
{
  Int i;

  for (i = 0; i < c->sets * c->assoc; i++)
    c->tags[i] = 0;
  if (c->use) {
    for (i = 0; i < c->sets * c->assoc; i++) {
      c->loaded[i].memline  = 0;
      c->loaded[i].use_base = 0;
      c->loaded[i].dep_use  = 0;
      c->loaded[i].iaddr    = 0;
      c->use[i].mask  = 0;
      c->use[i].count = 0;
      c->tags[i] = i % c->assoc; /* init lower bits as pointer */
    }
  }
}

static void cacheuse_initcache(cache_t2* c);

/* By this point, the size/assoc/line_size has been checked. */
static void cachesim_initcache(cache_t config, cache_t2* c)
{
   c->size      = config.size;
   c->assoc     = config.assoc;
   c->line_size = config.line_size;
   c->sectored  = False; // FIXME

   c->sets           = (c->size / c->line_size) / c->assoc;
   c->sets_min_1     = c->sets - 1;
   c->line_size_bits = VG_(log2)(c->line_size);
   c->tag_shift      = c->line_size_bits + VG_(log2)(c->sets);
   c->tag_mask       = ~((1<<c->tag_shift)-1);

   /* Can bits in tag entries be used for flags?
    * Should always be true, as MIN_LINE_SIZE >= 16 */
   CLG_ASSERT( (c->tag_mask & CACHELINE_FLAGMASK) == 0);

   if (c->assoc == 1) {
      VG_(sprintf)(c->desc_line, "%d B, %d B, direct-mapped%s",
                   c->size, c->line_size,
                   c->sectored ? ", sectored":"");
   } else {
      VG_(sprintf)(c->desc_line, "%d B, %d B, %d-way associative%s",
                   c->size, c->line_size, c->assoc,
                   c->sectored ? ", sectored":"");
   }

   c->tags = (UWord*) CLG_MALLOC("cl.sim.cs_ic.1",
                                 sizeof(UWord) * c->sets * c->assoc);
   if (clo_collect_cacheuse)
      cacheuse_initcache(c);
   else
      c->use = 0;
   cachesim_clearcache(c);
}


#if 0
static void print_cache(cache_t2* c)
{
   UInt set, way, i;

   /* Note initialisation and update of 'i'. */
   for (i = 0, set = 0; set < c->sets; set++) {
      for (way = 0; way < c->assoc; way++, i++) {
         VG_(printf)("%8x ", c->tags[i]);
      }
      VG_(printf)("\n");
   }
}
#endif


/*------------------------------------------------------------*/
/*--- Write Through Cache Simulation                       ---*/
/*------------------------------------------------------------*/

/*
 * Simple model: L1 & LL write-through.
 * Does not distinguish between read and write references.
 *
 * Simulator functions:
 *  CacheModelResult cachesim_I1_ref(Addr a, UChar size)
 *  CacheModelResult cachesim_D1_ref(Addr a, UChar size)
 */

static __inline__
CacheResult cachesim_setref(cache_t2* c, UInt set_no, UWord tag)
{
    int i, j;
    UWord *set;

    set = &(c->tags[set_no * c->assoc]);

    /* This loop is unrolled for just the first case, which is the most */
    /* common.  We can't unroll any further because it would screw up   */
    /* if we have a direct-mapped (1-way) cache.                        */
    if (tag == set[0])
        return Hit;

    /* If the tag is one other than the MRU, move it into the MRU spot */
    /* and shuffle the rest down.                                      */
    for (i = 1; i < c->assoc; i++) {
        if (tag == set[i]) {
            for (j = i; j > 0; j--) {
                set[j] = set[j - 1];
            }
            set[0] = tag;
            return Hit;
        }
    }

    /* A miss; install this tag as MRU, shuffle rest down. */
    for (j = c->assoc - 1; j > 0; j--) {
        set[j] = set[j - 1];
    }
    set[0] = tag;

    return Miss;
}
static CacheResult cachesim_ref(cache_t2* c, Addr a, UChar size)
{
    UInt set1 = ( a         >> c->line_size_bits) & (c->sets_min_1);
    UInt set2 = ((a+size-1) >> c->line_size_bits) & (c->sets_min_1);
    UWord tag = a >> c->tag_shift;

    /* Access entirely within line. */
    if (set1 == set2)
        return cachesim_setref(c, set1, tag);

    /* Access straddles two lines. */
    /* Nb: this is a fast way of doing ((set1+1) % c->sets) */
    else if (((set1 + 1) & (c->sets_min_1)) == set2) {
        UWord tag2 = (a+size-1) >> c->tag_shift;

        /* the call updates cache structures as a side effect */
        CacheResult res1 = cachesim_setref(c, set1, tag);
        CacheResult res2 = cachesim_setref(c, set2, tag2);
        return ((res1 == Miss) || (res2 == Miss)) ? Miss : Hit;

   } else {
       VG_(printf)("addr: %lx size: %u sets: %d %d", a, size, set1, set2);
       VG_(tool_panic)("item straddles more than two cache sets");
   }
   return Hit;
}

static
CacheModelResult cachesim_I1_ref(Addr a, UChar size)
{
    if ( cachesim_ref( &I1, a, size) == Hit ) return L1_Hit;
    if ( cachesim_ref( &LL, a, size) == Hit ) return LL_Hit;
    return MemAccess;
}

static
CacheModelResult cachesim_D1_ref(Addr a, UChar size)
{
    if ( cachesim_ref( &D1, a, size) == Hit ) return L1_Hit;
    if ( cachesim_ref( &LL, a, size) == Hit ) return LL_Hit;
    return MemAccess;
}


/*------------------------------------------------------------*/
/*--- Write Back Cache Simulation                          ---*/
/*------------------------------------------------------------*/

/*
 * More complex model: L1 write-through, LL write-back.
 * This needs to distinguish between read and write references.
 *
 * Simulator functions:
 *  CacheModelResult cachesim_I1_Read(Addr a, UChar size)
 *  CacheModelResult cachesim_D1_Read(Addr a, UChar size)
 *  CacheModelResult cachesim_D1_Write(Addr a, UChar size)
 */

/*
 * With write-back, the result can be a miss evicting a dirty line.
 * The dirty state of a cache line is stored in bit 0 of the tag for
 * this cache line (CACHELINE_DIRTY = 1).  By OR'ing the reference
 * type (Read/Write) onto the tag, the line gets dirty on a write.
 */
static __inline__
CacheResult cachesim_setref_wb(cache_t2* c, RefType ref, UInt set_no, UWord tag)
{
    int i, j;
    UWord *set, tmp_tag;

    set = &(c->tags[set_no * c->assoc]);

    /* This loop is unrolled for just the first case, which is the most */
    /* common.  We can't unroll any further because it would screw up   */
    /* if we have a direct-mapped (1-way) cache.                        */
    if (tag == (set[0] & ~CACHELINE_DIRTY)) {
        set[0] |= ref;
        return Hit;
    }
    /* If the tag is one other than the MRU, move it into the MRU spot */
    /* and shuffle the rest down.                                      */
    for (i = 1; i < c->assoc; i++) {
        if (tag == (set[i] & ~CACHELINE_DIRTY)) {
            tmp_tag = set[i] | ref; // update dirty flag
            for (j = i; j > 0; j--) {
                set[j] = set[j - 1];
            }
            set[0] = tmp_tag;
            return Hit;
        }
    }

    /* A miss; install this tag as MRU, shuffle rest down. */
    tmp_tag = set[c->assoc - 1];
    for (j = c->assoc - 1; j > 0; j--) {
        set[j] = set[j - 1];
    }
    set[0] = tag | ref;

    return (tmp_tag & CACHELINE_DIRTY) ? MissDirty : Miss;
}
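/* A short trace of the two mechanisms above, for one hypothetical
 * 4-way set (tags shown MRU first; illustrative values only):
 *
 *   [A,B,C,D]        read C  -> hit, move to front:  [C,A,B,D]
 *   [C,A,B,D]        write E -> miss, D evicted:     [E|dirty,C,A,B]
 *   [E|dirty,C,A,B]  ... eventual eviction of E      -> MissDirty
 *
 * The array order itself encodes the LRU age, so no extra age bits
 * are needed; the dirty flag lives in bit 0 of the stored tag, which
 * is free because tags here are "a & tag_mask" and tag_shift >= 1
 * (checked by the CLG_ASSERT in cachesim_initcache).
 */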
static __inline__
CacheResult cachesim_ref_wb(cache_t2* c, RefType ref, Addr a, UChar size)
{
    UInt set1 = ( a         >> c->line_size_bits) & (c->sets_min_1);
    UInt set2 = ((a+size-1) >> c->line_size_bits) & (c->sets_min_1);
    UWord tag = a & c->tag_mask;

    /* Access entirely within line. */
    if (set1 == set2)
        return cachesim_setref_wb(c, ref, set1, tag);

    /* Access straddles two lines. */
    /* Nb: this is a fast way of doing ((set1+1) % c->sets) */
    else if (((set1 + 1) & (c->sets_min_1)) == set2) {
        UWord tag2 = (a+size-1) & c->tag_mask;

        /* the call updates cache structures as a side effect */
        CacheResult res1 = cachesim_setref_wb(c, ref, set1, tag);
        CacheResult res2 = cachesim_setref_wb(c, ref, set2, tag2);

        if ((res1 == MissDirty) || (res2 == MissDirty)) return MissDirty;
        return ((res1 == Miss) || (res2 == Miss)) ? Miss : Hit;

   } else {
       VG_(printf)("addr: %lx size: %u sets: %d %d", a, size, set1, set2);
       VG_(tool_panic)("item straddles more than two cache sets");
   }
   return Hit;
}


static
CacheModelResult cachesim_I1_Read(Addr a, UChar size)
{
    if ( cachesim_ref( &I1, a, size) == Hit ) return L1_Hit;
    switch( cachesim_ref_wb( &LL, Read, a, size) ) {
        case Hit:  return LL_Hit;
        case Miss: return MemAccess;
        default: break;
    }
    return WriteBackMemAccess;
}

static
CacheModelResult cachesim_D1_Read(Addr a, UChar size)
{
    if ( cachesim_ref( &D1, a, size) == Hit ) return L1_Hit;
    switch( cachesim_ref_wb( &LL, Read, a, size) ) {
        case Hit:  return LL_Hit;
        case Miss: return MemAccess;
        default: break;
    }
    return WriteBackMemAccess;
}

static
CacheModelResult cachesim_D1_Write(Addr a, UChar size)
{
    if ( cachesim_ref( &D1, a, size) == Hit ) {
        /* Even for an L1 hit, the write-through L1 passes
         * the write on to the LL to make the LL line dirty.
         * But this causes no latency, so return the hit.
         */
        cachesim_ref_wb( &LL, Write, a, size);
        return L1_Hit;
    }
    switch( cachesim_ref_wb( &LL, Write, a, size) ) {
        case Hit:  return LL_Hit;
        case Miss: return MemAccess;
        default: break;
    }
    return WriteBackMemAccess;
}


/*------------------------------------------------------------*/
/*--- Hardware Prefetch Simulation                         ---*/
/*------------------------------------------------------------*/

static ULong prefetch_up = 0;
static ULong prefetch_down = 0;

#define PF_STREAMS  8
#define PF_PAGEBITS 12

static UInt pf_lastblock[PF_STREAMS];
static Int  pf_seqblocks[PF_STREAMS];

static
void prefetch_clear(void)
{
  int i;
  for(i=0;i<PF_STREAMS;i++)
    pf_lastblock[i] = pf_seqblocks[i] = 0;
}

/*
 * HW prefetch emulation:
 * Start prefetching when detecting sequential access to 3 memory blocks.
 * One stream can be detected per 4k page.
 */
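/* A short trace of the stream detector below, for blocks within one
 * page (illustrative):
 *   access block b   : no direction yet             (pf_seqblocks = 0)
 *   access block b+1 : upward candidate             (pf_seqblocks = 1)
 *   access block b+2 : confirmed; prefetch the line (pf_seqblocks = 2)
 *                      5 blocks ahead into LL
 * Any non-sequential block resets the stream to undecided; downward
 * streams work symmetrically with negative pf_seqblocks.
 */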
static __inline__
void prefetch_LL_doref(Addr a)
{
  UInt stream = (a >> PF_PAGEBITS) % PF_STREAMS;
  UInt block = ( a >> LL.line_size_bits);

  if (block != pf_lastblock[stream]) {
    if (pf_seqblocks[stream] == 0) {
      if (pf_lastblock[stream] +1 == block) pf_seqblocks[stream]++;
      else if (pf_lastblock[stream] -1 == block) pf_seqblocks[stream]--;
    }
    else if (pf_seqblocks[stream] >0) {
      if (pf_lastblock[stream] +1 == block) {
        pf_seqblocks[stream]++;
        if (pf_seqblocks[stream] >= 2) {
          prefetch_up++;
          cachesim_ref(&LL, a + 5 * LL.line_size,1);
        }
      }
      else pf_seqblocks[stream] = 0;
    }
    else if (pf_seqblocks[stream] <0) {
      if (pf_lastblock[stream] -1 == block) {
        pf_seqblocks[stream]--;
        if (pf_seqblocks[stream] <= -2) {
          prefetch_down++;
          cachesim_ref(&LL, a - 5 * LL.line_size,1);
        }
      }
      else pf_seqblocks[stream] = 0;
    }
    pf_lastblock[stream] = block;
  }
}

/* simple model with hardware prefetch */

static
CacheModelResult prefetch_I1_ref(Addr a, UChar size)
{
    if ( cachesim_ref( &I1, a, size) == Hit ) return L1_Hit;
    prefetch_LL_doref(a);
    if ( cachesim_ref( &LL, a, size) == Hit ) return LL_Hit;
    return MemAccess;
}

static
CacheModelResult prefetch_D1_ref(Addr a, UChar size)
{
    if ( cachesim_ref( &D1, a, size) == Hit ) return L1_Hit;
    prefetch_LL_doref(a);
    if ( cachesim_ref( &LL, a, size) == Hit ) return LL_Hit;
    return MemAccess;
}


/* complex model with hardware prefetch */

static
CacheModelResult prefetch_I1_Read(Addr a, UChar size)
{
    if ( cachesim_ref( &I1, a, size) == Hit ) return L1_Hit;
    prefetch_LL_doref(a);
    switch( cachesim_ref_wb( &LL, Read, a, size) ) {
        case Hit:  return LL_Hit;
        case Miss: return MemAccess;
        default: break;
    }
    return WriteBackMemAccess;
}

static
CacheModelResult prefetch_D1_Read(Addr a, UChar size)
{
    if ( cachesim_ref( &D1, a, size) == Hit ) return L1_Hit;
    prefetch_LL_doref(a);
    switch( cachesim_ref_wb( &LL, Read, a, size) ) {
        case Hit:  return LL_Hit;
        case Miss: return MemAccess;
        default: break;
    }
    return WriteBackMemAccess;
}
static
CacheModelResult prefetch_D1_Write(Addr a, UChar size)
{
    prefetch_LL_doref(a);
    if ( cachesim_ref( &D1, a, size) == Hit ) {
        /* Even for an L1 hit, the write-through L1 passes
         * the write on to the LL to make the LL line dirty.
         * But this causes no latency, so return the hit.
         */
        cachesim_ref_wb( &LL, Write, a, size);
        return L1_Hit;
    }
    switch( cachesim_ref_wb( &LL, Write, a, size) ) {
        case Hit:  return LL_Hit;
        case Miss: return MemAccess;
        default: break;
    }
    return WriteBackMemAccess;
}


/*------------------------------------------------------------*/
/*--- Cache Simulation with use metric collection          ---*/
/*------------------------------------------------------------*/

/* cannot be combined with write-back or prefetch */

static
void cacheuse_initcache(cache_t2* c)
{
    int i;
    unsigned int start_mask, start_val;
    unsigned int end_mask, end_val;

    c->use    = CLG_MALLOC("cl.sim.cu_ic.1",
                           sizeof(line_use) * c->sets * c->assoc);
    c->loaded = CLG_MALLOC("cl.sim.cu_ic.2",
                           sizeof(line_loaded) * c->sets * c->assoc);
    c->line_start_mask = CLG_MALLOC("cl.sim.cu_ic.3",
                                    sizeof(int) * c->line_size);
    c->line_end_mask = CLG_MALLOC("cl.sim.cu_ic.4",
                                  sizeof(int) * c->line_size);

    c->line_size_mask = c->line_size-1;

    /* Meaning of line_start_mask/line_end_mask
     * Example: for a given cache line, you get an access starting at
     * byte offset 5, length 4, so bytes 5 - 8 were touched.  For a cache
     * line size of 32, you have 1 bit per byte in the mask:
     *
     *   bit31   bit8 bit5  bit 0
     *       |      |  |    |
     *       11..111111100000   line_start_mask[5]
     *       00..000111111111   line_end_mask[(5+4)-1]
     *
     *  use_mask |= line_start_mask[5] & line_end_mask[8]
     *
     */
    start_val = end_val = ~0;
    if (c->line_size < 32) {
        int bits_per_byte = 32/c->line_size;
        start_mask = (1<<bits_per_byte)-1;
        end_mask   = start_mask << (32-bits_per_byte);
        for(i=0;i<c->line_size;i++) {
            c->line_start_mask[i] = start_val;
            start_val  = start_val & ~start_mask;
            start_mask = start_mask << bits_per_byte;

            c->line_end_mask[c->line_size-i-1] = end_val;
            end_val  = end_val & ~end_mask;
            end_mask = end_mask >> bits_per_byte;
        }
    }
    else {
        int bytes_per_bit = c->line_size/32;
        start_mask = 1;
        end_mask   = 1 << 31;
        for(i=0;i<c->line_size;i++) {
            c->line_start_mask[i] = start_val;
            c->line_end_mask[c->line_size-i-1] = end_val;
            if ( ((i+1)%bytes_per_bit) == 0) {
                start_val   &= ~start_mask;
                end_val     &= ~end_mask;
                start_mask <<= 1;
                end_mask   >>= 1;
            }
        }
    }

    CLG_DEBUG(6, "Config %s:\n", c->desc_line);
    for(i=0;i<c->line_size;i++) {
        CLG_DEBUG(6, " [%2d]: start mask %8x, end mask %8x\n",
                  i, c->line_start_mask[i], c->line_end_mask[i]);
    }

    /* We use the lower tag bits as offset pointers to cache use info.
     * I.e. some cache parameters don't work.
     */
    if ( (1<<c->tag_shift) < c->assoc) {
        VG_(message)(Vg_DebugMsg,
                     "error: Use associativity < %d for cache use statistics!\n",
                     (1<<c->tag_shift) );
        VG_(tool_panic)("Unsupported cache configuration");
    }
}
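/* Worked example for the >= 32 branch above: with a 64 byte line,
 * bytes_per_bit = 2, so each mask bit covers two bytes.  An access of
 * 8 bytes at line offset 16 yields (illustrative):
 *
 *   line_start_mask[16] = 0xFFFFFF00   (bits 8..31 usable from here)
 *   line_end_mask[23]   = 0x00000FFF   (bits 0..11 usable up to here)
 *   use_mask            = 0x00000F00   (bits 8..11, i.e. bytes 16..23)
 */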
/* for I1/D1 caches */
#define CACHEUSE(L)                                                        \
                                                                           \
static CacheModelResult cacheuse##_##L##_doRead(Addr a, UChar size)        \
{                                                                          \
   UInt set1 = ( a         >> L.line_size_bits) & (L.sets_min_1);          \
   UInt set2 = ((a+size-1) >> L.line_size_bits) & (L.sets_min_1);          \
   UWord tag  = a & L.tag_mask;                                            \
   UWord tag2;                                                             \
   int i, j, idx;                                                          \
   UWord *set, tmp_tag;                                                    \
   UInt use_mask;                                                          \
                                                                           \
   CLG_DEBUG(6,"%s.Acc(Addr %#lx, size %d): Sets [%d/%d]\n",               \
             L.name, a, size, set1, set2);                                 \
                                                                           \
   /* First case: word entirely within line. */                           \
   if (set1 == set2) {                                                     \
                                                                           \
      set = &(L.tags[set1 * L.assoc]);                                     \
      use_mask = L.line_start_mask[a & L.line_size_mask] &                 \
                 L.line_end_mask[(a+size-1) & L.line_size_mask];           \
                                                                           \
      /* This loop is unrolled for just the first case, which is the most */\
      /* common.  We can't unroll any further because it would screw up   */\
      /* if we have a direct-mapped (1-way) cache.                        */\
      if (tag == (set[0] & L.tag_mask)) {                                  \
         idx = (set1 * L.assoc) + (set[0] & ~L.tag_mask);                  \
         L.use[idx].count ++;                                              \
         L.use[idx].mask |= use_mask;                                      \
         CLG_DEBUG(6," Hit0 [idx %d] (line %#lx from %#lx): %x => %08x, count %d\n",\
                   idx, L.loaded[idx].memline, L.loaded[idx].iaddr,        \
                   use_mask, L.use[idx].mask, L.use[idx].count);           \
         return L1_Hit;                                                    \
      }                                                                    \
      /* If the tag is one other than the MRU, move it into the MRU spot */\
      /* and shuffle the rest down.                                      */\
      for (i = 1; i < L.assoc; i++) {                                      \
         if (tag == (set[i] & L.tag_mask)) {                               \
            tmp_tag = set[i];                                              \
            for (j = i; j > 0; j--) {                                      \
               set[j] = set[j - 1];                                        \
            }                                                              \
            set[0] = tmp_tag;                                              \
            idx = (set1 * L.assoc) + (tmp_tag & ~L.tag_mask);              \
            L.use[idx].count ++;                                           \
            L.use[idx].mask |= use_mask;                                   \
            CLG_DEBUG(6," Hit%d [idx %d] (line %#lx from %#lx): %x => %08x, count %d\n",\
                      i, idx, L.loaded[idx].memline, L.loaded[idx].iaddr,  \
                      use_mask, L.use[idx].mask, L.use[idx].count);        \
            return L1_Hit;                                                 \
         }                                                                 \
      }                                                                    \
                                                                           \
      /* A miss; install this tag as MRU, shuffle rest down. */            \
      tmp_tag = set[L.assoc - 1] & ~L.tag_mask;                            \
      for (j = L.assoc - 1; j > 0; j--) {                                  \
         set[j] = set[j - 1];                                              \
      }                                                                    \
      set[0] = tag | tmp_tag;                                              \
      idx = (set1 * L.assoc) + tmp_tag;                                    \
      return update_##L##_use(&L, idx,                                     \
                              use_mask, a &~ L.line_size_mask);            \
                                                                           \
   /* Second case: word straddles two lines. */                           \
   /* Nb: this is a fast way of doing ((set1+1) % L.sets) */              \
   } else if (((set1 + 1) & (L.sets_min_1)) == set2) {                    \
      Int miss1=0, miss2=0; /* 0: L1 hit, 1: L1 miss (LL hit), 2: LL miss */\
      set = &(L.tags[set1 * L.assoc]);                                    \
      use_mask = L.line_start_mask[a & L.line_size_mask];                 \
      if (tag == (set[0] & L.tag_mask)) {                                 \
         idx = (set1 * L.assoc) + (set[0] & ~L.tag_mask);                 \
         L.use[idx].count ++;                                             \
         L.use[idx].mask |= use_mask;                                     \
         CLG_DEBUG(6," Hit0 [idx %d] (line %#lx from %#lx): %x => %08x, count %d\n",\
                   idx, L.loaded[idx].memline, L.loaded[idx].iaddr,       \
                   use_mask, L.use[idx].mask, L.use[idx].count);          \
         goto block2;                                                     \
      }                                                                   \
      for (i = 1; i < L.assoc; i++) {                                     \
         if (tag == (set[i] & L.tag_mask)) {                              \
            tmp_tag = set[i];                                             \
            for (j = i; j > 0; j--) {                                     \
               set[j] = set[j - 1];                                       \
            }                                                             \
            set[0] = tmp_tag;                                             \
            idx = (set1 * L.assoc) + (tmp_tag & ~L.tag_mask);             \
            L.use[idx].count ++;                                          \
            L.use[idx].mask |= use_mask;                                  \
            CLG_DEBUG(6," Hit%d [idx %d] (line %#lx from %#lx): %x => %08x, count %d\n",\
                      i, idx, L.loaded[idx].memline, L.loaded[idx].iaddr, \
                      use_mask, L.use[idx].mask, L.use[idx].count);       \
            goto block2;                                                  \
         }                                                                \
      }                                                                   \
      tmp_tag = set[L.assoc - 1] & ~L.tag_mask;                           \
      for (j = L.assoc - 1; j > 0; j--) {                                 \
         set[j] = set[j - 1];                                             \
      }                                                                   \
      set[0] = tag | tmp_tag;                                             \
      idx = (set1 * L.assoc) + tmp_tag;                                   \
      miss1 = update_##L##_use(&L, idx,                                   \
                               use_mask, a &~ L.line_size_mask);          \
block2:                                                                   \
      set = &(L.tags[set2 * L.assoc]);                                    \
      use_mask = L.line_end_mask[(a+size-1) & L.line_size_mask];          \
      tag2 = (a+size-1) & L.tag_mask;                                     \
      if (tag2 == (set[0] & L.tag_mask)) {                                \
         idx = (set2 * L.assoc) + (set[0] & ~L.tag_mask);                 \
         L.use[idx].count ++;                                             \
         L.use[idx].mask |= use_mask;                                     \
         CLG_DEBUG(6," Hit0 [idx %d] (line %#lx from %#lx): %x => %08x, count %d\n",\
                   idx, L.loaded[idx].memline, L.loaded[idx].iaddr,       \
                   use_mask, L.use[idx].mask, L.use[idx].count);          \
         return miss1;                                                    \
      }                                                                   \
      for (i = 1; i < L.assoc; i++) {                                     \
         if (tag2 == (set[i] & L.tag_mask)) {                             \
            tmp_tag = set[i];                                             \
            for (j = i; j > 0; j--) {                                     \
               set[j] = set[j - 1];                                       \
            }                                                             \
            set[0] = tmp_tag;                                             \
            idx = (set2 * L.assoc) + (tmp_tag & ~L.tag_mask);             \
            L.use[idx].count ++;                                          \
            L.use[idx].mask |= use_mask;                                  \
            CLG_DEBUG(6," Hit%d [idx %d] (line %#lx from %#lx): %x => %08x, count %d\n",\
                      i, idx, L.loaded[idx].memline, L.loaded[idx].iaddr, \
                      use_mask, L.use[idx].mask, L.use[idx].count);       \
            return miss1;                                                 \
         }                                                                \
      }                                                                   \
      tmp_tag = set[L.assoc - 1] & ~L.tag_mask;                           \
      for (j = L.assoc - 1; j > 0; j--) {                                 \
         set[j] = set[j - 1];                                             \
      }                                                                   \
      set[0] = tag2 | tmp_tag;                                            \
      idx = (set2 * L.assoc) + tmp_tag;                                   \
      miss2 = update_##L##_use(&L, idx,                                   \
                               use_mask, (a+size-1) &~ L.line_size_mask); \
      return (miss1==MemAccess || miss2==MemAccess) ? MemAccess : LL_Hit; \
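      /* Note: the two per-line results are folded into one, matching  */ \
      /* the straddling rule in the notes at the top of this file:     */ \
      /* a reference crossing two lines costs at most one LL miss.     */ \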
                                                                          \
   } else {                                                               \
       VG_(printf)("addr: %#lx size: %u sets: %d %d", a, size, set1, set2); \
       VG_(tool_panic)("item straddles more than two cache sets");        \
   }                                                                      \
   return 0;                                                              \
}


/* logarithmic bit counting algorithm, see
 * http://graphics.stanford.edu/~seander/bithacks.html
 */
static __inline__ unsigned int countBits(unsigned int bits)
{
  unsigned int c; // store the total here
  const int S[] = {1, 2, 4, 8, 16}; // Magic Binary Numbers
  const int B[] = {0x55555555, 0x33333333, 0x0F0F0F0F, 0x00FF00FF, 0x0000FFFF};

  c = bits;
  c = ((c >> S[0]) & B[0]) + (c & B[0]);
  c = ((c >> S[1]) & B[1]) + (c & B[1]);
  c = ((c >> S[2]) & B[2]) + (c & B[2]);
  c = ((c >> S[3]) & B[3]) + (c & B[3]);
  c = ((c >> S[4]) & B[4]) + (c & B[4]);
  return c;
}

static void update_LL_use(int idx, Addr memline)
{
  line_loaded* loaded = &(LL.loaded[idx]);
  line_use* use = &(LL.use[idx]);
  int i = ((32 - countBits(use->mask)) * LL.line_size)>>5;

  CLG_DEBUG(2, " LL.miss [%d]: at %#lx accessing memline %#lx\n",
            idx, CLG_(bb_base) + current_ii->instr_offset, memline);
  if (use->count>0) {
    CLG_DEBUG(2, "   old: used %d, loss bits %d (%08x) [line %#lx from %#lx]\n",
              use->count, i, use->mask, loaded->memline, loaded->iaddr);
    CLG_DEBUG(2, "   collect: %d, use_base %p\n",
              CLG_(current_state).collect, loaded->use_base);

    if (CLG_(current_state).collect && loaded->use_base) {
      (loaded->use_base)[off_LL_AcCost] += 1000 / use->count;
      (loaded->use_base)[off_LL_SpLoss] += i;
    }
  }

  use->count = 0;
  use->mask  = 0;

  loaded->memline = memline;
  loaded->iaddr   = CLG_(bb_base) + current_ii->instr_offset;
  loaded->use_base = (CLG_(current_state).nonskipped) ?
    CLG_(current_state).nonskipped->skipped :
    CLG_(cost_base) + current_ii->cost_offset;
}
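/* Worked example for the "loss bits" computation above, assuming a
 * 64 byte LL line (so each of the 32 mask bits covers 2 bytes):
 *
 *   use->mask = 0x000000FF  ->  countBits(use->mask) = 8
 *   i = ((32 - 8) * 64) >> 5 = 48
 *
 * i.e. 48 of the 64 bytes were loaded but never touched before the
 * eviction; this is what gets accumulated into the SpLoss counter.
 */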
static
CacheModelResult cacheuse_LL_access(Addr memline, line_loaded* l1_loaded)
{
   UInt setNo = (memline >> LL.line_size_bits) & (LL.sets_min_1);
   UWord* set = &(LL.tags[setNo * LL.assoc]);
   UWord tag  = memline & LL.tag_mask;

   int i, j, idx;
   UWord tmp_tag;

   CLG_DEBUG(6,"LL.Acc(Memline %#lx): Set %d\n", memline, setNo);

   if (tag == (set[0] & LL.tag_mask)) {
     idx = (setNo * LL.assoc) + (set[0] & ~LL.tag_mask);
     l1_loaded->dep_use = &(LL.use[idx]);

     CLG_DEBUG(6," Hit0 [idx %d] (line %#lx from %#lx): => %08x, count %d\n",
               idx, LL.loaded[idx].memline, LL.loaded[idx].iaddr,
               LL.use[idx].mask, LL.use[idx].count);
     return LL_Hit;
   }
   for (i = 1; i < LL.assoc; i++) {
     if (tag == (set[i] & LL.tag_mask)) {
       tmp_tag = set[i];
       for (j = i; j > 0; j--) {
         set[j] = set[j - 1];
       }
       set[0] = tmp_tag;
       idx = (setNo * LL.assoc) + (tmp_tag & ~LL.tag_mask);
       l1_loaded->dep_use = &(LL.use[idx]);

       CLG_DEBUG(6," Hit%d [idx %d] (line %#lx from %#lx): => %08x, count %d\n",
                 i, idx, LL.loaded[idx].memline, LL.loaded[idx].iaddr,
                 LL.use[idx].mask, LL.use[idx].count);
       return LL_Hit;
     }
   }

   /* A miss; install this tag as MRU, shuffle rest down. */
   tmp_tag = set[LL.assoc - 1] & ~LL.tag_mask;
   for (j = LL.assoc - 1; j > 0; j--) {
     set[j] = set[j - 1];
   }
   set[0] = tag | tmp_tag;
   idx = (setNo * LL.assoc) + tmp_tag;
   l1_loaded->dep_use = &(LL.use[idx]);

   update_LL_use(idx, memline);

   return MemAccess;
}




#define UPDATE_USE(L)                                                \
                                                                     \
static CacheModelResult update##_##L##_use(cache_t2* cache, int idx, \
                               UInt mask, Addr memline)              \
{                                                                    \
  line_loaded* loaded = &(cache->loaded[idx]);                       \
  line_use* use = &(cache->use[idx]);                                \
  int c = ((32 - countBits(use->mask)) * cache->line_size)>>5;       \
                                                                     \
  CLG_DEBUG(2, " %s.miss [%d]: at %#lx accessing memline %#lx (mask %08x)\n", \
            cache->name, idx, CLG_(bb_base) + current_ii->instr_offset, memline, mask); \
  if (use->count>0) {                                                \
    CLG_DEBUG(2, "   old: used %d, loss bits %d (%08x) [line %#lx from %#lx]\n",\
              use->count, c, use->mask, loaded->memline, loaded->iaddr); \
    CLG_DEBUG(2, "   collect: %d, use_base %p\n",                    \
              CLG_(current_state).collect, loaded->use_base);        \
                                                                     \
    if (CLG_(current_state).collect && loaded->use_base) {           \
      (loaded->use_base)[off_##L##_AcCost] += 1000 / use->count;     \
      (loaded->use_base)[off_##L##_SpLoss] += c;                     \
                                                                     \
      /* FIXME (?): L1/LL line sizes must be equal! */               \
      loaded->dep_use->mask |= use->mask;                            \
      loaded->dep_use->count += use->count;                          \
    }                                                                \
  }                                                                  \
                                                                     \
  use->count = 1;                                                    \
  use->mask  = mask;                                                 \
  loaded->memline = memline;                                         \
  loaded->iaddr   = CLG_(bb_base) + current_ii->instr_offset;        \
  loaded->use_base = (CLG_(current_state).nonskipped) ?              \
    CLG_(current_state).nonskipped->skipped :                        \
    CLG_(cost_base) + current_ii->cost_offset;                       \
                                                                     \
  if (memline == 0) return LL_Hit;                                   \
  return cacheuse_LL_access(memline, loaded);                        \
}

UPDATE_USE(I1);
UPDATE_USE(D1);

CACHEUSE(I1);
CACHEUSE(D1);


static
void cacheuse_finish(void)
{
  int i;
  InstrInfo ii = { 0,0,0,0 };

  if (!CLG_(current_state).collect) return;

  CLG_(bb_base)   = 0;
  current_ii      = &ii; /* needs to be set for update_XX_use */
  CLG_(cost_base) = 0;

  /* update usage counters */
  if (I1.use)
    for (i = 0; i < I1.sets * I1.assoc; i++)
      if (I1.loaded[i].use_base)
        update_I1_use( &I1, i, 0,0);

  if (D1.use)
    for (i = 0; i < D1.sets * D1.assoc; i++)
      if (D1.loaded[i].use_base)
        update_D1_use( &D1, i, 0,0);

  if (LL.use)
    for (i = 0; i < LL.sets * LL.assoc; i++)
      if (LL.loaded[i].use_base)
        update_LL_use(i, 0);

  current_ii = 0;
}



/*------------------------------------------------------------*/
/*--- Helper functions called by instrumented code         ---*/
/*------------------------------------------------------------*/


static __inline__
void inc_costs(CacheModelResult r, ULong* c1, ULong* c2)
{
    switch(r) {
        case WriteBackMemAccess:
            if (clo_simulate_writeback) {
                c1[3]++;
                c2[3]++;
            }
            // fall through

        case MemAccess:
            c1[2]++;
            c2[2]++;
            // fall through

        case LL_Hit:
            c1[1]++;
            c2[1]++;
            // fall through

        default:
            c1[0]++;
            c2[0]++;
    }
}
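/* The deliberate switch fall-through above turns one model result into
 * increments on every level the access passed through.  For example, a
 * WriteBackMemAccess with --simulate-wb=yes bumps, in order:
 *   c[3] (write-backs), c[2] (LL misses), c[1] (L1 misses), c[0] (refs)
 * while an L1_Hit only bumps c[0].
 */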
static
Char* cacheRes(CacheModelResult r)
{
    switch(r) {
    case L1_Hit:    return "L1 Hit ";
    case LL_Hit:    return "LL Hit ";
    case MemAccess: return "LL Miss";
    case WriteBackMemAccess: return "LL Miss (dirty)";
    default:
        tl_assert(0);
    }
    return "??";
}

VG_REGPARM(1)
static void log_1I0D(InstrInfo* ii)
{
    CacheModelResult IrRes;

    current_ii = ii;
    IrRes = (*simulator.I1_Read)(CLG_(bb_base) + ii->instr_offset, ii->instr_size);

    CLG_DEBUG(6, "log_1I0D: Ir %#lx/%u => %s\n",
              CLG_(bb_base) + ii->instr_offset, ii->instr_size, cacheRes(IrRes));

    if (CLG_(current_state).collect) {
        ULong* cost_Ir;

        if (CLG_(current_state).nonskipped)
            cost_Ir = CLG_(current_state).nonskipped->skipped + fullOffset(EG_IR);
        else
            cost_Ir = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_IR];

        inc_costs(IrRes, cost_Ir,
                  CLG_(current_state).cost + fullOffset(EG_IR) );
    }
}

VG_REGPARM(2)
static void log_2I0D(InstrInfo* ii1, InstrInfo* ii2)
{
    CacheModelResult Ir1Res, Ir2Res;
    ULong *global_cost_Ir;

    current_ii = ii1;
    Ir1Res = (*simulator.I1_Read)(CLG_(bb_base) + ii1->instr_offset, ii1->instr_size);
    current_ii = ii2;
    Ir2Res = (*simulator.I1_Read)(CLG_(bb_base) + ii2->instr_offset, ii2->instr_size);

    CLG_DEBUG(6, "log_2I0D: Ir1 %#lx/%u => %s, Ir2 %#lx/%u => %s\n",
              CLG_(bb_base) + ii1->instr_offset, ii1->instr_size, cacheRes(Ir1Res),
              CLG_(bb_base) + ii2->instr_offset, ii2->instr_size, cacheRes(Ir2Res) );

    if (!CLG_(current_state).collect) return;

    global_cost_Ir = CLG_(current_state).cost + fullOffset(EG_IR);
    if (CLG_(current_state).nonskipped) {
        ULong* skipped_cost_Ir =
            CLG_(current_state).nonskipped->skipped + fullOffset(EG_IR);

        inc_costs(Ir1Res, global_cost_Ir, skipped_cost_Ir);
        inc_costs(Ir2Res, global_cost_Ir, skipped_cost_Ir);
        return;
    }

    inc_costs(Ir1Res, global_cost_Ir,
              CLG_(cost_base) + ii1->cost_offset + ii1->eventset->offset[EG_IR]);
    inc_costs(Ir2Res, global_cost_Ir,
              CLG_(cost_base) + ii2->cost_offset + ii2->eventset->offset[EG_IR]);
}

VG_REGPARM(3)
static void log_3I0D(InstrInfo* ii1, InstrInfo* ii2, InstrInfo* ii3)
{
    CacheModelResult Ir1Res, Ir2Res, Ir3Res;
    ULong *global_cost_Ir;

    current_ii = ii1;
    Ir1Res = (*simulator.I1_Read)(CLG_(bb_base) + ii1->instr_offset, ii1->instr_size);
    current_ii = ii2;
    Ir2Res = (*simulator.I1_Read)(CLG_(bb_base) + ii2->instr_offset, ii2->instr_size);
    current_ii = ii3;
    Ir3Res = (*simulator.I1_Read)(CLG_(bb_base) + ii3->instr_offset, ii3->instr_size);

    CLG_DEBUG(6, "log_3I0D: Ir1 %#lx/%u => %s, Ir2 %#lx/%u => %s, Ir3 %#lx/%u => %s\n",
              CLG_(bb_base) + ii1->instr_offset, ii1->instr_size, cacheRes(Ir1Res),
              CLG_(bb_base) + ii2->instr_offset, ii2->instr_size, cacheRes(Ir2Res),
              CLG_(bb_base) + ii3->instr_offset, ii3->instr_size, cacheRes(Ir3Res) );

    if (!CLG_(current_state).collect) return;

    global_cost_Ir = CLG_(current_state).cost + fullOffset(EG_IR);
    if (CLG_(current_state).nonskipped) {
        ULong* skipped_cost_Ir =
            CLG_(current_state).nonskipped->skipped + fullOffset(EG_IR);
        inc_costs(Ir1Res, global_cost_Ir, skipped_cost_Ir);
        inc_costs(Ir2Res, global_cost_Ir, skipped_cost_Ir);
        inc_costs(Ir3Res, global_cost_Ir, skipped_cost_Ir);
        return;
    }

    inc_costs(Ir1Res, global_cost_Ir,
              CLG_(cost_base) + ii1->cost_offset + ii1->eventset->offset[EG_IR]);
    inc_costs(Ir2Res, global_cost_Ir,
              CLG_(cost_base) + ii2->cost_offset + ii2->eventset->offset[EG_IR]);
    inc_costs(Ir3Res, global_cost_Ir,
              CLG_(cost_base) + ii3->cost_offset + ii3->eventset->offset[EG_IR]);
}
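/* How the counter pointers line up, given the event groups registered
 * in CLG_(init_eventsets)() below: cost_Ir points at the instruction's
 * "Ir" group, laid out as
 *   cost_Ir[0] = Ir,  cost_Ir[1] = I1mr,  cost_Ir[2] = ILmr,
 *   cost_Ir[3] = ILdmr   (only with --simulate-wb=yes)
 * which is exactly the indexing that inc_costs() performs.
 */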
/* Instruction doing a read access */

VG_REGPARM(3)
static void log_1I1Dr(InstrInfo* ii, Addr data_addr, Word data_size)
{
    CacheModelResult IrRes, DrRes;

    current_ii = ii;
    IrRes = (*simulator.I1_Read)(CLG_(bb_base) + ii->instr_offset, ii->instr_size);
    DrRes = (*simulator.D1_Read)(data_addr, data_size);

    CLG_DEBUG(6, "log_1I1Dr: Ir %#lx/%u => %s, Dr %#lx/%lu => %s\n",
              CLG_(bb_base) + ii->instr_offset, ii->instr_size, cacheRes(IrRes),
              data_addr, data_size, cacheRes(DrRes));

    if (CLG_(current_state).collect) {
        ULong *cost_Ir, *cost_Dr;

        if (CLG_(current_state).nonskipped) {
            cost_Ir = CLG_(current_state).nonskipped->skipped + fullOffset(EG_IR);
            cost_Dr = CLG_(current_state).nonskipped->skipped + fullOffset(EG_DR);
        }
        else {
            cost_Ir = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_IR];
            cost_Dr = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_DR];
        }

        inc_costs(IrRes, cost_Ir,
                  CLG_(current_state).cost + fullOffset(EG_IR) );
        inc_costs(DrRes, cost_Dr,
                  CLG_(current_state).cost + fullOffset(EG_DR) );
    }
}


VG_REGPARM(3)
static void log_0I1Dr(InstrInfo* ii, Addr data_addr, Word data_size)
{
    CacheModelResult DrRes;

    current_ii = ii;
    DrRes = (*simulator.D1_Read)(data_addr, data_size);

    CLG_DEBUG(6, "log_0I1Dr: Dr %#lx/%lu => %s\n",
              data_addr, data_size, cacheRes(DrRes));

    if (CLG_(current_state).collect) {
        ULong *cost_Dr;

        if (CLG_(current_state).nonskipped)
            cost_Dr = CLG_(current_state).nonskipped->skipped + fullOffset(EG_DR);
        else
            cost_Dr = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_DR];

        inc_costs(DrRes, cost_Dr,
                  CLG_(current_state).cost + fullOffset(EG_DR) );
    }
}


/* Instruction doing a write access */

VG_REGPARM(3)
static void log_1I1Dw(InstrInfo* ii, Addr data_addr, Word data_size)
{
    CacheModelResult IrRes, DwRes;

    current_ii = ii;
    IrRes = (*simulator.I1_Read)(CLG_(bb_base) + ii->instr_offset, ii->instr_size);
    DwRes = (*simulator.D1_Write)(data_addr, data_size);

    CLG_DEBUG(6, "log_1I1Dw: Ir %#lx/%u => %s, Dw %#lx/%lu => %s\n",
              CLG_(bb_base) + ii->instr_offset, ii->instr_size, cacheRes(IrRes),
              data_addr, data_size, cacheRes(DwRes));

    if (CLG_(current_state).collect) {
        ULong *cost_Ir, *cost_Dw;

        if (CLG_(current_state).nonskipped) {
            cost_Ir = CLG_(current_state).nonskipped->skipped + fullOffset(EG_IR);
            cost_Dw = CLG_(current_state).nonskipped->skipped + fullOffset(EG_DW);
        }
        else {
            cost_Ir = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_IR];
            cost_Dw = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_DW];
        }

        inc_costs(IrRes, cost_Ir,
                  CLG_(current_state).cost + fullOffset(EG_IR) );
        inc_costs(DwRes, cost_Dw,
                  CLG_(current_state).cost + fullOffset(EG_DW) );
    }
}

VG_REGPARM(3)
static void log_0I1Dw(InstrInfo* ii, Addr data_addr, Word data_size)
{
    CacheModelResult DwRes;

    current_ii = ii;
    DwRes = (*simulator.D1_Write)(data_addr, data_size);

    CLG_DEBUG(6, "log_0I1Dw: Dw %#lx/%lu => %s\n",
              data_addr, data_size, cacheRes(DwRes));
    if (CLG_(current_state).collect) {
        ULong *cost_Dw;

        if (CLG_(current_state).nonskipped)
            cost_Dw = CLG_(current_state).nonskipped->skipped + fullOffset(EG_DW);
        else
            cost_Dw = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_DW];

        inc_costs(DwRes, cost_Dw,
                  CLG_(current_state).cost + fullOffset(EG_DW) );
    }
}



/*------------------------------------------------------------*/
/*--- Cache configuration                                  ---*/
/*------------------------------------------------------------*/

static cache_t clo_I1_cache = UNDEFINED_CACHE;
static cache_t clo_D1_cache = UNDEFINED_CACHE;
static cache_t clo_LL_cache = UNDEFINED_CACHE;

/* Initialize and clear simulator state */
static void cachesim_post_clo_init(void)
{
  /* Cache configurations. */
  cache_t I1c, D1c, LLc;

  /* Initialize access handlers */
  if (!CLG_(clo).simulate_cache) {
    CLG_(cachesim).log_1I0D  = 0;
    CLG_(cachesim).log_1I0D_name = "(no function)";
    CLG_(cachesim).log_2I0D  = 0;
    CLG_(cachesim).log_2I0D_name = "(no function)";
    CLG_(cachesim).log_3I0D  = 0;
    CLG_(cachesim).log_3I0D_name = "(no function)";

    CLG_(cachesim).log_1I1Dr = 0;
    CLG_(cachesim).log_1I1Dr_name = "(no function)";
    CLG_(cachesim).log_1I1Dw = 0;
    CLG_(cachesim).log_1I1Dw_name = "(no function)";

    CLG_(cachesim).log_0I1Dr = 0;
    CLG_(cachesim).log_0I1Dr_name = "(no function)";
    CLG_(cachesim).log_0I1Dw = 0;
    CLG_(cachesim).log_0I1Dw_name = "(no function)";
    return;
  }

  /* Configuration of caches is only needed with real cache simulation */
  VG_(post_clo_init_configure_caches)(&I1c, &D1c, &LLc,
                                      &clo_I1_cache,
                                      &clo_D1_cache,
                                      &clo_LL_cache);

  I1.name = "I1";
  D1.name = "D1";
  LL.name = "LL";

  // min_line_size is used to make sure that we never feed
  // accesses to the simulator straddling more than two
  // cache lines at any cache level
  CLG_(min_line_size) = (I1c.line_size < D1c.line_size)
                           ? I1c.line_size : D1c.line_size;
  CLG_(min_line_size) = (LLc.line_size < CLG_(min_line_size))
                           ? LLc.line_size : CLG_(min_line_size);

  Int largest_load_or_store_size
     = VG_(machine_get_size_of_largest_guest_register)();
  if (CLG_(min_line_size) < largest_load_or_store_size) {
     /* We can't continue, because the cache simulation might
        straddle more than 2 lines, and it will assert.  So let's
        just stop before we start. */
     VG_(umsg)("Callgrind: cannot continue: the minimum line size (%d)\n",
               (Int)CLG_(min_line_size));
     VG_(umsg)("  must be equal to or larger than the maximum register size (%d)\n",
               largest_load_or_store_size );
     VG_(umsg)("  but it is not.  Exiting now.\n");
     VG_(exit)(1);
  }
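  /* Illustrative instance of the guard above: with hypothetical 16 byte
   * lines, a 32 byte vector store at an address 8 bytes into a line
   * would touch bytes in three consecutive lines, and cachesim_ref()
   * would hit its "straddles more than two cache sets" panic.
   * Requiring min_line_size >= the largest guest register rules this
   * out. */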

  cachesim_initcache(I1c, &I1);
  cachesim_initcache(D1c, &D1);
  cachesim_initcache(LLc, &LL);

  /* the cache simulators use the standard helpers
   * with dispatching via the simulator struct */

  CLG_(cachesim).log_1I0D  = log_1I0D;
  CLG_(cachesim).log_1I0D_name  = "log_1I0D";
  CLG_(cachesim).log_2I0D  = log_2I0D;
  CLG_(cachesim).log_2I0D_name  = "log_2I0D";
  CLG_(cachesim).log_3I0D  = log_3I0D;
  CLG_(cachesim).log_3I0D_name  = "log_3I0D";

  CLG_(cachesim).log_1I1Dr = log_1I1Dr;
  CLG_(cachesim).log_1I1Dw = log_1I1Dw;
  CLG_(cachesim).log_1I1Dr_name = "log_1I1Dr";
  CLG_(cachesim).log_1I1Dw_name = "log_1I1Dw";

  CLG_(cachesim).log_0I1Dr = log_0I1Dr;
  CLG_(cachesim).log_0I1Dw = log_0I1Dw;
  CLG_(cachesim).log_0I1Dr_name = "log_0I1Dr";
  CLG_(cachesim).log_0I1Dw_name = "log_0I1Dw";

  if (clo_collect_cacheuse) {

      /* Warn about unsupported option combinations */
      if (clo_simulate_hwpref) {
          VG_(message)(Vg_DebugMsg,
                       "warning: prefetch simulation cannot be "
                       "used with cache usage\n");
          clo_simulate_hwpref = False;
      }

      if (clo_simulate_writeback) {
          VG_(message)(Vg_DebugMsg,
                       "warning: write-back simulation cannot be "
                       "used with cache usage\n");
          clo_simulate_writeback = False;
      }

      simulator.I1_Read  = cacheuse_I1_doRead;
      simulator.D1_Read  = cacheuse_D1_doRead;
      simulator.D1_Write = cacheuse_D1_doRead;
      return;
  }

  if (clo_simulate_hwpref) {
    prefetch_clear();

    if (clo_simulate_writeback) {
      simulator.I1_Read  = prefetch_I1_Read;
      simulator.D1_Read  = prefetch_D1_Read;
      simulator.D1_Write = prefetch_D1_Write;
    }
    else {
      simulator.I1_Read  = prefetch_I1_ref;
      simulator.D1_Read  = prefetch_D1_ref;
      simulator.D1_Write = prefetch_D1_ref;
    }

    return;
  }

  if (clo_simulate_writeback) {
      simulator.I1_Read  = cachesim_I1_Read;
      simulator.D1_Read  = cachesim_D1_Read;
      simulator.D1_Write = cachesim_D1_Write;
  }
  else {
      simulator.I1_Read  = cachesim_I1_ref;
      simulator.D1_Read  = cachesim_D1_ref;
      simulator.D1_Write = cachesim_D1_ref;
  }
}


/* Clear simulator state.  Has to be initialized before. */
static
void cachesim_clear(void)
{
  cachesim_clearcache(&I1);
  cachesim_clearcache(&D1);
  cachesim_clearcache(&LL);

  prefetch_clear();
}


static void cachesim_getdesc(Char* buf)
{
  Int p;
  p = VG_(sprintf)(buf, "\ndesc: I1 cache: %s\n", I1.desc_line);
  p += VG_(sprintf)(buf+p, "desc: D1 cache: %s\n", D1.desc_line);
  VG_(sprintf)(buf+p, "desc: LL cache: %s\n", LL.desc_line);
}

static
void cachesim_print_opts(void)
{
  VG_(printf)(
"\n   cache simulator options (does cache simulation if used):\n"
"    --simulate-wb=no|yes      Count write-back events [no]\n"
"    --simulate-hwpref=no|yes  Simulate hardware prefetch [no]\n"
#if CLG_EXPERIMENTAL
"    --simulate-sectors=no|yes Simulate sectored behaviour [no]\n"
#endif
"    --cacheuse=no|yes         Collect cache block use [no]\n");
  VG_(print_cache_clo_opts)();
}
/* Check for a command line option for cache configuration.
 * Return False if unknown and not handled.
 *
 * Called from CLG_(process_cmd_line_option)() in clo.c
 */
static Bool cachesim_parse_opt(Char* arg)
{
   if      VG_BOOL_CLO(arg, "--simulate-wb",      clo_simulate_writeback) {}
   else if VG_BOOL_CLO(arg, "--simulate-hwpref",  clo_simulate_hwpref)    {}
   else if VG_BOOL_CLO(arg, "--simulate-sectors", clo_simulate_sectors)   {}

   else if VG_BOOL_CLO(arg, "--cacheuse", clo_collect_cacheuse) {
      if (clo_collect_cacheuse) {
         /* Use counters only make sense with fine-grained dumping */
         CLG_(clo).dump_instr = True;
      }
   }

   else if (VG_(str_clo_cache_opt)(arg,
                                   &clo_I1_cache,
                                   &clo_D1_cache,
                                   &clo_LL_cache)) {}

   else
     return False;

   return True;
}

/* Adds commas to ULong, right justifying in a field field_width wide; returns
 * the string in buf. */
static
Int commify(ULong n, int field_width, char* buf)
{
   int len, n_commas, i, j, new_len, space;

   VG_(sprintf)(buf, "%llu", n);
   len = VG_(strlen)(buf);
   n_commas = (len - 1) / 3;
   new_len = len + n_commas;
   space = field_width - new_len;

   /* Allow for printing a number in a field_width smaller than its size */
   if (space < 0) space = 0;

   /* Make j = -1 because we copy the '\0' before doing the numbers in groups
    * of three. */
   for (j = -1, i = len ; i >= 0; i--) {
      buf[i + n_commas + space] = buf[i];

      if ((i>0) && (3 == ++j)) {
         j = 0;
         n_commas--;
         buf[i + n_commas + space] = ',';
      }
   }
   /* Right justify in field. */
   for (i = 0; i < space; i++)  buf[i] = ' ';
   return new_len;
}
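/* Worked example for commify(), illustrative values only:
 *   commify(1234567, 12, buf)  ->  buf = "   1,234,567"
 * len = 7, n_commas = 2, new_len = 9, space = 12 - 9 = 3; the digits
 * are shifted right by (n_commas + space) as the loop walks backwards,
 * inserting a ',' after every third digit copied.
 */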
static
void percentify(Int n, Int ex, Int field_width, char buf[])
{
   int i, len, space;

   VG_(sprintf)(buf, "%d.%d%%", n / ex, n % ex);
   len = VG_(strlen)(buf);
   space = field_width - len;
   if (space < 0) space = 0;  /* Allow for v. small field_width */
   i = len;

   /* Right justify in field */
   for (     ; i >= 0;    i--)  buf[i + space] = buf[i];
   for (i = 0; i < space; i++)  buf[i] = ' ';
}

static
void cachesim_printstat(Int l1, Int l2, Int l3)
{
  FullCost total = CLG_(total_cost), D_total = 0;
  ULong LL_total_m, LL_total_mr, LL_total_mw,
        LL_total, LL_total_r, LL_total_w;
  char buf1[RESULTS_BUF_LEN],
       buf2[RESULTS_BUF_LEN],
       buf3[RESULTS_BUF_LEN];
  Int p;

  if ((VG_(clo_verbosity) > 1) && clo_simulate_hwpref) {
    VG_(message)(Vg_DebugMsg, "Prefetch Up:   %llu\n",
                 prefetch_up);
    VG_(message)(Vg_DebugMsg, "Prefetch Down: %llu\n",
                 prefetch_down);
    VG_(message)(Vg_DebugMsg, "\n");
  }

  commify(total[fullOffset(EG_IR) +1], l1, buf1);
  VG_(message)(Vg_UserMsg, "I1  misses:    %s\n", buf1);

  commify(total[fullOffset(EG_IR) +2], l1, buf1);
  VG_(message)(Vg_UserMsg, "LLi misses:    %s\n", buf1);

  p = 100;

  if (0 == total[fullOffset(EG_IR)])
    total[fullOffset(EG_IR)] = 1;

  percentify(total[fullOffset(EG_IR)+1] * 100 * p /
             total[fullOffset(EG_IR)], p, l1+1, buf1);
  VG_(message)(Vg_UserMsg, "I1  miss rate: %s\n", buf1);

  percentify(total[fullOffset(EG_IR)+2] * 100 * p /
             total[fullOffset(EG_IR)], p, l1+1, buf1);
  VG_(message)(Vg_UserMsg, "LLi miss rate: %s\n", buf1);
  VG_(message)(Vg_UserMsg, "\n");

  /* D cache results.
   * Use the D_refs.rd and D_refs.wr values to determine the
   * width of columns 2 & 3.
   */

  D_total = CLG_(get_eventset_cost)( CLG_(sets).full );
  CLG_(init_cost)( CLG_(sets).full, D_total);
  // we only use the first 3 values of D_total, adding up Dr and Dw costs
  CLG_(copy_cost)( CLG_(get_event_set)(EG_DR), D_total, total + fullOffset(EG_DR) );
  CLG_(add_cost) ( CLG_(get_event_set)(EG_DW), D_total, total + fullOffset(EG_DW) );

  commify( D_total[0], l1, buf1);
  commify(total[fullOffset(EG_DR)], l2, buf2);
  commify(total[fullOffset(EG_DW)], l3, buf3);
  VG_(message)(Vg_UserMsg, "D   refs:      %s (%s rd + %s wr)\n",
               buf1, buf2, buf3);

  commify( D_total[1], l1, buf1);
  commify(total[fullOffset(EG_DR)+1], l2, buf2);
  commify(total[fullOffset(EG_DW)+1], l3, buf3);
  VG_(message)(Vg_UserMsg, "D1  misses:    %s (%s rd + %s wr)\n",
               buf1, buf2, buf3);

  commify( D_total[2], l1, buf1);
  commify(total[fullOffset(EG_DR)+2], l2, buf2);
  commify(total[fullOffset(EG_DW)+2], l3, buf3);
  VG_(message)(Vg_UserMsg, "LLd misses:    %s (%s rd + %s wr)\n",
               buf1, buf2, buf3);

  p = 10;

  if (0 == D_total[0])               D_total[0] = 1;
  if (0 == total[fullOffset(EG_DR)]) total[fullOffset(EG_DR)] = 1;
  if (0 == total[fullOffset(EG_DW)]) total[fullOffset(EG_DW)] = 1;

  percentify( D_total[1] * 100 * p / D_total[0], p, l1+1, buf1);
  percentify(total[fullOffset(EG_DR)+1] * 100 * p /
             total[fullOffset(EG_DR)], p, l2+1, buf2);
  percentify(total[fullOffset(EG_DW)+1] * 100 * p /
             total[fullOffset(EG_DW)], p, l3+1, buf3);
  VG_(message)(Vg_UserMsg, "D1  miss rate: %s (%s + %s )\n",
               buf1, buf2, buf3);

  percentify( D_total[2] * 100 * p / D_total[0], p, l1+1, buf1);
  percentify(total[fullOffset(EG_DR)+2] * 100 * p /
             total[fullOffset(EG_DR)], p, l2+1, buf2);
  percentify(total[fullOffset(EG_DW)+2] * 100 * p /
             total[fullOffset(EG_DW)], p, l3+1, buf3);
  VG_(message)(Vg_UserMsg, "LLd miss rate: %s (%s + %s )\n",
               buf1, buf2, buf3);
  VG_(message)(Vg_UserMsg, "\n");



  /* LL overall results */

  LL_total =
    total[fullOffset(EG_DR) +1] +
    total[fullOffset(EG_DW) +1] +
    total[fullOffset(EG_IR) +1];
  LL_total_r =
    total[fullOffset(EG_DR) +1] +
    total[fullOffset(EG_IR) +1];
  LL_total_w = total[fullOffset(EG_DW) +1];
  commify(LL_total,   l1, buf1);
  commify(LL_total_r, l2, buf2);
  commify(LL_total_w, l3, buf3);
  VG_(message)(Vg_UserMsg, "LL refs:       %s (%s rd + %s wr)\n",
               buf1, buf2, buf3);

  LL_total_m =
    total[fullOffset(EG_DR) +2] +
    total[fullOffset(EG_DW) +2] +
    total[fullOffset(EG_IR) +2];
  LL_total_mr =
    total[fullOffset(EG_DR) +2] +
    total[fullOffset(EG_IR) +2];
  LL_total_mw = total[fullOffset(EG_DW) +2];
  commify(LL_total_m,  l1, buf1);
  commify(LL_total_mr, l2, buf2);
  commify(LL_total_mw, l3, buf3);
  VG_(message)(Vg_UserMsg, "LL misses:     %s (%s rd + %s wr)\n",
               buf1, buf2, buf3);

  percentify(LL_total_m * 100 * p /
             (total[fullOffset(EG_IR)] + D_total[0]), p, l1+1, buf1);
  percentify(LL_total_mr * 100 * p /
             (total[fullOffset(EG_IR)] + total[fullOffset(EG_DR)]),
             p, l2+1, buf2);
  percentify(LL_total_mw * 100 * p /
             total[fullOffset(EG_DW)], p, l3+1, buf3);
  VG_(message)(Vg_UserMsg, "LL miss rate:  %s (%s + %s )\n",
               buf1, buf2, buf3);
}


/*------------------------------------------------------------*/
/*--- Setup for Event sets                                 ---*/
/*------------------------------------------------------------*/
struct event_sets CLG_(sets);

void CLG_(init_eventsets)()
{
    // Event groups from which the event sets are composed;
    // the "Use" group is only used with "cacheuse" simulation
    if (clo_collect_cacheuse)
        CLG_(register_event_group4)(EG_USE,
                                    "AcCost1", "SpLoss1", "AcCost2", "SpLoss2");

    if (!CLG_(clo).simulate_cache)
        CLG_(register_event_group)(EG_IR, "Ir");
    else if (!clo_simulate_writeback) {
        CLG_(register_event_group3)(EG_IR, "Ir", "I1mr", "ILmr");
        CLG_(register_event_group3)(EG_DR, "Dr", "D1mr", "DLmr");
        CLG_(register_event_group3)(EG_DW, "Dw", "D1mw", "DLmw");
    }
    else { // clo_simulate_writeback
        CLG_(register_event_group4)(EG_IR, "Ir", "I1mr", "ILmr", "ILdmr");
        CLG_(register_event_group4)(EG_DR, "Dr", "D1mr", "DLmr", "DLdmr");
        CLG_(register_event_group4)(EG_DW, "Dw", "D1mw", "DLmw", "DLdmw");
    }

    if (CLG_(clo).simulate_branch) {
        CLG_(register_event_group2)(EG_BC, "Bc", "Bcm");
        CLG_(register_event_group2)(EG_BI, "Bi", "Bim");
    }

    if (CLG_(clo).collect_bus)
        CLG_(register_event_group)(EG_BUS, "Ge");

    if (CLG_(clo).collect_alloc)
        CLG_(register_event_group2)(EG_ALLOC, "allocCount", "allocSize");

    if (CLG_(clo).collect_systime)
        CLG_(register_event_group2)(EG_SYS, "sysCount", "sysTime");

    // event set used as base for instruction self cost
    CLG_(sets).base = CLG_(get_event_set2)(EG_USE, EG_IR);

    // event set comprising all event groups, used for inclusive cost
    CLG_(sets).full = CLG_(add_event_group2)(CLG_(sets).base, EG_DR, EG_DW);
    CLG_(sets).full = CLG_(add_event_group2)(CLG_(sets).full, EG_BC, EG_BI);
    CLG_(sets).full = CLG_(add_event_group) (CLG_(sets).full, EG_BUS);
    CLG_(sets).full = CLG_(add_event_group2)(CLG_(sets).full, EG_ALLOC, EG_SYS);

    CLG_DEBUGIF(1) {
        CLG_DEBUG(1, "EventSets:\n");
        CLG_(print_eventset)(-2, CLG_(sets).base);
        CLG_(print_eventset)(-2, CLG_(sets).full);
    }

    /* Non-existing events are silently ignored */
    CLG_(dumpmap) = CLG_(get_eventmapping)(CLG_(sets).full);
    CLG_(append_event)(CLG_(dumpmap), "Ir");
    CLG_(append_event)(CLG_(dumpmap), "Dr");
    CLG_(append_event)(CLG_(dumpmap), "Dw");
    CLG_(append_event)(CLG_(dumpmap), "I1mr");
    CLG_(append_event)(CLG_(dumpmap), "D1mr");
    CLG_(append_event)(CLG_(dumpmap), "D1mw");
    CLG_(append_event)(CLG_(dumpmap), "ILmr");
    CLG_(append_event)(CLG_(dumpmap), "DLmr");
    CLG_(append_event)(CLG_(dumpmap), "DLmw");
    CLG_(append_event)(CLG_(dumpmap), "ILdmr");
    CLG_(append_event)(CLG_(dumpmap), "DLdmr");
    CLG_(append_event)(CLG_(dumpmap), "DLdmw");
    CLG_(append_event)(CLG_(dumpmap), "Bc");
    CLG_(append_event)(CLG_(dumpmap), "Bcm");
    CLG_(append_event)(CLG_(dumpmap), "Bi");
    CLG_(append_event)(CLG_(dumpmap), "Bim");
    CLG_(append_event)(CLG_(dumpmap), "AcCost1");
    CLG_(append_event)(CLG_(dumpmap), "SpLoss1");
    CLG_(append_event)(CLG_(dumpmap), "AcCost2");
    CLG_(append_event)(CLG_(dumpmap), "SpLoss2");
    CLG_(append_event)(CLG_(dumpmap), "Ge");
    CLG_(append_event)(CLG_(dumpmap), "allocCount");
    CLG_(append_event)(CLG_(dumpmap), "allocSize");
    CLG_(append_event)(CLG_(dumpmap), "sysCount");
    CLG_(append_event)(CLG_(dumpmap), "sysTime");
}


/* this is called at dump time for every instruction executed */
static void cachesim_add_icost(SimCost cost, BBCC* bbcc,
                               InstrInfo* ii, ULong exe_count)
{
    if (!CLG_(clo).simulate_cache)
        cost[ fullOffset(EG_IR) ] += exe_count;

    if (ii->eventset)
        CLG_(add_and_zero_cost2)( CLG_(sets).full, cost,
                                  ii->eventset, bbcc->cost + ii->cost_offset);
}

static
void cachesim_finish(void)
{
  if (clo_collect_cacheuse)
    cacheuse_finish();
}

/*------------------------------------------------------------*/
/*--- The simulator defined in this file                   ---*/
/*------------------------------------------------------------*/

struct cachesim_if CLG_(cachesim) = {
  .print_opts    = cachesim_print_opts,
  .parse_opt     = cachesim_parse_opt,
  .post_clo_init = cachesim_post_clo_init,
  .clear         = cachesim_clear,
  .getdesc       = cachesim_getdesc,
  .printstat     = cachesim_printstat,
  .add_icost     = cachesim_add_icost,
  .finish        = cachesim_finish,

  /* these will be set by cachesim_post_clo_init */
  .log_1I0D  = 0,
  .log_2I0D  = 0,
  .log_3I0D  = 0,

  .log_1I1Dr = 0,
  .log_1I1Dw = 0,

  .log_0I1Dr = 0,
  .log_0I1Dw = 0,

  .log_1I0D_name = "(no function)",
  .log_2I0D_name = "(no function)",
  .log_3I0D_name = "(no function)",

  .log_1I1Dr_name = "(no function)",
  .log_1I1Dw_name = "(no function)",

  .log_0I1Dr_name = "(no function)",
  .log_0I1Dw_name = "(no function)",
};


/*--------------------------------------------------------------------*/
/*--- end                                                    sim.c ---*/
/*--------------------------------------------------------------------*/