Home | History | Annotate | Download | only in cachegrind
      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- Cache simulation                                    cg_sim.c ---*/
      4 /*--------------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Cachegrind, a Valgrind tool for cache
      8    profiling programs.
      9 
     10    Copyright (C) 2002-2012 Nicholas Nethercote
     11       njn (at) valgrind.org
     12 
     13    This program is free software; you can redistribute it and/or
     14    modify it under the terms of the GNU General Public License as
     15    published by the Free Software Foundation; either version 2 of the
     16    License, or (at your option) any later version.
     17 
     18    This program is distributed in the hope that it will be useful, but
     19    WITHOUT ANY WARRANTY; without even the implied warranty of
     20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21    General Public License for more details.
     22 
     23    You should have received a copy of the GNU General Public License
     24    along with this program; if not, write to the Free Software
     25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     26    02111-1307, USA.
     27 
     28    The GNU General Public License is contained in the file COPYING.
     29 */
     30 
     31 /* Notes:
     32   - simulates a write-allocate cache
     33   - (block --> set) hash function uses simple bit selection
     34   - handling of references straddling two cache blocks:
     35       - counts as only one cache access (not two)
     36       - both blocks hit                  --> one hit
     37       - one block hits, the other misses --> one miss
     38       - both blocks miss                 --> one miss (not two)
     39 */
     40 
     41 typedef struct {
     42    Int          size;                   /* bytes */
     43    Int          assoc;
     44    Int          line_size;              /* bytes */
     45    Int          sets;
     46    Int          sets_min_1;
     47    Int          line_size_bits;
     48    Int          tag_shift;
     49    Char         desc_line[128];
     50    UWord*       tags;
     51 } cache_t2;
     52 
     53 /* By this point, the size/assoc/line_size has been checked. */
     54 static void cachesim_initcache(cache_t config, cache_t2* c)
     55 {
     56    Int i;
     57 
     58    c->size      = config.size;
     59    c->assoc     = config.assoc;
     60    c->line_size = config.line_size;
     61 
     62    c->sets           = (c->size / c->line_size) / c->assoc;
     63    c->sets_min_1     = c->sets - 1;
     64    c->line_size_bits = VG_(log2)(c->line_size);
     65    c->tag_shift      = c->line_size_bits + VG_(log2)(c->sets);
     66 
     67    if (c->assoc == 1) {
     68       VG_(sprintf)(c->desc_line, "%d B, %d B, direct-mapped",
     69                                  c->size, c->line_size);
     70    } else {
     71       VG_(sprintf)(c->desc_line, "%d B, %d B, %d-way associative",
     72                                  c->size, c->line_size, c->assoc);
     73    }
     74 
     75    c->tags = VG_(malloc)("cg.sim.ci.1",
     76                          sizeof(UWord) * c->sets * c->assoc);
     77 
     78    for (i = 0; i < c->sets * c->assoc; i++)
     79       c->tags[i] = 0;
     80 }
     81 
     82 /* This is done as a macro rather than by passing in the cache_t2 as an
     83  * arg because it slows things down by a small amount (3-5%) due to all
     84  * that extra indirection. */
     85 
     86 #define CACHESIM(L, MISS_TREATMENT)                                         \
     87 /* The cache and associated bits and pieces. */                             \
     88 static cache_t2 L;                                                          \
     89                                                                             \
     90 static void cachesim_##L##_initcache(cache_t config)                        \
     91 {                                                                           \
     92     cachesim_initcache(config, &L);                                         \
     93 }                                                                           \
     94                                                                             \
     95 /* This attribute forces GCC to inline this function, even though it's */   \
     96 /* bigger than its usual limit.  Inlining gains around 5--10% speedup. */   \
     97 __attribute__((always_inline))                                              \
     98 static __inline__                                                           \
     99 void cachesim_##L##_doref(Addr a, UChar size, ULong* m1, ULong *mL)         \
    100 {                                                                           \
    101    UInt  set1 = ( a         >> L.line_size_bits) & (L.sets_min_1);          \
    102    UInt  set2 = ((a+size-1) >> L.line_size_bits) & (L.sets_min_1);          \
    103    UWord tag  = a >> L.tag_shift;                                           \
    104    UWord tag2;                                                              \
    105    Int i, j;                                                                \
    106    Bool is_miss = False;                                                    \
    107    UWord* set;                                                              \
    108                                                                             \
    109    /* First case: word entirely within line. */                             \
    110    if (set1 == set2) {                                                      \
    111                                                                             \
    112       set = &(L.tags[set1 * L.assoc]);                                      \
    113                                                                             \
    114       /* This loop is unrolled for just the first case, which is the most */\
    115       /* common.  We can't unroll any further because it would screw up   */\
    116       /* if we have a direct-mapped (1-way) cache.                        */\
    117       if (tag == set[0]) {                                                  \
    118          return;                                                            \
    119       }                                                                     \
    120       /* If the tag is one other than the MRU, move it into the MRU spot  */\
    121       /* and shuffle the rest down.                                       */\
    122       for (i = 1; i < L.assoc; i++) {                                       \
    123          if (tag == set[i]) {                                               \
    124             for (j = i; j > 0; j--) {                                       \
    125                set[j] = set[j - 1];                                         \
    126             }                                                               \
    127             set[0] = tag;                                                   \
    128             return;                                                         \
    129          }                                                                  \
    130       }                                                                     \
    131                                                                             \
    132       /* A miss;  install this tag as MRU, shuffle rest down. */            \
    133       for (j = L.assoc - 1; j > 0; j--) {                                   \
    134          set[j] = set[j - 1];                                               \
    135       }                                                                     \
    136       set[0] = tag;                                                         \
    137       MISS_TREATMENT;                                                       \
    138       return;                                                               \
    139                                                                             \
    140    /* Second case: word straddles two lines. */                             \
    141    /* Nb: this is a fast way of doing ((set1+1) % L.sets) */                \
    142    } else if (((set1 + 1) & (L.sets_min_1)) == set2) {                      \
    143       set = &(L.tags[set1 * L.assoc]);                                      \
    144       if (tag == set[0]) {                                                  \
    145          goto block2;                                                       \
    146       }                                                                     \
    147       for (i = 1; i < L.assoc; i++) {                                       \
    148          if (tag == set[i]) {                                               \
    149             for (j = i; j > 0; j--) {                                       \
    150                set[j] = set[j - 1];                                         \
    151             }                                                               \
    152             set[0] = tag;                                                   \
    153             goto block2;                                                    \
    154          }                                                                  \
    155       }                                                                     \
    156       for (j = L.assoc - 1; j > 0; j--) {                                   \
    157          set[j] = set[j - 1];                                               \
    158       }                                                                     \
    159       set[0] = tag;                                                         \
    160       is_miss = True;                                                       \
    161 block2:                                                                     \
    162       set = &(L.tags[set2 * L.assoc]);                                      \
    163       tag2 = (a+size-1) >> L.tag_shift;                                     \
    164       if (tag2 == set[0]) {                                                 \
    165          goto miss_treatment;                                               \
    166       }                                                                     \
    167       for (i = 1; i < L.assoc; i++) {                                       \
    168          if (tag2 == set[i]) {                                              \
    169             for (j = i; j > 0; j--) {                                       \
    170                set[j] = set[j - 1];                                         \
    171             }                                                               \
    172             set[0] = tag2;                                                  \
    173             goto miss_treatment;                                            \
    174          }                                                                  \
    175       }                                                                     \
    176       for (j = L.assoc - 1; j > 0; j--) {                                   \
    177          set[j] = set[j - 1];                                               \
    178       }                                                                     \
    179       set[0] = tag2;                                                        \
    180       is_miss = True;                                                       \
    181 miss_treatment:                                                             \
    182       if (is_miss) { MISS_TREATMENT; }                                      \
    183                                                                             \
    184    } else {                                                                 \
    185        VG_(printf)("addr: %lx  size: %u  sets: %d %d", a, size, set1, set2);\
    186        VG_(tool_panic)("item straddles more than two cache sets");          \
    187    }                                                                        \
    188    return;                                                                  \
    189 }
    190 
    191 CACHESIM(LL, (*mL)++ );
    192 CACHESIM(I1, { (*m1)++; cachesim_LL_doref(a, size, m1, mL); } );
    193 CACHESIM(D1, { (*m1)++; cachesim_LL_doref(a, size, m1, mL); } );
    194 
    195 /*--------------------------------------------------------------------*/
    196 /*--- end                                                 cg_sim.c ---*/
    197 /*--------------------------------------------------------------------*/
    198 
    199