Home | History | Annotate | Download | only in cachegrind
      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- Cache simulation                                    cg_sim.c ---*/
      4 /*--------------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Cachegrind, a Valgrind tool for cache
      8    profiling programs.
      9 
     10    Copyright (C) 2002-2017 Nicholas Nethercote
     11       njn (at) valgrind.org
     12 
     13    This program is free software; you can redistribute it and/or
     14    modify it under the terms of the GNU General Public License as
     15    published by the Free Software Foundation; either version 2 of the
     16    License, or (at your option) any later version.
     17 
     18    This program is distributed in the hope that it will be useful, but
     19    WITHOUT ANY WARRANTY; without even the implied warranty of
     20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21    General Public License for more details.
     22 
     23    You should have received a copy of the GNU General Public License
     24    along with this program; if not, write to the Free Software
     25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     26    02111-1307, USA.
     27 
     28    The GNU General Public License is contained in the file COPYING.
     29 */
     30 
     31 /* Notes:
     32   - simulates a write-allocate cache
     33   - (block --> set) hash function uses simple bit selection
     34   - handling of references straddling two cache blocks:
     35       - counts as only one cache access (not two)
     36       - both blocks hit                  --> one hit
     37       - one block hits, the other misses --> one miss
     38       - both blocks miss                 --> one miss (not two)
     39 */
     40 
     41 typedef struct {
     42    Int          size;                   /* bytes */
     43    Int          assoc;
     44    Int          line_size;              /* bytes */
     45    Int          sets;
     46    Int          sets_min_1;
     47    Int          line_size_bits;
     48    Int          tag_shift;
     49    HChar        desc_line[128];         /* large enough */
     50    UWord*       tags;
     51 } cache_t2;
     52 
     53 /* By this point, the size/assoc/line_size has been checked. */
     54 static void cachesim_initcache(cache_t config, cache_t2* c)
     55 {
     56    Int i;
     57 
     58    c->size      = config.size;
     59    c->assoc     = config.assoc;
     60    c->line_size = config.line_size;
     61 
     62    c->sets           = (c->size / c->line_size) / c->assoc;
     63    c->sets_min_1     = c->sets - 1;
     64    c->line_size_bits = VG_(log2)(c->line_size);
     65    c->tag_shift      = c->line_size_bits + VG_(log2)(c->sets);
     66 
     67    if (c->assoc == 1) {
     68       VG_(sprintf)(c->desc_line, "%d B, %d B, direct-mapped",
     69                                  c->size, c->line_size);
     70    } else {
     71       VG_(sprintf)(c->desc_line, "%d B, %d B, %d-way associative",
     72                                  c->size, c->line_size, c->assoc);
     73    }
     74 
     75    c->tags = VG_(malloc)("cg.sim.ci.1",
     76                          sizeof(UWord) * c->sets * c->assoc);
     77 
     78    for (i = 0; i < c->sets * c->assoc; i++)
     79       c->tags[i] = 0;
     80 }
     81 
     82 /* This attribute forces GCC to inline the function, getting rid of a
     83  * lot of indirection around the cache_t2 pointer, if it is known to be
     84  * constant in the caller (the caller is inlined itself).
     85  * Without inlining of simulator functions, cachegrind can get 40% slower.
     86  */
     87 __attribute__((always_inline))
     88 static __inline__
     89 Bool cachesim_setref_is_miss(cache_t2* c, UInt set_no, UWord tag)
     90 {
     91    int i, j;
     92    UWord *set;
     93 
     94    set = &(c->tags[set_no * c->assoc]);
     95 
     96    /* This loop is unrolled for just the first case, which is the most */
     97    /* common.  We can't unroll any further because it would screw up   */
     98    /* if we have a direct-mapped (1-way) cache.                        */
     99    if (tag == set[0])
    100       return False;
    101 
    102    /* If the tag is one other than the MRU, move it into the MRU spot  */
    103    /* and shuffle the rest down.                                       */
    104    for (i = 1; i < c->assoc; i++) {
    105       if (tag == set[i]) {
    106          for (j = i; j > 0; j--) {
    107             set[j] = set[j - 1];
    108          }
    109          set[0] = tag;
    110 
    111          return False;
    112       }
    113    }
    114 
    115    /* A miss;  install this tag as MRU, shuffle rest down. */
    116    for (j = c->assoc - 1; j > 0; j--) {
    117       set[j] = set[j - 1];
    118    }
    119    set[0] = tag;
    120 
    121    return True;
    122 }
    123 
    124 __attribute__((always_inline))
    125 static __inline__
    126 Bool cachesim_ref_is_miss(cache_t2* c, Addr a, UChar size)
    127 {
    128    /* A memory block has the size of a cache line */
    129    UWord block1 =  a         >> c->line_size_bits;
    130    UWord block2 = (a+size-1) >> c->line_size_bits;
    131    UInt  set1   = block1 & c->sets_min_1;
    132 
    133    /* Tags used in real caches are minimal to save space.
    134     * As the last bits of the block number of addresses mapping
    135     * into one cache set are the same, real caches use as tag
    136     *   tag = block >> log2(#sets)
    137     * But using the memory block as more specific tag is fine,
    138     * and saves instructions.
    139     */
    140    UWord tag1   = block1;
    141 
    142    /* Access entirely within line. */
    143    if (block1 == block2)
    144       return cachesim_setref_is_miss(c, set1, tag1);
    145 
    146    /* Access straddles two lines. */
    147    else if (block1 + 1 == block2) {
    148       UInt  set2 = block2 & c->sets_min_1;
    149       UWord tag2 = block2;
    150 
    151       /* always do both, as state is updated as side effect */
    152       if (cachesim_setref_is_miss(c, set1, tag1)) {
    153          cachesim_setref_is_miss(c, set2, tag2);
    154          return True;
    155       }
    156       return cachesim_setref_is_miss(c, set2, tag2);
    157    }
    158    VG_(printf)("addr: %lx  size: %u  blocks: %lu %lu",
    159                a, size, block1, block2);
    160    VG_(tool_panic)("item straddles more than two cache sets");
    161    /* not reached */
    162    return True;
    163 }
    164 
    165 
    166 static cache_t2 LL;
    167 static cache_t2 I1;
    168 static cache_t2 D1;
    169 
    170 static void cachesim_initcaches(cache_t I1c, cache_t D1c, cache_t LLc)
    171 {
    172    cachesim_initcache(I1c, &I1);
    173    cachesim_initcache(D1c, &D1);
    174    cachesim_initcache(LLc, &LL);
    175 }
    176 
    177 __attribute__((always_inline))
    178 static __inline__
    179 void cachesim_I1_doref_Gen(Addr a, UChar size, ULong* m1, ULong *mL)
    180 {
    181    if (cachesim_ref_is_miss(&I1, a, size)) {
    182       (*m1)++;
    183       if (cachesim_ref_is_miss(&LL, a, size))
    184          (*mL)++;
    185    }
    186 }
    187 
    188 // common special case IrNoX
    189 __attribute__((always_inline))
    190 static __inline__
    191 void cachesim_I1_doref_NoX(Addr a, UChar size, ULong* m1, ULong *mL)
    192 {
    193    UWord block  = a >> I1.line_size_bits;
    194    UInt  I1_set = block & I1.sets_min_1;
    195 
    196    // use block as tag
    197    if (cachesim_setref_is_miss(&I1, I1_set, block)) {
    198       UInt  LL_set = block & LL.sets_min_1;
    199       (*m1)++;
    200       // can use block as tag as L1I and LL cache line sizes are equal
    201       if (cachesim_setref_is_miss(&LL, LL_set, block))
    202          (*mL)++;
    203    }
    204 }
    205 
    206 __attribute__((always_inline))
    207 static __inline__
    208 void cachesim_D1_doref(Addr a, UChar size, ULong* m1, ULong *mL)
    209 {
    210    if (cachesim_ref_is_miss(&D1, a, size)) {
    211       (*m1)++;
    212       if (cachesim_ref_is_miss(&LL, a, size))
    213          (*mL)++;
    214    }
    215 }
    216 
    217 /* Check for special case IrNoX. Called at instrumentation time.
    218  *
    219  * Does this Ir only touch one cache line, and are L1I/LL cache
    220  * line sizes the same? This allows to get rid of a runtime check.
    221  *
    222  * Returning false is always fine, as this calls the generic case
    223  */
    224 static Bool cachesim_is_IrNoX(Addr a, UChar size)
    225 {
    226    UWord block1, block2;
    227 
    228    if (I1.line_size_bits != LL.line_size_bits) return False;
    229    block1 =  a         >> I1.line_size_bits;
    230    block2 = (a+size-1) >> I1.line_size_bits;
    231    if (block1 != block2) return False;
    232 
    233    return True;
    234 }
    235 
    236 /*--------------------------------------------------------------------*/
    237 /*--- end                                                 cg_sim.c ---*/
    238 /*--------------------------------------------------------------------*/
    239 
    240