Home | History | Annotate | Download | only in cachegrind
      1 /*--------------------------------------------------------------------*/
      2 /*--- Cachegrind: cache configuration.                   cg-arch.c ---*/
      3 /*--------------------------------------------------------------------*/
      4 
      5 /*
      6    This file is part of Cachegrind, a Valgrind tool for cache
      7    profiling programs.
      8 
      9    Copyright (C) 2011-2013 Nicholas Nethercote
     10       njn (at) valgrind.org
     11 
     12    This program is free software; you can redistribute it and/or
     13    modify it under the terms of the GNU General Public License as
     14    published by the Free Software Foundation; either version 2 of the
     15    License, or (at your option) any later version.
     16 
     17    This program is distributed in the hope that it will be useful, but
     18    WITHOUT ANY WARRANTY; without even the implied warranty of
     19    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     20    General Public License for more details.
     21 
     22    You should have received a copy of the GNU General Public License
     23    along with this program; if not, write to the Free Software
     24    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     25    02111-1307, USA.
     26 
     27    The GNU General Public License is contained in the file COPYING.
     28 */
     29 
     30 #include "pub_tool_basics.h"
     31 #include "pub_tool_libcassert.h"
     32 #include "pub_tool_libcbase.h"
     33 #include "pub_tool_libcprint.h"
     34 #include "pub_tool_options.h"
     35 #include "pub_tool_machine.h"
     36 
     37 #include "cg_arch.h"
     38 
// Forward declaration: configure_caches() is defined near the end of this
// file but is called earlier, from VG_(post_clo_init_configure_caches)().
static void configure_caches(cache_t* I1c, cache_t* D1c, cache_t* LLc,
                             Bool all_caches_clo_defined);
     41 
     42 // Checks cache config is ok.  Returns NULL if ok, or a pointer to an error
     43 // string otherwise.
     44 static const HChar* check_cache(cache_t* cache)
     45 {
     46    // Simulator requires set count to be a power of two.
     47    if ((cache->size % (cache->line_size * cache->assoc) != 0) ||
     48        (-1 == VG_(log2)(cache->size/cache->line_size/cache->assoc)))
     49    {
     50       return "Cache set count is not a power of two.\n";
     51    }
     52 
     53    // Simulator requires line size to be a power of two.
     54    if (-1 == VG_(log2)(cache->line_size)) {
     55       return "Cache line size is not a power of two.\n";
     56    }
     57 
     58    // Then check line size >= 16 -- any smaller and a single instruction could
     59    // straddle three cache lines, which breaks a simulation assertion and is
     60    // stupid anyway.
     61    if (cache->line_size < MIN_LINE_SIZE) {
     62       return "Cache line size is too small.\n";
     63    }
     64 
     65    /* Then check cache size > line size (causes seg faults if not). */
     66    if (cache->size <= cache->line_size) {
     67       return "Cache size <= line size.\n";
     68    }
     69 
     70    /* Then check assoc <= (size / line size) (seg faults otherwise). */
     71    if (cache->assoc > (cache->size / cache->line_size)) {
     72       return "Cache associativity > (size / line size).\n";
     73    }
     74 
     75    return NULL;
     76 }
     77 
     78 
     79 static void parse_cache_opt ( cache_t* cache, const HChar* opt,
     80                               const HChar* optval )
     81 {
     82    Long i1, i2, i3;
     83    HChar* endptr;
     84    const HChar* checkRes;
     85 
     86    // Option argument looks like "65536,2,64".  Extract them.
     87    i1 = VG_(strtoll10)(optval,   &endptr); if (*endptr != ',')  goto bad;
     88    i2 = VG_(strtoll10)(endptr+1, &endptr); if (*endptr != ',')  goto bad;
     89    i3 = VG_(strtoll10)(endptr+1, &endptr); if (*endptr != '\0') goto bad;
     90 
     91    // Check for overflow.
     92    cache->size      = (Int)i1;
     93    cache->assoc     = (Int)i2;
     94    cache->line_size = (Int)i3;
     95    if (cache->size      != i1) goto overflow;
     96    if (cache->assoc     != i2) goto overflow;
     97    if (cache->line_size != i3) goto overflow;
     98 
     99    checkRes = check_cache(cache);
    100    if (checkRes) {
    101       VG_(fmsg)("%s", checkRes);
    102       goto bad;
    103    }
    104 
    105    return;
    106 
    107   bad:
    108    VG_(fmsg_bad_option)(opt, "");
    109 
    110   overflow:
    111    VG_(fmsg_bad_option)(opt,
    112       "One of the cache parameters was too large and overflowed.\n");
    113 }
    114 
    115 
    116 Bool VG_(str_clo_cache_opt)(const HChar *arg,
    117                             cache_t* clo_I1c,
    118                             cache_t* clo_D1c,
    119                             cache_t* clo_LLc)
    120 {
    121    const HChar* tmp_str;
    122 
    123    if      VG_STR_CLO(arg, "--I1", tmp_str) {
    124       parse_cache_opt(clo_I1c, arg, tmp_str);
    125       return True;
    126    } else if VG_STR_CLO(arg, "--D1", tmp_str) {
    127       parse_cache_opt(clo_D1c, arg, tmp_str);
    128       return True;
    129    } else if (VG_STR_CLO(arg, "--L2", tmp_str) || // for backwards compatibility
    130               VG_STR_CLO(arg, "--LL", tmp_str)) {
    131       parse_cache_opt(clo_LLc, arg, tmp_str);
    132       return True;
    133    } else
    134       return False;
    135 }
    136 
    137 static void umsg_cache_img(const HChar* desc, cache_t* c)
    138 {
    139    VG_(umsg)("  %s: %'d B, %d-way, %d B lines\n", desc,
    140              c->size, c->assoc, c->line_size);
    141 }
    142 
    143 // Verifies if c is a valid cache.
    144 // An invalid value causes an assert, unless clo_redefined is True.
    145 static void check_cache_or_override(const HChar* desc, cache_t* c, Bool clo_redefined)
    146 {
    147    const HChar* checkRes;
    148 
    149    checkRes = check_cache(c);
    150    if (checkRes) {
    151       VG_(umsg)("Auto-detected %s cache configuration not supported: %s",
    152                 desc, checkRes);
    153       umsg_cache_img(desc, c);
    154       if (!clo_redefined) {
    155          VG_(umsg)("As it probably should be supported, please report a bug!\n");
    156          VG_(umsg)("Bypass this message by using option --%s=...\n", desc);
    157          tl_assert(0);
    158       }
    159    }
    160 }
    161 
    162 
    163 /* If the LL cache config isn't something the simulation functions
    164    can handle, try to adjust it so it is.  Caches are characterised
    165    by (total size T, line size L, associativity A), and then we
    166    have
    167 
    168      number of sets S = T / (L * A)
    169 
    170    The required constraints are:
    171 
    172    * L must be a power of 2, but it always is in practice, so
    173      no problem there
    174 
    175    * A can be any value >= 1
    176 
    177    * T can be any value, but ..
    178 
    179    * S must be a power of 2.
    180 
    181    That sometimes gives a problem.  For example, some Core iX based
    182    Intel CPUs have T = 12MB, A = 16, L = 64, which gives 12288
    183    sets.  The "fix" in this case is to increase the associativity
    184    by 50% to 24, which reduces the number of sets to 8192, making
    185    it a power of 2.  That's what the following code does (handing
    186    the "3/2 rescaling case".)  We might need to deal with other
    187    ratios later (5/4 ?).
    188 
    189    The "fix" is "justified" (cough, cough) by alleging that
    190    increases of associativity above about 4 have very little effect
    191    on the actual miss rate.  It would be far more inaccurate to
    192    fudge this by changing the size of the simulated cache --
    193    changing the associativity is a much better option.
    194 */
    195 
    196 static void
    197 maybe_tweak_LLc(cache_t *LLc)
    198 {
    199   if (LLc->size > 0 && LLc->assoc > 0 && LLc->line_size > 0) {
    200       Long nSets = (Long)LLc->size / (Long)(LLc->line_size * LLc->assoc);
    201       if (/* stay sane */
    202           nSets >= 4
    203           /* nSets is not a power of 2 */
    204           && VG_(log2_64)( (ULong)nSets ) == -1
    205           /* nSets is 50% above a power of 2 */
    206           && VG_(log2_64)( (ULong)((2 * nSets) / (Long)3) ) != -1
    207           /* associativity can be increased by exactly 50% */
    208           && (LLc->assoc % 2) == 0
    209          ) {
    210          /* # sets is 1.5 * a power of two, but the associativity is
    211             even, so we can increase that up by 50% and implicitly
    212             scale the # sets down accordingly. */
    213          Int new_assoc = LLc->assoc + (LLc->assoc / 2);
    214          VG_(dmsg)("warning: pretending that LL cache has associativity"
    215                    " %d instead of actual %d\n", new_assoc, LLc->assoc);
    216          LLc->assoc = new_assoc;
    217       }
    218    }
    219 }
    220 
    221 void VG_(post_clo_init_configure_caches)(cache_t* I1c,
    222                                          cache_t* D1c,
    223                                          cache_t* LLc,
    224                                          cache_t* clo_I1c,
    225                                          cache_t* clo_D1c,
    226                                          cache_t* clo_LLc)
    227 {
    228 #define DEFINED(L)   (-1 != L->size  || -1 != L->assoc || -1 != L->line_size)
    229 
    230    // Count how many were defined on the command line.
    231    Bool all_caches_clo_defined =
    232       (DEFINED(clo_I1c) &&
    233        DEFINED(clo_D1c) &&
    234        DEFINED(clo_LLc));
    235 
    236    // Set the cache config (using auto-detection, if supported by the
    237    // architecture).
    238    configure_caches( I1c, D1c, LLc, all_caches_clo_defined );
    239 
    240    maybe_tweak_LLc( LLc );
    241 
    242    // Check the default/auto-detected values.
    243    // Allow the user to override invalid auto-detected caches
    244    // with command line.
    245    check_cache_or_override ("I1", I1c, DEFINED(clo_I1c));
    246    check_cache_or_override ("D1", D1c, DEFINED(clo_D1c));
    247    check_cache_or_override ("LL", LLc, DEFINED(clo_LLc));
    248 
    249    // Then replace with any defined on the command line.  (Already checked in
    250    // VG(parse_clo_cache_opt)().)
    251    if (DEFINED(clo_I1c)) { *I1c = *clo_I1c; }
    252    if (DEFINED(clo_D1c)) { *D1c = *clo_D1c; }
    253    if (DEFINED(clo_LLc)) { *LLc = *clo_LLc; }
    254 
    255    if (VG_(clo_verbosity) >= 2) {
    256       VG_(umsg)("Cache configuration used:\n");
    257       umsg_cache_img ("I1", I1c);
    258       umsg_cache_img ("D1", D1c);
    259       umsg_cache_img ("LL", LLc);
    260    }
    261 #undef DEFINED
    262 }
    263 
    264 void VG_(print_cache_clo_opts)()
    265 {
    266    VG_(printf)(
    267 "    --I1=<size>,<assoc>,<line_size>  set I1 cache manually\n"
    268 "    --D1=<size>,<assoc>,<line_size>  set D1 cache manually\n"
    269 "    --LL=<size>,<assoc>,<line_size>  set LL cache manually\n"
    270                );
    271 }
    272 
    273 
    274 // Traverse the cache info and return a cache of the given kind and level.
    275 // Return NULL if no such cache exists.
    276 static const VexCache *
    277 locate_cache(const VexCacheInfo *ci, VexCacheKind kind, UInt level)
    278 {
    279    const VexCache *c;
    280 
    281    for (c = ci->caches; c != ci->caches + ci->num_caches; ++c) {
    282       if (c->level == level && c->kind == kind) {
    283          return c;
    284       }
    285    }
    286    return NULL;  // not found
    287 }
    288 
    289 
    290 // Gives the auto-detected configuration of I1, D1 and LL caches.  They get
    291 // overridden by any cache configurations specified on the command line.
    292 static void
    293 configure_caches(cache_t *I1c, cache_t *D1c, cache_t *LLc,
    294                  Bool all_caches_clo_defined)
    295 {
    296    VexArchInfo vai;
    297    const VexCacheInfo *ci;
    298    const VexCache *i1, *d1, *ll;
    299 
    300    VG_(machine_get_VexArchInfo)(NULL, &vai);
    301    ci = &vai.hwcache_info;
    302 
    303    // Extract what we need
    304    i1 = locate_cache(ci, INSN_CACHE, 1);
    305    d1 = locate_cache(ci, DATA_CACHE, 1);
    306    ll = locate_cache(ci, UNIFIED_CACHE, ci->num_levels);
    307 
    308    if (ci->num_caches > 0 && ll == NULL) {
    309       VG_(dmsg)("warning: L2 cache not installed, ignore LL results.\n");
    310    }
    311 
    312    if (ll && ci->num_levels > 2) {
    313       VG_(dmsg)("warning: L%u cache found, using its data for the "
    314                 "LL simulation.\n", ci->num_levels);
    315    }
    316 
    317    if (i1 && d1 && ll) {
    318       if (i1->is_trace_cache) {
    319          /* HACK ALERT: Instruction trace cache -- capacity is micro-ops based.
    320           * conversion to byte size is a total guess;  treat the 12K and 16K
    321           * cases the same since the cache byte size must be a power of two for
    322           * everything to work!.  Also guessing 32 bytes for the line size...
    323           */
    324          UInt adjusted_size, guessed_line_size = 32;
    325 
    326          if (i1->sizeB == 12 * 1024 || i1->sizeB == 16 * 1024) {
    327             adjusted_size = 16 * 1024;
    328          } else {
    329             adjusted_size = 32 * 1024;
    330          }
    331          VG_(dmsg)("warning: Pentium 4 with %u KB micro-op instruction trace cache\n",
    332                    i1->sizeB / 1024);
    333          VG_(dmsg)("         Simulating a %d KB I-cache with %d B lines\n",
    334                    adjusted_size / 1024, guessed_line_size);
    335 
    336          *I1c = (cache_t) { adjusted_size, i1->assoc, guessed_line_size };
    337       } else {
    338          *I1c = (cache_t) { i1->sizeB, i1->assoc, i1->line_sizeB };
    339       }
    340       *D1c = (cache_t) { d1->sizeB, d1->assoc, d1->line_sizeB };
    341       *LLc = (cache_t) { ll->sizeB, ll->assoc, ll->line_sizeB };
    342 
    343       return;
    344    }
    345 
    346    // Cache information could not be queried; choose some default
    347    // architecture specific default setting.
    348 
    349 #if defined(VGA_ppc32)
    350 
    351    // Default cache configuration
    352    *I1c = (cache_t) {  65536, 2, 64 };
    353    *D1c = (cache_t) {  65536, 2, 64 };
    354    *LLc = (cache_t) { 262144, 8, 64 };
    355 
    356 #elif defined(VGA_ppc64)
    357 
    358    // Default cache configuration
    359    *I1c = (cache_t) {  65536, 2, 64 };
    360    *D1c = (cache_t) {  65536, 2, 64 };
    361    *LLc = (cache_t) { 262144, 8, 64 };
    362 
    363 #elif defined(VGA_arm)
    364 
    365    // Set caches to default (for Cortex-A8 ?)
    366    *I1c = (cache_t) {  16384, 4, 64 };
    367    *D1c = (cache_t) {  16384, 4, 64 };
    368    *LLc = (cache_t) { 262144, 8, 64 };
    369 
    370 #elif defined(VGA_arm64)
    371 
    372    // Copy the 32-bit ARM version until such time as we have
    373    // some real hardware to run on
    374    *I1c = (cache_t) {  16384, 4, 64 };
    375    *D1c = (cache_t) {  16384, 4, 64 };
    376    *LLc = (cache_t) { 262144, 8, 64 };
    377 
    378 #elif defined(VGA_s390x)
    379    //
    380    // Here is the cache data from older machine models:
    381    //
    382    //           I1            D1      I/D L2
    383    // z900  256k/256/4    256k/256/4   16MB
    384    // z800  256k/256/4    256k/256/4    8MB
    385    // z990  256k/256/4    256k/256/4   32MB
    386    // z890  256k/256/4    256k/256/4   32MB
    387    // z9    256k/256/4    256k/256/4   40MB
    388    //
    389    // Sources:
    390    // (1) IBM System z9 109 Technical Introduction
    391    //     www.redbooks.ibm.com/redbooks/pdfs/sg246669.pdf
    392    // (2) The microarchitecture of the IBM eServer z900 processor
    393    //     IBM Journal of Research and Development
    394    //     Volume 46, Number 4/5, pp 381-395, July/September 2002
    395    // (3) The IBM eServer z990 microprocessor
    396    //     IBM Journal of Research and Development
    397    //     Volume 48, Number 3/4, pp 295-309, May/July 2004
    398    // (4) Charles Webb, IBM
    399    //
    400    // L2 data is unfortunately incomplete. Otherwise, we could support
    401    // machines without the ECAG insn by looking at VEX_S390X_MODEL(hwcaps).
    402 
    403    // Default cache configuration is z10-EC  (Source: ECAG insn)
    404    *I1c = (cache_t) {    65536,  4, 256 };
    405    *D1c = (cache_t) {   131072,  8, 256 };
    406    *LLc = (cache_t) { 50331648, 24, 256 };
    407 
    408 #elif defined(VGA_mips32)
    409 
    410    // Set caches to default (for MIPS32-r2(mips 74kc))
    411    *I1c = (cache_t) {  32768, 4, 32 };
    412    *D1c = (cache_t) {  32768, 4, 32 };
    413    *LLc = (cache_t) { 524288, 8, 32 };
    414 
    415 #elif defined(VGA_mips64)
    416 
    417    // Set caches to default (for MIPS64 - 5kc)
    418    *I1c = (cache_t) {  32768, 4, 32 };
    419    *D1c = (cache_t) {  32768, 4, 32 };
    420    *LLc = (cache_t) { 524288, 8, 32 };
    421 
    422 #elif defined(VGA_x86) || defined(VGA_amd64)
    423 
    424    *I1c = (cache_t) {  65536, 2, 64 };
    425    *D1c = (cache_t) {  65536, 2, 64 };
    426    *LLc = (cache_t) { 262144, 8, 64 };
    427 
    428 #else
    429 
    430 #error "Unknown arch"
    431 
    432 #endif
    433 
    434    if (!all_caches_clo_defined) {
    435       const HChar warning[] =
    436         "Warning: Cannot auto-detect cache config, using defaults.\n"
    437         "         Run with -v to see.\n";
    438       VG_(dmsg)("%s", warning);
    439    }
    440 }
    441 
    442 /*--------------------------------------------------------------------*/
    443 /*--- end                                                          ---*/
    444 /*--------------------------------------------------------------------*/
    445