/* -*- mode: C; c-basic-offset: 3; -*- */

/*--------------------------------------------------------------------*/
/*--- Cache-related stuff.                               m_cache.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2002-2017 Nicholas Nethercote
      njn (at) valgrind.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_core_basics.h"
#include "pub_core_libcbase.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcprint.h"
#include "pub_core_mallocfree.h"
#include "pub_core_machine.h"
#include "pub_core_debuglog.h"
#include "libvex.h"

#if defined(VGA_x86) || defined(VGA_amd64)

#include "pub_core_cpuid.h"

// All CPUID info taken from sandpile.org/ia32/cpuid.htm
// Probably only works for Intel and AMD chips, and probably only for some of
// them.

static void
add_cache(VexCacheInfo *ci, VexCache cache)
{
   static UInt num_allocated = 0;

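   /* Grow the array in chunks of six entries to keep the number of
      reallocations small; num_allocated tracks the current capacity. */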
   if (ci->num_caches == num_allocated) {
      num_allocated += 6;
      ci->caches = VG_(realloc)("m_cache", ci->caches,
                                num_allocated * sizeof *ci->caches);
   }

   if (ci->num_levels < cache.level) ci->num_levels = cache.level;
   ci->caches[ci->num_caches++] = cache;
}

/* Convenience macros */
#define add_icache(level, size, assoc, linesize) \
   do { \
      add_cache(ci, \
                VEX_CACHE_INIT(INSN_CACHE, level, size, linesize, assoc)); \
   } while (0)

#define add_dcache(level, size, assoc, linesize) \
   do { \
      add_cache(ci, \
                VEX_CACHE_INIT(DATA_CACHE, level, size, linesize, assoc)); \
   } while (0)

#define add_ucache(level, size, assoc, linesize) \
   do { \
      add_cache(ci, \
                VEX_CACHE_INIT(UNIFIED_CACHE, level, size, linesize, assoc)); \
   } while (0)

#define add_itcache(level, size, assoc) \
   do { \
      VexCache c = \
          VEX_CACHE_INIT(INSN_CACHE, level, size, 0, assoc); \
      c.is_trace_cache = True; \
      add_cache(ci, c); \
   } while (0)

#define add_I1(size, assoc, linesize) add_icache(1, size, assoc, linesize)
#define add_D1(size, assoc, linesize) add_dcache(1, size, assoc, linesize)
#define add_U1(size, assoc, linesize) add_ucache(1, size, assoc, linesize)
#define add_I2(size, assoc, linesize) add_icache(2, size, assoc, linesize)
#define add_D2(size, assoc, linesize) add_dcache(2, size, assoc, linesize)
#define add_U2(size, assoc, linesize) add_ucache(2, size, assoc, linesize)
#define add_I3(size, assoc, linesize) add_icache(3, size, assoc, linesize)
#define add_D3(size, assoc, linesize) add_dcache(3, size, assoc, linesize)
#define add_U3(size, assoc, linesize) add_ucache(3, size, assoc, linesize)

#define add_I1T(size, assoc) \
   add_itcache(1, size, assoc)

/* The Intel method is truly wretched.  We have to do an insane indexing
 * into an array of pre-defined configurations for various parts of the
 * memory hierarchy, as described in Intel Processor Identification,
 * App Note 485.
 *
 * If an L3 cache is found, it is recorded in *ci alongside the L2; all
 * detected caches end up in ci->caches.
 */
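/* For example, a descriptor byte of 0x2c anywhere in the 15 usable result
   bytes of CPUID leaf 2 means "32 KB D1, 8-way, 64-byte lines" and is
   handled by the 0x2c case in the switch below. */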
static Int
Intel_cache_info(Int level, VexCacheInfo *ci)
{
   UInt cpuid1_eax;
   UInt cpuid1_ignore;
   Int family;
   Int model;
   UChar info[16];
   Int   i, j, trials;

   if (level < 2) {
      VG_(debugLog)(1, "cache", "warning: CPUID level < 2 for Intel "
                    "processor (%d)\n", level);
      return -1;
   }

   /* family/model needed to distinguish code reuse (currently 0x49) */
   VG_(cpuid)(1, 0, &cpuid1_eax, &cpuid1_ignore,
              &cpuid1_ignore, &cpuid1_ignore);
   family = (((cpuid1_eax >> 20) & 0xff) << 4) + ((cpuid1_eax >> 8) & 0xf);
   model =  (((cpuid1_eax >> 16) & 0xf) << 4) + ((cpuid1_eax >> 4) & 0xf);
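   /* Worked example (hypothetical EAX value): cpuid1_eax == 0x00000f60
      gives family = (0x00 << 4) + 0xf = 15 and model = (0x0 << 4) + 0x6
      = 6, i.e. the Xeon MP combination special-cased for descriptor
      0x49 below. */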

   VG_(cpuid)(2, 0, (UInt*)&info[0], (UInt*)&info[4],
                    (UInt*)&info[8], (UInt*)&info[12]);
   trials  = info[0] - 1;   /* AL register - bits 0..7 of %eax */
   info[0] = 0x0;           /* reset AL */

   if (0 != trials) {
      VG_(debugLog)(1, "cache", "warning: non-zero CPUID trials for Intel "
                    "processor (%d)\n", trials);
      return -1;
   }

   ci->num_levels = 0;
   ci->num_caches = 0;
   ci->icaches_maintain_coherence = True;
   ci->caches = NULL;

   for (i = 0; i < 16; i++) {

      switch (info[i]) {

      case 0x0:       /* ignore zeros */
          break;

      /* TLB info, ignore */
      case 0x01: case 0x02: case 0x03: case 0x04: case 0x05:
      case 0x0b:
      case 0x4f: case 0x50: case 0x51: case 0x52: case 0x55:
      case 0x56: case 0x57: case 0x59:
      case 0x5a: case 0x5b: case 0x5c: case 0x5d:
      case 0x76:
      case 0xb0: case 0xb1: case 0xb2:
      case 0xb3: case 0xb4: case 0xba: case 0xc0:
      case 0xca:
          break;

      case 0x06: add_I1( 8, 4, 32); break;
      case 0x08: add_I1(16, 4, 32); break;
      case 0x09: add_I1(32, 4, 64); break;
      case 0x30: add_I1(32, 8, 64); break;

      case 0x0a: add_D1( 8, 2, 32); break;
      case 0x0c: add_D1(16, 4, 32); break;
      case 0x0d: add_D1(16, 4, 64); break;
      case 0x0e: add_D1(24, 6, 64); break;
      case 0x2c: add_D1(32, 8, 64); break;

      /* IA-64 info -- panic! */
      case 0x10: case 0x15: case 0x1a:
      case 0x88: case 0x89: case 0x8a: case 0x8d:
      case 0x90: case 0x96: case 0x9b:
         VG_(core_panic)("IA-64 cache detected?!");

      /* L3 cache info. */
      case 0x22: add_U3(512,    4, 64); break;
      case 0x23: add_U3(1024,   8, 64); break;
      case 0x25: add_U3(2048,   8, 64); break;
      case 0x29: add_U3(4096,   8, 64); break;
      case 0x46: add_U3(4096,   4, 64); break;
      case 0x47: add_U3(8192,   8, 64); break;
      case 0x4a: add_U3(6144,  12, 64); break;
      case 0x4b: add_U3(8192,  16, 64); break;
      case 0x4c: add_U3(12288, 12, 64); break;
      case 0x4d: add_U3(16384, 16, 64); break;
      case 0xd0: add_U3(512,    4, 64); break;
      case 0xd1: add_U3(1024,   4, 64); break;
      case 0xd2: add_U3(2048,   4, 64); break;
      case 0xd6: add_U3(1024,   8, 64); break;
      case 0xd7: add_U3(2048,   8, 64); break;
      case 0xd8: add_U3(4096,   8, 64); break;
      case 0xdc: add_U3(1536,  12, 64); break;
      case 0xdd: add_U3(3072,  12, 64); break;
      case 0xde: add_U3(6144,  12, 64); break;
      case 0xe2: add_U3(2048,  16, 64); break;
      case 0xe3: add_U3(4096,  16, 64); break;
      case 0xe4: add_U3(8192,  16, 64); break;
      case 0xea: add_U3(12288, 24, 64); break;
      case 0xeb: add_U3(18432, 24, 64); break;
      case 0xec: add_U3(24576, 24, 64); break;

      /* Described as "MLC" in Intel documentation */
      case 0x21: add_U2(256, 8, 64); break;

      /* These are sectored, whatever that means */
         // FIXME: I did not find these in the Intel docs
      case 0x39: add_U2(128, 4, 64); break;
      case 0x3c: add_U2(256, 4, 64); break;

      /* If a P6 core, this means "no L2 cache".
         If a P4 core, this means "no L3 cache".
         We don't know what core it is, so don't issue a warning.  A
         genuinely missing L2 will be flagged later by the checks in
         cache_info_is_sensible(). */
      case 0x40:
          break;

      case 0x41: add_U2(  128,  4, 32); break;
      case 0x42: add_U2(  256,  4, 32); break;
      case 0x43: add_U2(  512,  4, 32); break;
      case 0x44: add_U2( 1024,  4, 32); break;
      case 0x45: add_U2( 2048,  4, 32); break;
      case 0x48: add_U2( 3072, 12, 64); break;
      case 0x4e: add_U2( 6144, 24, 64); break;
      case 0x49:
         if (family == 15 && model == 6) {
            /* On Xeon MP (family F, model 6), this is for L3 */
            add_U3(4096, 16, 64);
         } else {
            add_U2(4096, 16, 64);
         }
         break;

      /* These are sectored, whatever that means */
      case 0x60: add_D1(16, 8, 64);  break;      /* sectored */
      case 0x66: add_D1( 8, 4, 64);  break;      /* sectored */
      case 0x67: add_D1(16, 4, 64);  break;      /* sectored */
      case 0x68: add_D1(32, 4, 64);  break;      /* sectored */

      /* HACK ALERT: Instruction trace cache -- capacity is micro-ops based.
       * Conversion to a byte size is a total guess; treat the 12K and 16K
       * cases the same, since the cache byte size must be a power of two
       * for everything to work.  A line size is not meaningful for a
       * trace cache, so add_itcache records it as 0.
       */
      case 0x70:    /* 12K micro-ops, 8-way */
         add_I1T(12, 8);
         break;
      case 0x71:    /* 16K micro-ops, 8-way */
         add_I1T(16, 8);
         break;
      case 0x72:    /* 32K micro-ops, 8-way */
         add_I1T(32, 8);
         break;

      /* not sectored, whatever that might mean */
      case 0x78: add_U2(1024, 4,  64);  break;

      /* These are sectored, whatever that means */
      case 0x79: add_U2( 128, 8,  64);  break;
      case 0x7a: add_U2( 256, 8,  64);  break;
      case 0x7b: add_U2( 512, 8,  64);  break;
      case 0x7c: add_U2(1024, 8,  64);  break;
      case 0x7d: add_U2(2048, 8,  64);  break;
      case 0x7e: add_U2( 256, 8, 128);  break;
      case 0x7f: add_U2( 512, 2,  64);  break;
      case 0x80: add_U2( 512, 8,  64);  break;
      case 0x81: add_U2( 128, 8,  32);  break;
      case 0x82: add_U2( 256, 8,  32);  break;
      case 0x83: add_U2( 512, 8,  32);  break;
      case 0x84: add_U2(1024, 8,  32);  break;
      case 0x85: add_U2(2048, 8,  32);  break;
      case 0x86: add_U2( 512, 4,  64);  break;
      case 0x87: add_U2(1024, 8,  64);  break;

      /* Ignore prefetch information */
      case 0xf0: case 0xf1:
         break;

      case 0xff:
         j = 0;
         VG_(cpuid)(4, j++, (UInt*)&info[0], (UInt*)&info[4],
                            (UInt*)&info[8], (UInt*)&info[12]);

         while ((info[0] & 0x1f) != 0) {
            UInt assoc = ((*(UInt *)&info[4] >> 22) & 0x3ff) + 1;
            UInt parts = ((*(UInt *)&info[4] >> 12) & 0x3ff) + 1;
            UInt line_size = (*(UInt *)&info[4] & 0x7ff) + 1;
            UInt sets = *(UInt *)&info[8] + 1;

            UInt size = assoc * parts * line_size * sets / 1024;
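            /* e.g. with hypothetical leaf-4 values assoc = 8, parts = 1,
               line_size = 64 and sets = 64, size = 8*1*64*64/1024 = 32,
               i.e. a 32 KB cache. */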

            switch ((info[0] & 0xe0) >> 5)
            {
            case 1:
               switch (info[0] & 0x1f)
               {
               case 1: add_D1(size, assoc, line_size); break;
               case 2: add_I1(size, assoc, line_size); break;
               case 3: add_U1(size, assoc, line_size); break;
               default:
                  VG_(debugLog)(1, "cache",
                                "warning: L1 cache of unknown type ignored\n");
                  break;
               }
               break;
            case 2:
               switch (info[0] & 0x1f)
               {
               case 1: add_D2(size, assoc, line_size); break;
               case 2: add_I2(size, assoc, line_size); break;
               case 3: add_U2(size, assoc, line_size); break;
               default:
                  VG_(debugLog)(1, "cache",
                                "warning: L2 cache of unknown type ignored\n");
                  break;
               }
               break;
            case 3:
               switch (info[0] & 0x1f)
               {
               case 1: add_D3(size, assoc, line_size); break;
               case 2: add_I3(size, assoc, line_size); break;
               case 3: add_U3(size, assoc, line_size); break;
               default:
                  VG_(debugLog)(1, "cache",
                                "warning: L3 cache of unknown type ignored\n");
                  break;
               }
               break;
            default:
               VG_(debugLog)(1, "cache", "warning: L%u cache ignored\n",
                             (info[0] & 0xe0) >> 5);
               break;
            }

            VG_(cpuid)(4, j++, (UInt*)&info[0], (UInt*)&info[4],
                               (UInt*)&info[8], (UInt*)&info[12]);
         }
         break;

      default:
         VG_(debugLog)(1, "cache",
                       "warning: Unknown Intel cache config value (0x%x), "
                       "ignoring\n", info[i]);
         break;
      }
   }

   return 0;
}

/* The AMD method is straightforward: just extract the appropriate bits
 * from the result registers.
 *
 * Bits, for D1 and I1:
 *  31..24  data L1 cache size in KBs
 *  23..16  data L1 cache associativity (FFh=full)
 *  15.. 8  data L1 cache lines per tag
 *   7.. 0  data L1 cache line size in bytes
 *
 * Bits, for L2:
 *  31..16  unified L2 cache size in KBs
 *  15..12  unified L2 cache associativity (0=off, FFh=full)
 *  11.. 8  unified L2 cache lines per tag
 *   7.. 0  unified L2 cache line size in bytes
 *
 * #3  The AMD K7 processor's L2 cache must be configured prior to relying
 *     upon this information. (Whatever that means -- njn)
 *
 * Also, according to Cyrille Chepelov, Duron stepping A0 processors (model
 * 0x630) have a bug and misreport their L2 size as 1KB (it's really 64KB),
 * so we detect that.
 *
 * Returns 0 on success, non-zero on failure.  As with the Intel code
 * above, if an L3 cache is found, it is recorded in *ci in addition to
 * the L2.
 */
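/* Worked example (hypothetical register value): ECX = 0x40020140 from
   leaf 0x80000005 decodes as a 64 KB, 2-way D1 cache with 1 line per tag
   and 64-byte lines. */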

/* A small helper */
static Int
decode_AMD_cache_L2_L3_assoc ( Int bits_15_12 )
{
   /* Decode an L2/L3 associativity indication.  It is encoded
      differently from the I1/D1 associativity.  Returns 1
      (direct-mapped) as a safe but suboptimal result for unknown
      encodings. */
   switch (bits_15_12 & 0xF) {
      case 1: return 1;    case 2: return 2;
      case 4: return 4;    case 6: return 8;
      case 8: return 16;   case 0xA: return 32;
      case 0xB: return 48; case 0xC: return 64;
      case 0xD: return 96; case 0xE: return 128;
      case 0xF: /* fully associative */
      case 0: /* L2/L3 cache or TLB is disabled */
      default:
        return 1;
   }
}

static Int
AMD_cache_info(VexCacheInfo *ci)
{
   UInt ext_level;
   UInt dummy, model;
   UInt I1i, D1i, L2i, L3i;
   UInt size, line_size, assoc;

   VG_(cpuid)(0x80000000, 0, &ext_level, &dummy, &dummy, &dummy);

   if (0 == (ext_level & 0x80000000) || ext_level < 0x80000006) {
      VG_(debugLog)(1, "cache", "warning: ext_level < 0x80000006 for AMD "
                    "processor (0x%x)\n", ext_level);
      return -1;
   }

   VG_(cpuid)(0x80000005, 0, &dummy, &dummy, &D1i, &I1i);
   VG_(cpuid)(0x80000006, 0, &dummy, &dummy, &L2i, &L3i);

   VG_(cpuid)(0x1, 0, &model, &dummy, &dummy, &dummy);

   /* Check for Duron bug */
   if (model == 0x630) {
      VG_(debugLog)(1, "cache", "warning: Buggy Duron stepping A0. "
                    "Assuming L2 size=65536 bytes\n");
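      /* Overwrite the size field (bits 31..16) with 64 KB, keeping the
         associativity and line-size bits intact. */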
      L2i = (64 << 16) | (L2i & 0xffff);
   }

   ci->num_levels = 2;
   ci->num_caches = 3;
   ci->icaches_maintain_coherence = True;

   /* Check for L3 cache */
   if (((L3i >> 18) & 0x3fff) > 0) {
      ci->num_levels = 3;
      ci->num_caches = 4;
   }

   ci->caches = VG_(malloc)("m_cache", ci->num_caches * sizeof *ci->caches);

   // D1
   size      = (D1i >> 24) & 0xff;
   assoc     = (D1i >> 16) & 0xff;
   line_size = (D1i >>  0) & 0xff;
   ci->caches[0] = VEX_CACHE_INIT(DATA_CACHE, 1, size, line_size, assoc);

   // I1
   size      = (I1i >> 24) & 0xff;
   assoc     = (I1i >> 16) & 0xff;
   line_size = (I1i >>  0) & 0xff;
   ci->caches[1] = VEX_CACHE_INIT(INSN_CACHE, 1, size, line_size, assoc);

   // L2    Nb: different bits used for L2
   size      = (L2i >> 16) & 0xffff;
   assoc     = decode_AMD_cache_L2_L3_assoc((L2i >> 12) & 0xf);
   line_size = (L2i >>  0) & 0xff;
   ci->caches[2] = VEX_CACHE_INIT(UNIFIED_CACHE, 2, size, line_size, assoc);

   // L3, if any
   if (((L3i >> 18) & 0x3fff) > 0) {
      /* There's an L3 cache.  NB: the test in the if above is "L3 size
         > 0".  I don't know if this is the right way to test
         presence-vs-absence of an L3; I can't see any guidance on this
         in the AMD documentation. */
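      /* The size field counts 512 KB units (per the leaf 0x80000006
         definition), hence the scaling by 512 to get KB. */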
      size      = ((L3i >> 18) & 0x3fff) * 512;
      assoc     = decode_AMD_cache_L2_L3_assoc((L3i >> 12) & 0xf);
      line_size = (L3i >>  0) & 0xff;
      ci->caches[3] = VEX_CACHE_INIT(UNIFIED_CACHE, 3, size, line_size, assoc);
   }

   return 0;
}

static Int
get_caches_from_CPUID(VexCacheInfo *ci)
{
   Int  ret, i;
   UInt level;
   HChar vendor_id[13];

   vg_assert(VG_(has_cpuid)());

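   /* CPUID leaf 0 returns the vendor string spread across EBX, EDX and
      ECX, in that order; hence the shuffled destination offsets below
      ("Genu" | "ineI" | "ntel" reassembles to "GenuineIntel"). */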
   VG_(cpuid)(0, 0, &level, (UInt*)&vendor_id[0],
              (UInt*)&vendor_id[8], (UInt*)&vendor_id[4]);
   vendor_id[12] = '\0';

   if (0 == level) {    // CPUID level is 0, early Pentium?
      return -1;
   }

   /* Only handling Intel and AMD chips... no Cyrix, Transmeta, etc */
   if (0 == VG_(strcmp)(vendor_id, "GenuineIntel")) {
      ret = Intel_cache_info(level, ci);

   } else if (0 == VG_(strcmp)(vendor_id, "AuthenticAMD")) {
      ret = AMD_cache_info(ci);

   } else if (0 == VG_(strcmp)(vendor_id, "CentaurHauls")) {
      /* Total kludge.  Pretend to be a VIA Nehemiah. */
      ci->num_levels = 2;
      ci->num_caches = 3;
      ci->icaches_maintain_coherence = True;
      ci->caches = VG_(malloc)("m_cache", ci->num_caches * sizeof *ci->caches);
      ci->caches[0] = VEX_CACHE_INIT(DATA_CACHE,    1, 64, 16, 16);
      ci->caches[1] = VEX_CACHE_INIT(INSN_CACHE,    1, 64, 16,  4);
      ci->caches[2] = VEX_CACHE_INIT(UNIFIED_CACHE, 2, 64, 16, 16);

      ret = 0;

   } else {
      VG_(debugLog)(1, "cache", "CPU vendor ID not recognised (%s)\n",
                    vendor_id);
      return -1;
   }

   /* Successful!  Convert sizes from KB to bytes */
   for (i = 0; i < ci->num_caches; ++i) {
      ci->caches[i].sizeB *= 1024;
   }

   return ret;
}

static Bool
get_cache_info(VexArchInfo *vai)
{
   Int ret = get_caches_from_CPUID(&vai->hwcache_info);

   return ret == 0 ? True : False;
}

#elif defined(VGA_arm) || defined(VGA_ppc32)    || \
   defined(VGA_ppc64be) || defined(VGA_ppc64le) || \
   defined(VGA_mips32) || defined(VGA_mips64) || defined(VGA_arm64)
static Bool
get_cache_info(VexArchInfo *vai)
{
   vai->hwcache_info.icaches_maintain_coherence = False;

   return False;   // not yet
}

#elif defined(VGA_s390x)

static ULong
ecag(UInt ai, UInt li, UInt ti)
{
   register ULong result asm("2") = 0;
   register ULong input  asm("3") = (ai << 4) | (li << 1) | ti;

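   /* Hand-assembled ECAG (EXTRACT CACHE ATTRIBUTE, RSY format, opcode
      0xEB..4C): ecag %r2,%r0,0(%r3).  The attribute selector is taken
      from the second-operand address (base register r3); the result is
      delivered in r2. */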
   asm volatile(".short 0xeb20\n\t"
                ".long  0x3000004c\n\t"
                : "=d" (result) : "d" (input));

   return result;
}

static UInt
get_cache_info_for_level(ULong topology, UInt level)
{
   return (topology >> (56 - level * 8)) & 0xff;
}

static ULong
get_line_size(UInt level, Bool is_insn_cache)
{
   return ecag(1, level, is_insn_cache);
}

static ULong
get_total_size(UInt level, Bool is_insn_cache)
{
   return ecag(2, level, is_insn_cache);
}

static ULong
get_associativity(UInt level, Bool is_insn_cache)
{
   return ecag(3, level, is_insn_cache);
}

static VexCache
get_cache(UInt level, VexCacheKind kind)
{
   Bool is_insn_cache = kind == INSN_CACHE;
   UInt size = get_total_size(level, is_insn_cache);
   UInt line_size = get_line_size(level, is_insn_cache);
   UInt assoc = get_associativity(level, is_insn_cache);

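   /* ECAG levels are 0-based, VexCache levels are 1-based, hence the
      "level + 1". */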
   return VEX_CACHE_INIT(kind, level + 1, size, line_size, assoc);
}

static Bool
get_cache_info(VexArchInfo *vai)
{
   VexCacheInfo *ci = &vai->hwcache_info;

   ci->icaches_maintain_coherence = True;

   if (! (vai->hwcaps & VEX_HWCAPS_S390X_GIE)) {
      // ECAG is not available
      return False;
   }

   UInt level, cache_kind, info, i;
   ULong topology = ecag(0, 0, 0);   // get summary

   /* ECAG supports at most 8 levels of cache. Find out how many levels
      of cache and how many caches there are. */
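   /* Each byte of the 64-bit summary describes one cache level, most
      significant byte first (see get_cache_info_for_level): bits 2-3
      tell whether a cache exists at that level, and bits 0-1 encode its
      kind, as decoded in the switch below. */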
   ci->num_levels = 0;
   ci->num_caches = 0;
   for (level = 0; level < 8; level++) {
      info = get_cache_info_for_level(topology, level);

      if ((info & 0xc) == 0) break;  // cache does not exist at this level
      ++ci->num_levels;

      cache_kind = info & 0x3;
      switch (cache_kind) {
      case 0:  ci->num_caches += 2; break; /* separate data and insn cache */
      case 1:  ci->num_caches += 1; break; /* only insn cache */
      case 2:  ci->num_caches += 1; break; /* only data cache */
      case 3:  ci->num_caches += 1; break; /* unified data and insn cache */
      }
   }

   ci->caches = VG_(malloc)("m_cache", ci->num_caches * sizeof *ci->caches);

   i = 0;
   for (level = 0; level < ci->num_levels; level++) {
      info = get_cache_info_for_level(topology, level);
      cache_kind = info & 0x3;
      switch (cache_kind) {
      case 0:   /* separate data and insn cache */
         ci->caches[i++] = get_cache(level, INSN_CACHE);
         ci->caches[i++] = get_cache(level, DATA_CACHE);
         break;

      case 1:   /* only insn cache */
         ci->caches[i++] = get_cache(level, INSN_CACHE);
         break;

      case 2:   /* only data cache */
         ci->caches[i++] = get_cache(level, DATA_CACHE);
         break;

      case 3:   /* unified data and insn cache */
         ci->caches[i++] = get_cache(level, UNIFIED_CACHE);
         break;
      }
   }
   return True;
}

#else

#error "Unknown arch"

#endif

/* Debug information */
static void
write_cache_info(const VexCacheInfo *ci)
{
   UInt i;

   VG_(debugLog)(1, "cache", "Cache info:\n");
   VG_(debugLog)(1, "cache", "  #levels = %u\n", ci->num_levels);
   VG_(debugLog)(1, "cache", "  #caches = %u\n", ci->num_caches);
   for (i = 0; i < ci->num_caches; ++i) {
      VexCache *c = ci->caches + i;
      const HChar *kind;
      VG_(debugLog)(1, "cache", "     cache #%u:\n", i);
      switch (c->kind) {
      case INSN_CACHE:    kind = "insn";    break;
      case DATA_CACHE:    kind = "data";    break;
      case UNIFIED_CACHE: kind = "unified"; break;
      default: kind = "unknown"; break;
      }
      VG_(debugLog)(1, "cache", "        kind = %s\n", kind);
      VG_(debugLog)(1, "cache", "        level = %u\n", c->level);
      VG_(debugLog)(1, "cache", "        size = %u bytes\n", c->sizeB);
      VG_(debugLog)(1, "cache", "        linesize = %u bytes\n", c->line_sizeB);
      VG_(debugLog)(1, "cache", "        assoc = %u\n", c->assoc);
   }
}

    698 cache_info_is_sensible(const VexCacheInfo *ci)
    699 {
    700    UInt level, i;
    701    Bool sensible = True;
    702 
    703    /* There must be at most one cache of a given kind at the same level.
    704       If there is a unified cache at a given level, no other cache may
    705       exist at that level. */
    706    for (level = 1; level <= ci->num_levels; ++level) {
    707       UInt num_icache, num_dcache, num_ucache;
    708 
    709       num_icache = num_dcache = num_ucache = 0;
    710       for (i = 0; i < ci->num_caches; ++i) {
    711          if (ci->caches[i].level == level) {
    712             switch (ci->caches[i].kind) {
    713             case INSN_CACHE:    ++num_icache; break;
    714             case DATA_CACHE:    ++num_dcache; break;
    715             case UNIFIED_CACHE: ++num_ucache; break;
    716             }
    717          }
    718       }
    719       if (num_icache == 0 && num_dcache == 0 && num_ucache == 0) {
    720          VG_(debugLog)(1, "cache", "warning: No caches at level %u\n", level);
    721          sensible = False;
    722       }
    723       if (num_icache > 1 || num_dcache > 1 || num_ucache > 1) {
    724          VG_(debugLog)(1, "cache", "warning: More than one cache of a given "
    725                        "kind at level %u\n", level);
    726          sensible = False;
    727       }
    728       if (num_ucache != 0 && (num_icache > 0 || num_dcache > 0)) {
    729          VG_(debugLog)(1, "cache", "warning: Unified cache and I/D cache "
    730                        "at level %u\n", level);
    731          sensible = False;
    732       }
    733    }
    734 
    735    /* If there is a cache at level N > 1 there must be a cache at level N-1 */
    736    for (level = 2; level <= ci->num_levels; ++level) {
    737       Bool found = False;
    738       for (i = 0; i < ci->num_caches; ++i) {
    739          if (ci->caches[i].level == level - 1) {
    740             found = True;
    741             break;
    742          }
    743       }
    744       if (! found) {
    745          VG_(debugLog)(1, "cache", "warning: Cache at level %u but no cache "
    746                        "at level %u\n", level, level - 1);
    747          sensible = False;
    748       }
    749    }
    750 
    751    return sensible;
    752 }
    753 
    754 
/* Autodetect the cache information for this host and stuff it into
   VexArchInfo::hwcache_info. Return True if successful. */
Bool
VG_(machine_get_cache_info)(VexArchInfo *vai)
{
   Bool ok = get_cache_info(vai);

   VexCacheInfo *ci = &vai->hwcache_info;

   if (! ok) {
      VG_(debugLog)(1, "cache", "Could not autodetect cache info\n");
   } else {
      ok = cache_info_is_sensible(ci);

      if (! ok) {
         VG_(debugLog)(1, "cache",
                       "Autodetected cache info is not sensible\n");
      } else {
         VG_(debugLog)(1, "cache",
                       "Autodetected cache info is sensible\n");
      }
      write_cache_info(ci);  /* write out for debugging */
   }

   if (! ok) {
      /* Reset cache info */
      ci->num_levels = 0;
      ci->num_caches = 0;
      VG_(free)(ci->caches);
      ci->caches = NULL;
   }

   return ok;
}

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/