Home | History | Annotate | Download | only in coregrind
      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- Management of the translation table and cache.               ---*/
      4 /*---                                                 m_transtab.c ---*/
      5 /*--------------------------------------------------------------------*/
      6 
      7 /*
      8    This file is part of Valgrind, a dynamic binary instrumentation
      9    framework.
     10 
     11    Copyright (C) 2000-2011 Julian Seward
     12       jseward (at) acm.org
     13 
     14    This program is free software; you can redistribute it and/or
     15    modify it under the terms of the GNU General Public License as
     16    published by the Free Software Foundation; either version 2 of the
     17    License, or (at your option) any later version.
     18 
     19    This program is distributed in the hope that it will be useful, but
     20    WITHOUT ANY WARRANTY; without even the implied warranty of
     21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     22    General Public License for more details.
     23 
     24    You should have received a copy of the GNU General Public License
     25    along with this program; if not, write to the Free Software
     26    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     27    02111-1307, USA.
     28 
     29    The GNU General Public License is contained in the file COPYING.
     30 */
     31 
     32 #include "pub_core_basics.h"
     33 #include "pub_core_debuglog.h"
     34 #include "pub_core_machine.h"    // For VG(machine_get_VexArchInfo)
     35 #include "pub_core_libcbase.h"
     36 #include "pub_core_libcassert.h"
     37 #include "pub_core_libcprint.h"
     38 #include "pub_core_options.h"
     39 #include "pub_core_tooliface.h"  // For VG_(details).avg_translation_sizeB
     40 #include "pub_core_transtab.h"
     41 #include "pub_core_aspacemgr.h"
     42 #include "pub_core_mallocfree.h" // VG_(out_of_memory_NORETURN)
     43 
     44 // JRS FIXME get rid of this somehow
     45 #if defined(VGP_arm_linux)
     46 # include "pub_core_vkiscnums.h" // __ARM_NR_cacheflush
     47 # include "pub_core_syscall.h"   // VG_(do_syscallN)
     48 #endif
     49 
     50 
     51 /* #define DEBUG_TRANSTAB */
     52 
     53 
     54 /*-------------------------------------------------------------*/
     55 /*--- Management of the FIFO-based translation table+cache. ---*/
     56 /*-------------------------------------------------------------*/
     57 
     58 /*------------------ CONSTANTS ------------------*/
     59 
     60 /* Number of sectors the TC is divided into.  If you need a larger
     61    overall translation cache, increase this value. */
     62 #define N_SECTORS 8
     63 
     64 /* Number of TC entries in each sector.  This needs to be a prime
     65    number to work properly, it must be <= 65535 (so that a TT index
     66    fits in a UShort, leaving room for 0xFFFF(EC2TTE_DELETED) to denote
     67    'deleted') and it is strongly recommended not to change this.
     68    65521 is the largest prime <= 65535. */
     69 #define N_TTES_PER_SECTOR /*30011*/ /*40009*/ 65521
     70 
     71 /* Because each sector contains a hash table of TTEntries, we need to
     72    specify the maximum allowable loading, after which the sector is
     73    deemed full. */
     74 #define SECTOR_TT_LIMIT_PERCENT 65
     75 
     76 /* The sector is deemed full when this many entries are in it. */
     77 #define N_TTES_PER_SECTOR_USABLE \
     78            ((N_TTES_PER_SECTOR * SECTOR_TT_LIMIT_PERCENT) / 100)
     79 
     80 /* Equivalence classes for fast address range deletion.  There are 1 +
     81    2^ECLASS_WIDTH bins.  The highest one, ECLASS_MISC, describes an
     82    address range which does not fall cleanly within any specific bin.
     83    Note that ECLASS_SHIFT + ECLASS_WIDTH must be < 32. */
     84 #define ECLASS_SHIFT 11
     85 #define ECLASS_WIDTH 8
     86 #define ECLASS_MISC  (1 << ECLASS_WIDTH)
     87 #define ECLASS_N     (1 + ECLASS_MISC)
     88 
     89 #define EC2TTE_DELETED  0xFFFF /* 16-bit special value */
     90 
     91 
     92 /*------------------ TYPES ------------------*/
     93 
     94 /* A translation-table entry.  This indicates precisely which areas of
     95    guest code are included in the translation, and contains all other
     96    auxiliary info too.  */
     97 typedef
     98    struct {
     99       /* Profiling only: the count and weight (arbitrary meaning) for
    100          this translation.  Weight is a property of the translation
    101          itself and computed once when the translation is created.
    102          Count is an entry count for the translation and is
    103          incremented by 1 every time the translation is used, if we
    104          are profiling. */
    105       UInt     count;
    106       UShort   weight;
    107 
    108       /* Status of the slot.  Note, we need to be able to do lazy
    109          deletion, hence the Deleted state. */
    110       enum { InUse, Deleted, Empty } status;
    111 
    112       /* 64-bit aligned pointer to one or more 64-bit words containing
    113          the corresponding host code (must be in the same sector!)
    114          This is a pointer into the sector's tc (code) area. */
    115       ULong* tcptr;
    116 
    117       /* This is the original guest address that purportedly is the
    118          entry point of the translation.  You might think that .entry
    119          should be the same as .vge->base[0], and most of the time it
    120          is.  However, when doing redirections, that is not the case.
    121          .vge must always correctly describe the guest code sections
    122          from which this translation was made.  However, .entry may or
    123          may not be a lie, depending on whether or not we're doing
    124          redirection. */
    125       Addr64 entry;
    126 
    127       /* This structure describes precisely what ranges of guest code
    128          the translation covers, so we can decide whether or not to
    129          delete it when translations of a given address range are
    130          invalidated. */
    131       VexGuestExtents vge;
    132 
    133       /* Address range summary info: these are pointers back to
    134          eclass[] entries in the containing Sector.  Those entries in
    135          turn point back here -- the two structures are mutually
    136          redundant but both necessary to make fast deletions work.
    137          The eclass info is similar to, and derived from, this entry's
    138          'vge' field, but it is not the same */
    139       UShort n_tte2ec;      // # tte2ec pointers (1 to 3)
    140       UShort tte2ec_ec[3];  // for each, the eclass #
    141       UInt   tte2ec_ix[3];  // and the index within the eclass.
    142       // for i in 0 .. n_tte2ec-1
    143       //    sec->ec2tte[ tte2ec_ec[i] ][ tte2ec_ix[i] ]
    144       // should be the index
    145       // of this TTEntry in the containing Sector's tt array.
    146    }
    147    TTEntry;
    148 
    149 
    150 /* Finally, a sector itself.  Each sector contains an array of
    151    TCEntries, which hold code, and an array of TTEntries, containing
    152    all required administrative info.  Profiling is supported using the
    153    TTEntry .count and .weight fields, if required.  Each sector is
    154    independent in that no cross-sector references are allowed.
    155 
    156    If the sector is not in use, all three pointers are NULL and
    157    tt_n_inuse is zero.
    158 */
    159 typedef
    160    struct {
    161       /* The TCEntry area.  Size of this depends on the average
    162          translation size.  We try and size it so it becomes full
    163          precisely when this sector's translation table (tt) reaches
    164          its load limit (SECTOR_TT_LIMIT_PERCENT). */
    165       ULong* tc;
    166 
    167       /* The TTEntry array.  This is a fixed size, always containing
    168          exactly N_TTES_PER_SECTOR entries. */
    169       TTEntry* tt;
    170 
    171       /* This points to the current allocation point in tc. */
    172       ULong* tc_next;
    173 
    174       /* The count of tt entries with state InUse. */
    175       Int tt_n_inuse;
    176 
    177       /* Expandable arrays of tt indices for each of the ECLASS_N
    178          address range equivalence classes.  These hold indices into
    179          the containing sector's tt array, which in turn should point
    180          back here. */
    181       Int     ec2tte_size[ECLASS_N];
    182       Int     ec2tte_used[ECLASS_N];
    183       UShort* ec2tte[ECLASS_N];
    184    }
    185    Sector;
    186 
    187 
    188 /*------------------ DECLS ------------------*/
    189 
    190 /* The root data structure is an array of sectors.  The index of the
    191    youngest sector is recorded, and new translations are put into that
    192    sector.  When it fills up, we move along to the next sector and
    193    start to fill that up, wrapping around at the end of the array.
    194    That way, once all N_TC_SECTORS have been bought into use for the
    195    first time, and are full, we then re-use the oldest sector,
    196    endlessly.
    197 
    198    When running, youngest sector should be between >= 0 and <
    199    N_TC_SECTORS.  The initial -1 value indicates the TT/TC system is
    200    not yet initialised.
    201 */
    202 static Sector sectors[N_SECTORS];
    203 static Int    youngest_sector = -1;
    204 
    205 /* The number of ULongs in each TCEntry area.  This is computed once
    206    at startup and does not change. */
    207 static Int    tc_sector_szQ;
    208 
    209 
    210 /* A list of sector numbers, in the order which they should be
    211    searched to find translations.  This is an optimisation to be used
    212    when searching for translations and should not affect
    213    correctness.  -1 denotes "no entry". */
    214 static Int sector_search_order[N_SECTORS];
    215 
    216 
    217 /* Fast helper for the TC.  A direct-mapped cache which holds a set of
    218    recently used (guest address, host address) pairs.  This array is
    219    referred to directly from m_dispatch/dispatch-<platform>.S.
    220 
    221    Entries in tt_fast may refer to any valid TC entry, regardless of
    222    which sector it's in.  Consequently we must be very careful to
    223    invalidate this cache when TC entries are changed or disappear.
    224 
    225    A special .guest address - TRANSTAB_BOGUS_GUEST_ADDR -- must be
    226    pointed at to cause that cache entry to miss.  This relies on the
    227    assumption that no guest code actually has that address, hence a
    228    value 0x1 seems good.  m_translate gives the client a synthetic
    229    segfault if it tries to execute at this address.
    230 */
    231 /*
    232 typedef
    233    struct {
    234       Addr guest;
    235       Addr host;
    236    }
    237    FastCacheEntry;
    238 */
    239 /*global*/ __attribute__((aligned(16)))
    240            FastCacheEntry VG_(tt_fast)[VG_TT_FAST_SIZE];
    241 /*
    242 #define TRANSTAB_BOGUS_GUEST_ADDR ((Addr)1)
    243 */
    244 
    245 /* For profiling, we have a parallel array of pointers to .count
    246    fields in TT entries.  Again, these pointers must be invalidated
    247    when translations disappear.  A NULL pointer suffices to indicate
    248    an unused slot.
    249 
    250    When not profiling (the normal case, VG_(clo_profile_flags) == 0),
    251    all tt_fastN entries are set to NULL at startup and never read nor
    252    written after that.
    253 
    254    When profiling (VG_(clo_profile_flags) > 0), tt_fast and tt_fastN
    255    change together: if tt_fast[i].guest is TRANSTAB_BOGUS_GUEST_ADDR
    256    then the corresponding tt_fastN[i] must be null.  If
    257    tt_fast[i].guest is any other value, then tt_fastN[i] *must* point
    258    to the .count field of the corresponding TT entry.
    259 
    260    tt_fast and tt_fastN are referred to from assembly code
    261    (dispatch.S).
    262 */
    263 /*global*/ UInt* VG_(tt_fastN)[VG_TT_FAST_SIZE];
    264 
    265 
    266 /* Make sure we're not used before initialisation. */
    267 static Bool init_done = False;
    268 
    269 
    270 /*------------------ STATS DECLS ------------------*/
    271 
    272 /* Number of fast-cache updates and flushes done. */
    273 ULong n_fast_flushes = 0;
    274 ULong n_fast_updates = 0;
    275 
    276 /* Number of full lookups done. */
    277 ULong n_full_lookups = 0;
    278 ULong n_lookup_probes = 0;
    279 
    280 /* Number/osize/tsize of translations entered; also the number of
    281    those for which self-checking was requested. */
    282 ULong n_in_count    = 0;
    283 ULong n_in_osize    = 0;
    284 ULong n_in_tsize    = 0;
    285 ULong n_in_sc_count = 0;
    286 
    287 /* Number/osize of translations discarded due to lack of space. */
    288 ULong n_dump_count = 0;
    289 ULong n_dump_osize = 0;
    290 
    291 /* Number/osize of translations discarded due to requests to do so. */
    292 ULong n_disc_count = 0;
    293 ULong n_disc_osize = 0;
    294 
    295 
    296 /*-------------------------------------------------------------*/
    297 /*--- Address-range equivalence class stuff                 ---*/
    298 /*-------------------------------------------------------------*/
    299 
    300 /* Return equivalence class number for a range. */
    301 
    302 static Int range_to_eclass ( Addr64 start, UInt len )
    303 {
    304    UInt mask   = (1 << ECLASS_WIDTH) - 1;
    305    UInt lo     = (UInt)start;
    306    UInt hi     = lo + len - 1;
    307    UInt loBits = (lo >> ECLASS_SHIFT) & mask;
    308    UInt hiBits = (hi >> ECLASS_SHIFT) & mask;
    309    if (loBits == hiBits) {
    310       vg_assert(loBits < ECLASS_N-1);
    311       return loBits;
    312    } else {
    313       return ECLASS_MISC;
    314    }
    315 }
    316 
    317 
    318 /* Calculates the equivalence class numbers for any VexGuestExtent.
    319    These are written in *eclasses, which must be big enough to hold 3
    320    Ints.  The number written, between 1 and 3, is returned.  The
    321    eclasses are presented in order, and any duplicates are removed.
    322 */
    323 
    324 static
    325 Int vexGuestExtents_to_eclasses ( /*OUT*/Int* eclasses,
    326                                   VexGuestExtents* vge )
    327 {
    328 #  define SWAP(_lv1,_lv2) \
    329       do { Int t = _lv1; _lv1 = _lv2; _lv2 = t; } while (0)
    330 
    331    Int i, j, n_ec, r;
    332 
    333    vg_assert(vge->n_used >= 1 && vge->n_used <= 3);
    334 
    335    n_ec = 0;
    336    for (i = 0; i < vge->n_used; i++) {
    337       r = range_to_eclass( vge->base[i], (UInt)vge->len[i] );
    338       if (r == ECLASS_MISC)
    339          goto bad;
    340       /* only add if we haven't already seen it */
    341       for (j = 0; j < n_ec; j++)
    342          if (eclasses[j] == r)
    343             break;
    344       if (j == n_ec)
    345          eclasses[n_ec++] = r;
    346    }
    347 
    348    if (n_ec == 1)
    349       return 1;
    350 
    351    if (n_ec == 2) {
    352       /* sort */
    353       if (eclasses[0] > eclasses[1])
    354          SWAP(eclasses[0], eclasses[1]);
    355       return 2;
    356    }
    357 
    358    if (n_ec == 3) {
    359       /* sort */
    360       if (eclasses[0] > eclasses[2])
    361          SWAP(eclasses[0], eclasses[2]);
    362       if (eclasses[0] > eclasses[1])
    363          SWAP(eclasses[0], eclasses[1]);
    364       if (eclasses[1] > eclasses[2])
    365          SWAP(eclasses[1], eclasses[2]);
    366       return 3;
    367    }
    368 
    369    /* NOTREACHED */
    370    vg_assert(0);
    371 
    372   bad:
    373    eclasses[0] = ECLASS_MISC;
    374    return 1;
    375 
    376 #  undef SWAP
    377 }
    378 
    379 
    380 /* Add tteno to the set of entries listed for equivalence class ec in
    381    this sector.  Returns used location in eclass array. */
    382 
    383 static
    384 UInt addEClassNo ( /*MOD*/Sector* sec, Int ec, UShort tteno )
    385 {
    386    Int    old_sz, new_sz, i, r;
    387    UShort *old_ar, *new_ar;
    388 
    389    vg_assert(ec >= 0 && ec < ECLASS_N);
    390    vg_assert(tteno < N_TTES_PER_SECTOR);
    391 
    392    if (0) VG_(printf)("ec %d  gets %d\n", ec, (Int)tteno);
    393 
    394    if (sec->ec2tte_used[ec] >= sec->ec2tte_size[ec]) {
    395 
    396       vg_assert(sec->ec2tte_used[ec] == sec->ec2tte_size[ec]);
    397 
    398       old_sz = sec->ec2tte_size[ec];
    399       old_ar = sec->ec2tte[ec];
    400       new_sz = old_sz==0 ? 8 : old_sz<64 ? 2*old_sz : (3*old_sz)/2;
    401       new_ar = VG_(arena_malloc)(VG_AR_TTAUX, "transtab.aECN.1",
    402                                  new_sz * sizeof(UShort));
    403       for (i = 0; i < old_sz; i++)
    404          new_ar[i] = old_ar[i];
    405       if (old_ar)
    406          VG_(arena_free)(VG_AR_TTAUX, old_ar);
    407       sec->ec2tte_size[ec] = new_sz;
    408       sec->ec2tte[ec] = new_ar;
    409 
    410       if (0) VG_(printf)("expand ec %d to %d\n", ec, new_sz);
    411    }
    412 
    413    /* Common case */
    414    r = sec->ec2tte_used[ec]++;
    415    vg_assert(r >= 0 && r < sec->ec2tte_size[ec]);
    416    sec->ec2tte[ec][r] = tteno;
    417    return (UInt)r;
    418 }
    419 
    420 
    421 /* 'vge' is being added to 'sec' at TT entry 'tteno'.  Add appropriate
    422    eclass entries to 'sec'. */
    423 
    424 static
    425 void upd_eclasses_after_add ( /*MOD*/Sector* sec, Int tteno )
    426 {
    427    Int i, r, eclasses[3];
    428    TTEntry* tte;
    429    vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
    430 
    431    tte = &sec->tt[tteno];
    432    r = vexGuestExtents_to_eclasses( eclasses, &tte->vge );
    433 
    434    vg_assert(r >= 1 && r <= 3);
    435    tte->n_tte2ec = r;
    436 
    437    for (i = 0; i < r; i++) {
    438       tte->tte2ec_ec[i] = eclasses[i];
    439       tte->tte2ec_ix[i] = addEClassNo( sec, eclasses[i], (UShort)tteno );
    440    }
    441 }
    442 
    443 
    444 /* Check the eclass info in 'sec' to ensure it is consistent.  Returns
    445    True if OK, False if something's not right.  Expensive. */
    446 
    447 static Bool sanity_check_eclasses_in_sector ( Sector* sec )
    448 {
    449 #  define BAD(_str) do { whassup = (_str); goto bad; } while (0)
    450 
    451    HChar*   whassup = NULL;
    452    Int      i, j, k, n, ec_num, ec_idx;
    453    TTEntry* tte;
    454    UShort   tteno;
    455    ULong*   tce;
    456 
    457    /* Basic checks on this sector */
    458    if (sec->tt_n_inuse < 0 || sec->tt_n_inuse > N_TTES_PER_SECTOR_USABLE)
    459       BAD("invalid sec->tt_n_inuse");
    460    tce = sec->tc_next;
    461    if (tce < &sec->tc[0] || tce > &sec->tc[tc_sector_szQ])
    462       BAD("sec->tc_next points outside tc");
    463 
    464    /* For each eclass ... */
    465    for (i = 0; i < ECLASS_N; i++) {
    466       if (sec->ec2tte_size[i] == 0 && sec->ec2tte[i] != NULL)
    467          BAD("ec2tte_size/ec2tte mismatch(1)");
    468       if (sec->ec2tte_size[i] != 0 && sec->ec2tte[i] == NULL)
    469          BAD("ec2tte_size/ec2tte mismatch(2)");
    470       if (sec->ec2tte_used[i] < 0
    471           || sec->ec2tte_used[i] > sec->ec2tte_size[i])
    472          BAD("implausible ec2tte_used");
    473       if (sec->ec2tte_used[i] == 0)
    474          continue;
    475 
    476       /* For each tt reference in each eclass .. ensure the reference
    477          is to a valid tt entry, and that the entry's address ranges
    478          really include this eclass. */
    479 
    480       for (j = 0; j < sec->ec2tte_used[i]; j++) {
    481          tteno = sec->ec2tte[i][j];
    482          if (tteno == EC2TTE_DELETED)
    483             continue;
    484          if (tteno >= N_TTES_PER_SECTOR)
    485             BAD("implausible tteno");
    486          tte = &sec->tt[tteno];
    487          if (tte->status != InUse)
    488             BAD("tteno points to non-inuse tte");
    489          if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
    490             BAD("tte->n_tte2ec out of range");
    491          /* Exactly least one of tte->eclasses[0 .. tte->n_eclasses-1]
    492             must equal i.  Inspect tte's eclass info. */
    493          n = 0;
    494          for (k = 0; k < tte->n_tte2ec; k++) {
    495             if (k < tte->n_tte2ec-1
    496                 && tte->tte2ec_ec[k] >= tte->tte2ec_ec[k+1])
    497                BAD("tte->tte2ec_ec[..] out of order");
    498             ec_num = tte->tte2ec_ec[k];
    499             if (ec_num < 0 || ec_num >= ECLASS_N)
    500                BAD("tte->tte2ec_ec[..] out of range");
    501             if (ec_num != i)
    502                continue;
    503             ec_idx = tte->tte2ec_ix[k];
    504             if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[i])
    505                BAD("tte->tte2ec_ix[..] out of range");
    506             if (ec_idx == j)
    507                n++;
    508          }
    509          if (n != 1)
    510             BAD("tteno does not point back at eclass");
    511       }
    512    }
    513 
    514    /* That establishes that for each forward pointer from TTEntrys
    515       there is a corresponding backward pointer from the eclass[]
    516       arrays.  However, it doesn't rule out the possibility of other,
    517       bogus pointers in the eclass[] arrays.  So do those similarly:
    518       scan through them and check the TTEntryies they point at point
    519       back. */
    520 
    521    for (i = 0; i < N_TTES_PER_SECTOR_USABLE; i++) {
    522 
    523       tte = &sec->tt[i];
    524       if (tte->status == Empty || tte->status == Deleted) {
    525          if (tte->n_tte2ec != 0)
    526             BAD("tte->n_eclasses nonzero for unused tte");
    527          continue;
    528       }
    529 
    530       vg_assert(tte->status == InUse);
    531 
    532       if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
    533          BAD("tte->n_eclasses out of range(2)");
    534 
    535       for (j = 0; j < tte->n_tte2ec; j++) {
    536          ec_num = tte->tte2ec_ec[j];
    537          if (ec_num < 0 || ec_num >= ECLASS_N)
    538             BAD("tte->eclass[..] out of range");
    539          ec_idx = tte->tte2ec_ix[j];
    540          if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[ec_num])
    541             BAD("tte->ec_idx[..] out of range(2)");
    542          if (sec->ec2tte[ec_num][ec_idx] != i)
    543             BAD("ec2tte does not point back to tte");
    544       }
    545    }
    546 
    547    return True;
    548 
    549   bad:
    550    if (whassup)
    551       VG_(debugLog)(0, "transtab", "eclass sanity fail: %s\n", whassup);
    552 
    553 #  if 0
    554    VG_(printf)("eclass = %d\n", i);
    555    VG_(printf)("tteno = %d\n", (Int)tteno);
    556    switch (tte->status) {
    557       case InUse:   VG_(printf)("InUse\n"); break;
    558       case Deleted: VG_(printf)("Deleted\n"); break;
    559       case Empty:   VG_(printf)("Empty\n"); break;
    560    }
    561    if (tte->status != Empty) {
    562       for (k = 0; k < tte->vge.n_used; k++)
    563          VG_(printf)("0x%llx %d\n", tte->vge.base[k],
    564                                     (Int)tte->vge.len[k]);
    565    }
    566 #  endif
    567 
    568    return False;
    569 
    570 #  undef BAD
    571 }
    572 
    573 
    574 /* Sanity check absolutely everything.  True == check passed. */
    575 
    576 /* forwards */
    577 static Bool sanity_check_redir_tt_tc ( void );
    578 static Bool sanity_check_fastcache ( void );
    579 
    580 static Bool sanity_check_sector_search_order ( void )
    581 {
    582    Int i, j, nListed;
    583    /* assert the array is the right size */
    584    vg_assert(N_SECTORS == (sizeof(sector_search_order)
    585                            / sizeof(sector_search_order[0])));
    586    /* Check it's of the form  valid_sector_numbers ++ [-1, -1, ..] */
    587    for (i = 0; i < N_SECTORS; i++) {
    588       if (sector_search_order[i] < 0 || sector_search_order[i] >= N_SECTORS)
    589          break;
    590    }
    591    nListed = i;
    592    for (/* */; i < N_SECTORS; i++) {
    593       if (sector_search_order[i] != -1)
    594          break;
    595    }
    596    if (i != N_SECTORS)
    597       return False;
    598    /* Check each sector number only appears once */
    599    for (i = 0; i < N_SECTORS; i++) {
    600       if (sector_search_order[i] == -1)
    601          continue;
    602       for (j = i+1; j < N_SECTORS; j++) {
    603          if (sector_search_order[j] == sector_search_order[i])
    604             return False;
    605       }
    606    }
    607    /* Check that the number of listed sectors equals the number
    608       in use, by counting nListed back down. */
    609    for (i = 0; i < N_SECTORS; i++) {
    610       if (sectors[i].tc != NULL)
    611          nListed--;
    612    }
    613    if (nListed != 0)
    614       return False;
    615    return True;
    616 }
    617 
    618 static Bool sanity_check_all_sectors ( void )
    619 {
    620    Int     sno;
    621    Bool    sane;
    622    Sector* sec;
    623    for (sno = 0; sno < N_SECTORS; sno++) {
    624       sec = &sectors[sno];
    625       if (sec->tc == NULL)
    626          continue;
    627       sane = sanity_check_eclasses_in_sector( sec );
    628       if (!sane)
    629          return False;
    630    }
    631    if ( !sanity_check_redir_tt_tc() )
    632       return False;
    633    if ( !sanity_check_fastcache() )
    634       return False;
    635    if ( !sanity_check_sector_search_order() )
    636       return False;
    637    return True;
    638 }
    639 
    640 
    641 
    642 /*-------------------------------------------------------------*/
    643 /*--- Add/find translations                                 ---*/
    644 /*-------------------------------------------------------------*/
    645 
    646 static UInt vge_osize ( VexGuestExtents* vge )
    647 {
    648    UInt i, n = 0;
    649    for (i = 0; i < vge->n_used; i++)
    650       n += (UInt)vge->len[i];
    651    return n;
    652 }
    653 
    654 static Bool isValidSector ( Int sector )
    655 {
    656    if (sector < 0 || sector >= N_SECTORS)
    657       return False;
    658    return True;
    659 }
    660 
    661 static inline UInt HASH_TT ( Addr64 key )
    662 {
    663    UInt kHi = (UInt)(key >> 32);
    664    UInt kLo = (UInt)key;
    665    UInt k32 = kHi ^ kLo;
    666    UInt ror = 7;
    667    if (ror > 0)
    668       k32 = (k32 >> ror) | (k32 << (32-ror));
    669    return k32 % N_TTES_PER_SECTOR;
    670 }
    671 
    672 static void setFastCacheEntry ( Addr64 key, ULong* tcptr, UInt* count )
    673 {
    674    UInt cno = (UInt)VG_TT_FAST_HASH(key);
    675    VG_(tt_fast)[cno].guest = (Addr)key;
    676    VG_(tt_fast)[cno].host  = (Addr)tcptr;
    677    if (VG_(clo_profile_flags) > 0)
    678       VG_(tt_fastN)[cno] = count;
    679    n_fast_updates++;
    680    /* This shouldn't fail.  It should be assured by m_translate
    681       which should reject any attempt to make translation of code
    682       starting at TRANSTAB_BOGUS_GUEST_ADDR. */
    683    vg_assert(VG_(tt_fast)[cno].guest != TRANSTAB_BOGUS_GUEST_ADDR);
    684 }
    685 
    686 /* Invalidate the fast cache's counter array, VG_(tt_fastN). */
    687 static void invalidateFastNCache ( void )
    688 {
    689    UInt j;
    690    vg_assert(VG_TT_FAST_SIZE > 0 && (VG_TT_FAST_SIZE % 4) == 0);
    691    for (j = 0; j < VG_TT_FAST_SIZE; j += 4) {
    692       VG_(tt_fastN)[j+0] = NULL;
    693       VG_(tt_fastN)[j+1] = NULL;
    694       VG_(tt_fastN)[j+2] = NULL;
    695       VG_(tt_fastN)[j+3] = NULL;
    696    }
    697    vg_assert(j == VG_TT_FAST_SIZE);
    698 }
    699 
    700 /* Invalidate the fast cache VG_(tt_fast).  If profiling, also
    701    invalidate the fast cache's counter array VG_(tt_fastN), otherwise
    702    don't touch it. */
    703 static void invalidateFastCache ( void )
    704 {
    705    UInt j;
    706    /* This loop is popular enough to make it worth unrolling a
    707       bit, at least on ppc32. */
    708    vg_assert(VG_TT_FAST_SIZE > 0 && (VG_TT_FAST_SIZE % 4) == 0);
    709    for (j = 0; j < VG_TT_FAST_SIZE; j += 4) {
    710       VG_(tt_fast)[j+0].guest = TRANSTAB_BOGUS_GUEST_ADDR;
    711       VG_(tt_fast)[j+1].guest = TRANSTAB_BOGUS_GUEST_ADDR;
    712       VG_(tt_fast)[j+2].guest = TRANSTAB_BOGUS_GUEST_ADDR;
    713       VG_(tt_fast)[j+3].guest = TRANSTAB_BOGUS_GUEST_ADDR;
    714    }
    715 
    716    if (VG_(clo_profile_flags) > 0)
    717       invalidateFastNCache();
    718 
    719    vg_assert(j == VG_TT_FAST_SIZE);
    720    n_fast_flushes++;
    721 }
    722 
    723 static Bool sanity_check_fastcache ( void )
    724 {
    725    UInt j;
    726    if (0) VG_(printf)("sanity check fastcache\n");
    727    if (VG_(clo_profile_flags) > 0) {
    728       /* profiling */
    729       for (j = 0; j < VG_TT_FAST_SIZE; j++) {
    730          if (VG_(tt_fastN)[j] == NULL
    731              && VG_(tt_fast)[j].guest != TRANSTAB_BOGUS_GUEST_ADDR)
    732             return False;
    733          if (VG_(tt_fastN)[j] != NULL
    734              && VG_(tt_fast)[j].guest == TRANSTAB_BOGUS_GUEST_ADDR)
    735             return False;
    736       }
    737    } else {
    738       /* not profiling */
    739       for (j = 0; j < VG_TT_FAST_SIZE; j++) {
    740          if (VG_(tt_fastN)[j] != NULL)
    741             return False;
    742       }
    743    }
    744    return True;
    745 }
    746 
    747 static void initialiseSector ( Int sno )
    748 {
    749    Int    i;
    750    SysRes sres;
    751    Sector* sec;
    752    vg_assert(isValidSector(sno));
    753 
    754    { Bool sane = sanity_check_sector_search_order();
    755      vg_assert(sane);
    756    }
    757    sec = &sectors[sno];
    758 
    759    if (sec->tc == NULL) {
    760 
    761       /* Sector has never been used before.  Need to allocate tt and
    762          tc. */
    763       vg_assert(sec->tt == NULL);
    764       vg_assert(sec->tc_next == NULL);
    765       vg_assert(sec->tt_n_inuse == 0);
    766       for (i = 0; i < ECLASS_N; i++) {
    767          vg_assert(sec->ec2tte_size[i] == 0);
    768          vg_assert(sec->ec2tte_used[i] == 0);
    769          vg_assert(sec->ec2tte[i] == NULL);
    770       }
    771 
    772       VG_(debugLog)(1,"transtab", "allocate sector %d\n", sno);
    773 
    774       sres = VG_(am_mmap_anon_float_valgrind)( 8 * tc_sector_szQ );
    775       if (sr_isError(sres)) {
    776          VG_(out_of_memory_NORETURN)("initialiseSector(TC)",
    777                                      8 * tc_sector_szQ );
    778 	 /*NOTREACHED*/
    779       }
    780       sec->tc = (ULong*)(AddrH)sr_Res(sres);
    781 
    782       sres = VG_(am_mmap_anon_float_valgrind)
    783                 ( N_TTES_PER_SECTOR * sizeof(TTEntry) );
    784       if (sr_isError(sres)) {
    785          VG_(out_of_memory_NORETURN)("initialiseSector(TT)",
    786                                      N_TTES_PER_SECTOR * sizeof(TTEntry) );
    787 	 /*NOTREACHED*/
    788       }
    789       sec->tt = (TTEntry*)(AddrH)sr_Res(sres);
    790 
    791       for (i = 0; i < N_TTES_PER_SECTOR; i++) {
    792          sec->tt[i].status   = Empty;
    793          sec->tt[i].n_tte2ec = 0;
    794       }
    795 
    796       /* Add an entry in the sector_search_order */
    797       for (i = 0; i < N_SECTORS; i++) {
    798          if (sector_search_order[i] == -1)
    799             break;
    800       }
    801       vg_assert(i >= 0 && i < N_SECTORS);
    802       sector_search_order[i] = sno;
    803 
    804       if (VG_(clo_verbosity) > 2)
    805          VG_(message)(Vg_DebugMsg, "TT/TC: initialise sector %d\n", sno);
    806 
    807    } else {
    808 
    809       /* Sector has been used before.  Dump the old contents. */
    810       VG_(debugLog)(1,"transtab", "recycle sector %d\n", sno);
    811       vg_assert(sec->tt != NULL);
    812       vg_assert(sec->tc_next != NULL);
    813       n_dump_count += sec->tt_n_inuse;
    814 
    815       /* Visit each just-about-to-be-abandoned translation. */
    816       for (i = 0; i < N_TTES_PER_SECTOR; i++) {
    817          if (sec->tt[i].status == InUse) {
    818             vg_assert(sec->tt[i].n_tte2ec >= 1);
    819             vg_assert(sec->tt[i].n_tte2ec <= 3);
    820             n_dump_osize += vge_osize(&sec->tt[i].vge);
    821             /* Tell the tool too. */
    822             if (VG_(needs).superblock_discards) {
    823                VG_TDICT_CALL( tool_discard_superblock_info,
    824                               sec->tt[i].entry,
    825                               sec->tt[i].vge );
    826             }
    827          } else {
    828             vg_assert(sec->tt[i].n_tte2ec == 0);
    829          }
    830          sec->tt[i].status   = Empty;
    831          sec->tt[i].n_tte2ec = 0;
    832       }
    833 
    834       /* Free up the eclass structures. */
    835       for (i = 0; i < ECLASS_N; i++) {
    836          if (sec->ec2tte_size[i] == 0) {
    837             vg_assert(sec->ec2tte_used[i] == 0);
    838             vg_assert(sec->ec2tte[i] == NULL);
    839          } else {
    840             vg_assert(sec->ec2tte[i] != NULL);
    841             VG_(arena_free)(VG_AR_TTAUX, sec->ec2tte[i]);
    842             sec->ec2tte[i] = NULL;
    843             sec->ec2tte_size[i] = 0;
    844             sec->ec2tte_used[i] = 0;
    845          }
    846       }
    847 
    848       /* Sanity check: ensure it is already in
    849          sector_search_order[]. */
    850       for (i = 0; i < N_SECTORS; i++) {
    851          if (sector_search_order[i] == sno)
    852             break;
    853       }
    854       vg_assert(i >= 0 && i < N_SECTORS);
    855 
    856       if (VG_(clo_verbosity) > 2)
    857          VG_(message)(Vg_DebugMsg, "TT/TC: recycle sector %d\n", sno);
    858    }
    859 
    860    sec->tc_next = sec->tc;
    861    sec->tt_n_inuse = 0;
    862 
    863    invalidateFastCache();
    864 
    865    { Bool sane = sanity_check_sector_search_order();
    866      vg_assert(sane);
    867    }
    868 }
    869 
    870 static void invalidate_icache ( void *ptr, Int nbytes )
    871 {
    872 #  if defined(VGA_ppc32) || defined(VGA_ppc64)
    873    Addr startaddr = (Addr) ptr;
    874    Addr endaddr   = startaddr + nbytes;
    875    Addr cls;
    876    Addr addr;
    877    VexArchInfo vai;
    878 
    879    if (nbytes == 0) return;
    880    vg_assert(nbytes > 0);
    881 
    882    VG_(machine_get_VexArchInfo)( NULL, &vai );
    883    cls = vai.ppc_cache_line_szB;
    884 
    885    /* Stay sane .. */
    886    vg_assert(cls == 32 || cls == 64 || cls == 128);
    887 
    888    startaddr &= ~(cls - 1);
    889    for (addr = startaddr; addr < endaddr; addr += cls) {
    890       __asm__ __volatile__("dcbst 0,%0" : : "r" (addr));
    891    }
    892    __asm__ __volatile__("sync");
    893    for (addr = startaddr; addr < endaddr; addr += cls) {
    894       __asm__ __volatile__("icbi 0,%0" : : "r" (addr));
    895    }
    896    __asm__ __volatile__("sync; isync");
    897 
    898 #  elif defined(VGA_x86)
    899    /* no need to do anything, hardware provides coherence */
    900 
    901 #  elif defined(VGA_amd64)
    902    /* no need to do anything, hardware provides coherence */
    903 
    904 #  elif defined(VGA_s390x)
    905    /* no need to do anything, hardware provides coherence */
    906 
    907 #  elif defined(VGP_arm_linux)
    908    /* ARM cache flushes are privileged, so we must defer to the kernel. */
    909    Addr startaddr = (Addr) ptr;
    910    Addr endaddr   = startaddr + nbytes;
    911    VG_(do_syscall2)(__NR_ARM_cacheflush, startaddr, endaddr);
    912 
    913 #  else
    914 #    error "Unknown ARCH"
    915 #  endif
    916 }
    917 
    918 
    919 /* Add a translation of vge to TT/TC.  The translation is temporarily
    920    in code[0 .. code_len-1].
    921 
    922    pre: youngest_sector points to a valid (although possibly full)
    923    sector.
    924 */
    925 void VG_(add_to_transtab)( VexGuestExtents* vge,
    926                            Addr64           entry,
    927                            AddrH            code,
    928                            UInt             code_len,
    929                            Bool             is_self_checking )
    930 {
    931    Int    tcAvailQ, reqdQ, y, i;
    932    ULong  *tcptr, *tcptr2;
    933    UChar* srcP;
    934    UChar* dstP;
    935 
    936    vg_assert(init_done);
    937    vg_assert(vge->n_used >= 1 && vge->n_used <= 3);
    938 
    939    /* 60000: should agree with N_TMPBUF in m_translate.c. */
    940    vg_assert(code_len > 0 && code_len < 60000);
    941 
    942    if (0)
    943       VG_(printf)("add_to_transtab(entry = 0x%llx, len = %d)\n",
    944                   entry, code_len);
    945 
    946    n_in_count++;
    947    n_in_tsize += code_len;
    948    n_in_osize += vge_osize(vge);
    949    if (is_self_checking)
    950       n_in_sc_count++;
    951 
    952    y = youngest_sector;
    953    vg_assert(isValidSector(y));
    954 
    955    if (sectors[y].tc == NULL)
    956       initialiseSector(y);
    957 
    958    /* Try putting the translation in this sector. */
    959    reqdQ = (code_len + 7) >> 3;
    960 
    961    /* Will it fit in tc? */
    962    tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
    963               - ((ULong*)(sectors[y].tc_next));
    964    vg_assert(tcAvailQ >= 0);
    965    vg_assert(tcAvailQ <= tc_sector_szQ);
    966 
    967    if (tcAvailQ < reqdQ
    968        || sectors[y].tt_n_inuse >= N_TTES_PER_SECTOR_USABLE) {
    969       /* No.  So move on to the next sector.  Either it's never been
    970          used before, in which case it will get its tt/tc allocated
    971          now, or it has been used before, in which case it is set to be
    972          empty, hence throwing out the oldest sector. */
    973       vg_assert(tc_sector_szQ > 0);
    974       VG_(debugLog)(1,"transtab",
    975                       "declare sector %d full "
    976                       "(TT loading %2d%%, TC loading %2d%%)\n",
    977                       y,
    978                       (100 * sectors[y].tt_n_inuse)
    979                          / N_TTES_PER_SECTOR,
    980                       (100 * (tc_sector_szQ - tcAvailQ))
    981                          / tc_sector_szQ);
    982       youngest_sector++;
    983       if (youngest_sector >= N_SECTORS)
    984          youngest_sector = 0;
    985       y = youngest_sector;
    986       initialiseSector(y);
    987    }
    988 
    989    /* Be sure ... */
    990    tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
    991               - ((ULong*)(sectors[y].tc_next));
    992    vg_assert(tcAvailQ >= 0);
    993    vg_assert(tcAvailQ <= tc_sector_szQ);
    994    vg_assert(tcAvailQ >= reqdQ);
    995    vg_assert(sectors[y].tt_n_inuse < N_TTES_PER_SECTOR_USABLE);
    996    vg_assert(sectors[y].tt_n_inuse >= 0);
    997 
    998    /* Copy into tc. */
    999    tcptr = sectors[y].tc_next;
   1000    vg_assert(tcptr >= &sectors[y].tc[0]);
   1001    vg_assert(tcptr <= &sectors[y].tc[tc_sector_szQ]);
   1002 
   1003    dstP = (UChar*)tcptr;
   1004    srcP = (UChar*)code;
   1005    for (i = 0; i < code_len; i++)
   1006       dstP[i] = srcP[i];
   1007    sectors[y].tc_next += reqdQ;
   1008    sectors[y].tt_n_inuse++;
   1009 
   1010    invalidate_icache( dstP, code_len );
   1011 
   1012    /* more paranoia */
   1013    tcptr2 = sectors[y].tc_next;
   1014    vg_assert(tcptr2 >= &sectors[y].tc[0]);
   1015    vg_assert(tcptr2 <= &sectors[y].tc[tc_sector_szQ]);
   1016 
   1017    /* Find an empty tt slot, and use it.  There must be such a slot
   1018       since tt is never allowed to get completely full. */
   1019    i = HASH_TT(entry);
   1020    vg_assert(i >= 0 && i < N_TTES_PER_SECTOR);
   1021    while (True) {
   1022       if (sectors[y].tt[i].status == Empty
   1023           || sectors[y].tt[i].status == Deleted)
   1024          break;
   1025       i++;
   1026       if (i >= N_TTES_PER_SECTOR)
   1027          i = 0;
   1028    }
   1029 
   1030    sectors[y].tt[i].status = InUse;
   1031    sectors[y].tt[i].tcptr  = tcptr;
   1032    sectors[y].tt[i].count  = 0;
   1033    sectors[y].tt[i].weight = 1;
   1034    sectors[y].tt[i].vge    = *vge;
   1035    sectors[y].tt[i].entry  = entry;
   1036 
   1037    /* Update the fast-cache. */
   1038    setFastCacheEntry( entry, tcptr, &sectors[y].tt[i].count );
   1039 
   1040    /* Note the eclass numbers for this translation. */
   1041    upd_eclasses_after_add( &sectors[y], i );
   1042 }
   1043 
   1044 
   1045 /* Search for the translation of the given guest address.  If
   1046    requested, a successful search can also cause the fast-caches to be
   1047    updated.
   1048 */
   1049 Bool VG_(search_transtab) ( /*OUT*/AddrH* result,
   1050                             Addr64        guest_addr,
   1051                             Bool          upd_cache )
   1052 {
   1053    Int i, j, k, kstart, sno;
   1054 
   1055    vg_assert(init_done);
   1056    /* Find the initial probe point just once.  It will be the same in
   1057       all sectors and avoids multiple expensive % operations. */
   1058    n_full_lookups++;
   1059    k      = -1;
   1060    kstart = HASH_TT(guest_addr);
   1061    vg_assert(kstart >= 0 && kstart < N_TTES_PER_SECTOR);
   1062 
   1063    /* Search in all the sectors,using sector_search_order[] as a
   1064       heuristic guide as to what order to visit the sectors. */
   1065    for (i = 0; i < N_SECTORS; i++) {
   1066 
   1067       sno = sector_search_order[i];
   1068       if (UNLIKELY(sno == -1))
   1069          return False; /* run out of sectors to search */
   1070 
   1071       k = kstart;
   1072       for (j = 0; j < N_TTES_PER_SECTOR; j++) {
   1073          n_lookup_probes++;
   1074          if (sectors[sno].tt[k].status == InUse
   1075              && sectors[sno].tt[k].entry == guest_addr) {
   1076             /* found it */
   1077             if (upd_cache)
   1078                setFastCacheEntry(
   1079                   guest_addr, sectors[sno].tt[k].tcptr,
   1080                               &sectors[sno].tt[k].count );
   1081             if (result)
   1082                *result = (AddrH)sectors[sno].tt[k].tcptr;
   1083             /* pull this one one step closer to the front.  For large
   1084                apps this more or less halves the number of required
   1085                probes. */
   1086             if (i > 0) {
   1087                Int tmp = sector_search_order[i-1];
   1088                sector_search_order[i-1] = sector_search_order[i];
   1089                sector_search_order[i] = tmp;
   1090             }
   1091             return True;
   1092          }
   1093          if (sectors[sno].tt[k].status == Empty)
   1094             break; /* not found in this sector */
   1095          k++;
   1096          if (k == N_TTES_PER_SECTOR)
   1097             k = 0;
   1098       }
   1099 
   1100       /* If we fall off the end, all entries are InUse and not
   1101          matching, or Deleted.  In any case we did not find it in this
   1102          sector. */
   1103    }
   1104 
   1105    /* Not found in any sector. */
   1106    return False;
   1107 }
   1108 
   1109 
   1110 /*-------------------------------------------------------------*/
   1111 /*--- Delete translations.                                  ---*/
   1112 /*-------------------------------------------------------------*/
   1113 
   1114 /* forward */
   1115 static void unredir_discard_translations( Addr64, ULong );
   1116 
   1117 /* Stuff for deleting translations which intersect with a given
   1118    address range.  Unfortunately, to make this run at a reasonable
   1119    speed, it is complex. */
   1120 
   1121 static inline
   1122 Bool overlap1 ( Addr64 s1, ULong r1, Addr64 s2, ULong r2 )
   1123 {
   1124    Addr64 e1 = s1 + r1 - 1ULL;
   1125    Addr64 e2 = s2 + r2 - 1ULL;
   1126    if (e1 < s2 || e2 < s1)
   1127       return False;
   1128    return True;
   1129 }
   1130 
   1131 static inline
   1132 Bool overlaps ( Addr64 start, ULong range, VexGuestExtents* vge )
   1133 {
   1134    if (overlap1(start, range, vge->base[0], (UInt)vge->len[0]))
   1135       return True;
   1136    if (vge->n_used < 2)
   1137       return False;
   1138    if (overlap1(start, range, vge->base[1], (UInt)vge->len[1]))
   1139       return True;
   1140    if (vge->n_used < 3)
   1141       return False;
   1142    if (overlap1(start, range, vge->base[2], (UInt)vge->len[2]))
   1143       return True;
   1144    return False;
   1145 }
   1146 
   1147 
   1148 /* Delete a tt entry, and update all the eclass data accordingly. */
   1149 
   1150 static void delete_tte ( /*MOD*/Sector* sec, Int tteno )
   1151 {
   1152    Int      i, ec_num, ec_idx;
   1153    TTEntry* tte;
   1154 
   1155    vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
   1156    tte = &sec->tt[tteno];
   1157    vg_assert(tte->status == InUse);
   1158    vg_assert(tte->n_tte2ec >= 1 && tte->n_tte2ec <= 3);
   1159 
   1160    /* Deal with the ec-to-tte links first. */
   1161    for (i = 0; i < tte->n_tte2ec; i++) {
   1162       ec_num = (Int)tte->tte2ec_ec[i];
   1163       ec_idx = tte->tte2ec_ix[i];
   1164       vg_assert(ec_num >= 0 && ec_num < ECLASS_N);
   1165       vg_assert(ec_idx >= 0);
   1166       vg_assert(ec_idx < sec->ec2tte_used[ec_num]);
   1167       /* Assert that the two links point at each other. */
   1168       vg_assert(sec->ec2tte[ec_num][ec_idx] == (UShort)tteno);
   1169       /* "delete" the pointer back to here. */
   1170       sec->ec2tte[ec_num][ec_idx] = EC2TTE_DELETED;
   1171    }
   1172 
   1173    /* Now fix up this TTEntry. */
   1174    tte->status   = Deleted;
   1175    tte->n_tte2ec = 0;
   1176 
   1177    /* Stats .. */
   1178    sec->tt_n_inuse--;
   1179    n_disc_count++;
   1180    n_disc_osize += vge_osize(&tte->vge);
   1181 
   1182    /* Tell the tool too. */
   1183    if (VG_(needs).superblock_discards) {
   1184       VG_TDICT_CALL( tool_discard_superblock_info,
   1185                      tte->entry,
   1186                      tte->vge );
   1187    }
   1188 }
   1189 
   1190 
   1191 /* Delete translations from sec which intersect specified range, but
   1192    only consider translations in the specified eclass. */
   1193 
   1194 static
   1195 Bool delete_translations_in_sector_eclass ( /*MOD*/Sector* sec,
   1196                                             Addr64 guest_start, ULong range,
   1197                                             Int ec )
   1198 {
   1199    Int      i;
   1200    UShort   tteno;
   1201    Bool     anyDeld = False;
   1202    TTEntry* tte;
   1203 
   1204    vg_assert(ec >= 0 && ec < ECLASS_N);
   1205 
   1206    for (i = 0; i < sec->ec2tte_used[ec]; i++) {
   1207 
   1208       tteno = sec->ec2tte[ec][i];
   1209       if (tteno == EC2TTE_DELETED) {
   1210          /* already deleted */
   1211          continue;
   1212       }
   1213 
   1214       vg_assert(tteno < N_TTES_PER_SECTOR);
   1215 
   1216       tte = &sec->tt[tteno];
   1217       vg_assert(tte->status == InUse);
   1218 
   1219       if (overlaps( guest_start, range, &tte->vge )) {
   1220          anyDeld = True;
   1221          delete_tte( sec, (Int)tteno );
   1222       }
   1223 
   1224    }
   1225 
   1226    return anyDeld;
   1227 }
   1228 
   1229 
   1230 /* Delete translations from sec which intersect specified range, the
   1231    slow way, by inspecting all translations in sec. */
   1232 
   1233 static
   1234 Bool delete_translations_in_sector ( /*MOD*/Sector* sec,
   1235                                      Addr64 guest_start, ULong range )
   1236 {
   1237    Int  i;
   1238    Bool anyDeld = False;
   1239 
   1240    for (i = 0; i < N_TTES_PER_SECTOR; i++) {
   1241       if (sec->tt[i].status == InUse
   1242           && overlaps( guest_start, range, &sec->tt[i].vge )) {
   1243          anyDeld = True;
   1244          delete_tte( sec, i );
   1245       }
   1246    }
   1247 
   1248    return anyDeld;
   1249 }
   1250 
   1251 
   1252 void VG_(discard_translations) ( Addr64 guest_start, ULong range,
   1253                                  HChar* who )
   1254 {
   1255    Sector* sec;
   1256    Int     sno, ec;
   1257    Bool    anyDeleted = False;
   1258 
   1259    vg_assert(init_done);
   1260 
   1261    VG_(debugLog)(2, "transtab",
   1262                     "discard_translations(0x%llx, %lld) req by %s\n",
   1263                     guest_start, range, who );
   1264 
   1265    /* Pre-deletion sanity check */
   1266    if (VG_(clo_sanity_level >= 4)) {
   1267       Bool sane = sanity_check_all_sectors();
   1268       vg_assert(sane);
   1269    }
   1270 
   1271    if (range == 0)
   1272       return;
   1273 
   1274    /* There are two different ways to do this.
   1275 
   1276       If the range fits within a single address-range equivalence
   1277       class, as will be the case for a cache line sized invalidation,
   1278       then we only have to inspect the set of translations listed in
   1279       that equivalence class, and also in the "sin-bin" equivalence
   1280       class ECLASS_MISC.
   1281 
   1282       Otherwise, the invalidation is of a larger range and probably
   1283       results from munmap.  In this case it's (probably!) faster just
   1284       to inspect all translations, dump those we don't want, and
   1285       regenerate the equivalence class information (since modifying it
   1286       in-situ is even more expensive).
   1287    */
   1288 
   1289    /* First off, figure out if the range falls within a single class,
   1290       and if so which one. */
   1291 
   1292    ec = ECLASS_MISC;
   1293    if (range < (1ULL << ECLASS_SHIFT))
   1294       ec = range_to_eclass( guest_start, (UInt)range );
   1295 
   1296    /* if ec is ECLASS_MISC then we aren't looking at just a single
   1297       class, so use the slow scheme.  Else use the fast scheme,
   1298       examining 'ec' and ECLASS_MISC. */
   1299 
   1300    if (ec != ECLASS_MISC) {
   1301 
   1302       VG_(debugLog)(2, "transtab",
   1303                        "                    FAST, ec = %d\n", ec);
   1304 
   1305       /* Fast scheme */
   1306       vg_assert(ec >= 0 && ec < ECLASS_MISC);
   1307 
   1308       for (sno = 0; sno < N_SECTORS; sno++) {
   1309          sec = &sectors[sno];
   1310          if (sec->tc == NULL)
   1311             continue;
   1312          anyDeleted |= delete_translations_in_sector_eclass(
   1313                          sec, guest_start, range, ec );
   1314          anyDeleted |= delete_translations_in_sector_eclass(
   1315                          sec, guest_start, range, ECLASS_MISC );
   1316       }
   1317 
   1318    } else {
   1319 
   1320       /* slow scheme */
   1321 
   1322       VG_(debugLog)(2, "transtab",
   1323                        "                    SLOW, ec = %d\n", ec);
   1324 
   1325       for (sno = 0; sno < N_SECTORS; sno++) {
   1326          sec = &sectors[sno];
   1327          if (sec->tc == NULL)
   1328             continue;
   1329          anyDeleted |= delete_translations_in_sector(
   1330                          sec, guest_start, range );
   1331       }
   1332 
   1333    }
   1334 
   1335    if (anyDeleted)
   1336       invalidateFastCache();
   1337 
   1338    /* don't forget the no-redir cache */
   1339    unredir_discard_translations( guest_start, range );
   1340 
   1341    /* Post-deletion sanity check */
   1342    if (VG_(clo_sanity_level >= 4)) {
   1343       Int      i;
   1344       TTEntry* tte;
   1345       Bool     sane = sanity_check_all_sectors();
   1346       vg_assert(sane);
   1347       /* But now, also check the requested address range isn't
   1348          present anywhere. */
   1349       for (sno = 0; sno < N_SECTORS; sno++) {
   1350          sec = &sectors[sno];
   1351          if (sec->tc == NULL)
   1352             continue;
   1353          for (i = 0; i < N_TTES_PER_SECTOR; i++) {
   1354             tte = &sec->tt[i];
   1355             if (tte->status != InUse)
   1356                continue;
   1357             vg_assert(!overlaps( guest_start, range, &tte->vge ));
   1358          }
   1359       }
   1360    }
   1361 }
   1362 
   1363 
   1364 /*------------------------------------------------------------*/
   1365 /*--- AUXILIARY: the unredirected TT/TC                    ---*/
   1366 /*------------------------------------------------------------*/
   1367 
   1368 /* A very simple translation cache which holds a small number of
   1369    unredirected translations.  This is completely independent of the
   1370    main tt/tc structures.  When unredir_tc or unredir_tt becomes full,
   1371    both structures are simply dumped and we start over.
   1372 
   1373    Since these translations are unredirected, the search key is (by
   1374    definition) the first address entry in the .vge field. */
   1375 
   1376 /* Sized to hold 500 translations of average size 1000 bytes. */
   1377 
   1378 #define UNREDIR_SZB   1000
   1379 
   1380 #define N_UNREDIR_TT  500
   1381 #define N_UNREDIR_TCQ (N_UNREDIR_TT * UNREDIR_SZB / sizeof(ULong))
   1382 
   1383 typedef
   1384    struct {
   1385       VexGuestExtents vge;
   1386       Addr            hcode;
   1387       Bool            inUse;
   1388    }
   1389    UTCEntry;
   1390 
   1391 /* We just allocate forwards in _tc, never deleting. */
   1392 static ULong    *unredir_tc;
   1393 static Int      unredir_tc_used = N_UNREDIR_TCQ;
   1394 
   1395 /* Slots in _tt can come into use and out again (.inUse).
   1396    Nevertheless _tt_highwater is maintained so that invalidations
   1397    don't have to scan all the slots when only a few are in use.
   1398    _tt_highwater holds the index of the highest ever allocated
   1399    slot. */
   1400 static UTCEntry unredir_tt[N_UNREDIR_TT];
   1401 static Int      unredir_tt_highwater;
   1402 
   1403 
   1404 static void init_unredir_tt_tc ( void )
   1405 {
   1406    Int i;
   1407    if (unredir_tc == NULL) {
   1408       SysRes sres = VG_(am_mmap_anon_float_valgrind)
   1409                        ( N_UNREDIR_TT * UNREDIR_SZB );
   1410       if (sr_isError(sres)) {
   1411          VG_(out_of_memory_NORETURN)("init_unredir_tt_tc",
   1412                                      N_UNREDIR_TT * UNREDIR_SZB);
   1413          /*NOTREACHED*/
   1414       }
   1415       unredir_tc = (ULong *)(AddrH)sr_Res(sres);
   1416    }
   1417    unredir_tc_used = 0;
   1418    for (i = 0; i < N_UNREDIR_TT; i++)
   1419       unredir_tt[i].inUse = False;
   1420    unredir_tt_highwater = -1;
   1421 }
   1422 
   1423 /* Do a sanity check; return False on failure. */
   1424 static Bool sanity_check_redir_tt_tc ( void )
   1425 {
   1426    Int i;
   1427    if (unredir_tt_highwater < -1) return False;
   1428    if (unredir_tt_highwater >= N_UNREDIR_TT) return False;
   1429 
   1430    for (i = unredir_tt_highwater+1; i < N_UNREDIR_TT; i++)
   1431       if (unredir_tt[i].inUse)
   1432          return False;
   1433 
   1434    if (unredir_tc_used < 0) return False;
   1435    if (unredir_tc_used > N_UNREDIR_TCQ) return False;
   1436 
   1437    return True;
   1438 }
   1439 
   1440 
   1441 /* Add an UNREDIRECTED translation of vge to TT/TC.  The translation
   1442    is temporarily in code[0 .. code_len-1].
   1443 */
   1444 void VG_(add_to_unredir_transtab)( VexGuestExtents* vge,
   1445                                    Addr64           entry,
   1446                                    AddrH            code,
   1447                                    UInt             code_len )
   1448 {
   1449    Int   i, j, code_szQ;
   1450    HChar *srcP, *dstP;
   1451 
   1452    vg_assert(sanity_check_redir_tt_tc());
   1453 
   1454    /* This is the whole point: it's not redirected! */
   1455    vg_assert(entry == vge->base[0]);
   1456 
   1457    /* How many unredir_tt slots are needed */
   1458    code_szQ = (code_len + 7) / 8;
   1459 
   1460    /* Look for an empty unredir_tc slot */
   1461    for (i = 0; i < N_UNREDIR_TT; i++)
   1462       if (!unredir_tt[i].inUse)
   1463          break;
   1464 
   1465    if (i >= N_UNREDIR_TT || code_szQ > (N_UNREDIR_TCQ - unredir_tc_used)) {
   1466       /* It's full; dump everything we currently have */
   1467       init_unredir_tt_tc();
   1468       i = 0;
   1469    }
   1470 
   1471    vg_assert(unredir_tc_used >= 0);
   1472    vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
   1473    vg_assert(code_szQ > 0);
   1474    vg_assert(code_szQ + unredir_tc_used <= N_UNREDIR_TCQ);
   1475    vg_assert(i >= 0 && i < N_UNREDIR_TT);
   1476    vg_assert(unredir_tt[i].inUse == False);
   1477 
   1478    if (i > unredir_tt_highwater)
   1479       unredir_tt_highwater = i;
   1480 
   1481    dstP = (HChar*)&unredir_tc[unredir_tc_used];
   1482    srcP = (HChar*)code;
   1483    for (j = 0; j < code_len; j++)
   1484       dstP[j] = srcP[j];
   1485 
   1486    invalidate_icache( dstP, code_len );
   1487 
   1488    unredir_tt[i].inUse = True;
   1489    unredir_tt[i].vge   = *vge;
   1490    unredir_tt[i].hcode = (Addr)dstP;
   1491 
   1492    unredir_tc_used += code_szQ;
   1493    vg_assert(unredir_tc_used >= 0);
   1494    vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
   1495 
   1496    vg_assert(&dstP[code_len] <= (HChar*)&unredir_tc[unredir_tc_used]);
   1497 }
   1498 
   1499 Bool VG_(search_unredir_transtab) ( /*OUT*/AddrH* result,
   1500                                     Addr64        guest_addr )
   1501 {
   1502    Int i;
   1503    for (i = 0; i < N_UNREDIR_TT; i++) {
   1504       if (!unredir_tt[i].inUse)
   1505          continue;
   1506       if (unredir_tt[i].vge.base[0] == guest_addr) {
   1507          *result = (AddrH)unredir_tt[i].hcode;
   1508          return True;
   1509       }
   1510    }
   1511    return False;
   1512 }
   1513 
   1514 static void unredir_discard_translations( Addr64 guest_start, ULong range )
   1515 {
   1516    Int i;
   1517 
   1518    vg_assert(sanity_check_redir_tt_tc());
   1519 
   1520    for (i = 0; i <= unredir_tt_highwater; i++) {
   1521       if (unredir_tt[i].inUse
   1522           && overlaps( guest_start, range, &unredir_tt[i].vge))
   1523          unredir_tt[i].inUse = False;
   1524    }
   1525 }
   1526 
   1527 
   1528 /*------------------------------------------------------------*/
   1529 /*--- Initialisation.                                      ---*/
   1530 /*------------------------------------------------------------*/
   1531 
   1532 void VG_(init_tt_tc) ( void )
   1533 {
   1534    Int i, j, avg_codeszQ;
   1535 
   1536    vg_assert(!init_done);
   1537    init_done = True;
   1538 
   1539    /* Otherwise lots of things go wrong... */
   1540    vg_assert(sizeof(ULong) == 8);
   1541    vg_assert(sizeof(Addr64) == 8);
   1542    /* check fast cache entries really are 2 words long */
   1543    vg_assert(sizeof(Addr) == sizeof(void*));
   1544    vg_assert(sizeof(FastCacheEntry) == 2 * sizeof(Addr));
   1545    /* check fast cache entries are packed back-to-back with no spaces */
   1546    vg_assert(sizeof( VG_(tt_fast) ) == VG_TT_FAST_SIZE * sizeof(FastCacheEntry));
   1547    /* check fast cache is aligned as we requested.  Not fatal if it
   1548       isn't, but we might as well make sure. */
   1549    vg_assert(VG_IS_16_ALIGNED( ((Addr) & VG_(tt_fast)[0]) ));
   1550 
   1551    if (VG_(clo_verbosity) > 2)
   1552       VG_(message)(Vg_DebugMsg,
   1553                    "TT/TC: VG_(init_tt_tc) "
   1554                    "(startup of code management)\n");
   1555 
   1556    /* Figure out how big each tc area should be.  */
   1557    avg_codeszQ   = (VG_(details).avg_translation_sizeB + 7) / 8;
   1558    tc_sector_szQ = N_TTES_PER_SECTOR_USABLE * (1 + avg_codeszQ);
   1559 
   1560    /* Ensure the calculated value is not way crazy. */
   1561    vg_assert(tc_sector_szQ >= 2 * N_TTES_PER_SECTOR_USABLE);
   1562    vg_assert(tc_sector_szQ <= 100 * N_TTES_PER_SECTOR_USABLE);
   1563 
   1564    /* Initialise the sectors */
   1565    youngest_sector = 0;
   1566    for (i = 0; i < N_SECTORS; i++) {
   1567       sectors[i].tc = NULL;
   1568       sectors[i].tt = NULL;
   1569       sectors[i].tc_next = NULL;
   1570       sectors[i].tt_n_inuse = 0;
   1571       for (j = 0; j < ECLASS_N; j++) {
   1572          sectors[i].ec2tte_size[j] = 0;
   1573          sectors[i].ec2tte_used[j] = 0;
   1574          sectors[i].ec2tte[j] = NULL;
   1575       }
   1576    }
   1577 
   1578    /* Initialise the sector_search_order hint table. */
   1579    for (i = 0; i < N_SECTORS; i++)
   1580       sector_search_order[i] = -1;
   1581 
   1582    /* Initialise the fast caches.  If not profiling (the usual case),
   1583       we have to explicitly invalidate the fastN cache as
   1584       invalidateFastCache() won't do that for us. */
   1585    invalidateFastCache();
   1586    if (VG_(clo_profile_flags) == 0)
   1587       invalidateFastNCache();
   1588 
   1589    /* and the unredir tt/tc */
   1590    init_unredir_tt_tc();
   1591 
   1592    if (VG_(clo_verbosity) > 2) {
   1593       VG_(message)(Vg_DebugMsg,
   1594          "TT/TC: cache: %d sectors of %d bytes each = %d total\n",
   1595           N_SECTORS, 8 * tc_sector_szQ,
   1596           N_SECTORS * 8 * tc_sector_szQ );
   1597       VG_(message)(Vg_DebugMsg,
   1598          "TT/TC: table: %d total entries, max occupancy %d (%d%%)\n",
   1599          N_SECTORS * N_TTES_PER_SECTOR,
   1600          N_SECTORS * N_TTES_PER_SECTOR_USABLE,
   1601          SECTOR_TT_LIMIT_PERCENT );
   1602    }
   1603 
   1604    VG_(debugLog)(2, "transtab",
   1605       "cache: %d sectors of %d bytes each = %d total\n",
   1606        N_SECTORS, 8 * tc_sector_szQ,
   1607        N_SECTORS * 8 * tc_sector_szQ );
   1608    VG_(debugLog)(2, "transtab",
   1609       "table: %d total entries, max occupancy %d (%d%%)\n",
   1610       N_SECTORS * N_TTES_PER_SECTOR,
   1611       N_SECTORS * N_TTES_PER_SECTOR_USABLE,
   1612       SECTOR_TT_LIMIT_PERCENT );
   1613 }
   1614 
   1615 
   1616 /*------------------------------------------------------------*/
   1617 /*--- Printing out statistics.                             ---*/
   1618 /*------------------------------------------------------------*/
   1619 
   1620 static ULong safe_idiv( ULong a, ULong b )
   1621 {
   1622    return (b == 0 ? 0 : a / b);
   1623 }
   1624 
   1625 UInt VG_(get_bbs_translated) ( void )
   1626 {
   1627    return n_in_count;
   1628 }
   1629 
   1630 void VG_(print_tt_tc_stats) ( void )
   1631 {
   1632    VG_(message)(Vg_DebugMsg,
   1633       "    tt/tc: %'llu tt lookups requiring %'llu probes\n",
   1634       n_full_lookups, n_lookup_probes );
   1635    VG_(message)(Vg_DebugMsg,
   1636       "    tt/tc: %'llu fast-cache updates, %'llu flushes\n",
   1637       n_fast_updates, n_fast_flushes );
   1638 
   1639    VG_(message)(Vg_DebugMsg,
   1640                 " transtab: new        %'lld "
   1641                 "(%'llu -> %'llu; ratio %'llu:10) [%'llu scs]\n",
   1642                 n_in_count, n_in_osize, n_in_tsize,
   1643                 safe_idiv(10*n_in_tsize, n_in_osize),
   1644                 n_in_sc_count);
   1645    VG_(message)(Vg_DebugMsg,
   1646                 " transtab: dumped     %'llu (%'llu -> ?" "?)\n",
   1647                 n_dump_count, n_dump_osize );
   1648    VG_(message)(Vg_DebugMsg,
   1649                 " transtab: discarded  %'llu (%'llu -> ?" "?)\n",
   1650                 n_disc_count, n_disc_osize );
   1651 
   1652    if (0) {
   1653       Int i;
   1654       VG_(printf)("\n");
   1655       for (i = 0; i < ECLASS_N; i++) {
   1656          VG_(printf)(" %4d", sectors[0].ec2tte_used[i]);
   1657          if (i % 16 == 15)
   1658             VG_(printf)("\n");
   1659       }
   1660       VG_(printf)("\n\n");
   1661    }
   1662 }
   1663 
   1664 /*------------------------------------------------------------*/
   1665 /*--- Printing out of profiling results.                   ---*/
   1666 /*------------------------------------------------------------*/
   1667 
   1668 static ULong score ( TTEntry* tte )
   1669 {
   1670    return ((ULong)tte->weight) * ((ULong)tte->count);
   1671 }
   1672 
   1673 ULong VG_(get_BB_profile) ( BBProfEntry tops[], UInt n_tops )
   1674 {
   1675    Int   sno, i, r, s;
   1676    ULong score_total;
   1677 
   1678    /* First, compute the total weighted count, and find the top N
   1679       ttes.  tops contains pointers to the most-used n_tops blocks, in
   1680       descending order (viz, tops[0] is the highest scorer). */
   1681    for (i = 0; i < n_tops; i++) {
   1682       tops[i].addr  = 0;
   1683       tops[i].score = 0;
   1684    }
   1685 
   1686    score_total = 0;
   1687 
   1688    for (sno = 0; sno < N_SECTORS; sno++) {
   1689       if (sectors[sno].tc == NULL)
   1690          continue;
   1691       for (i = 0; i < N_TTES_PER_SECTOR; i++) {
   1692          if (sectors[sno].tt[i].status != InUse)
   1693             continue;
   1694          score_total += score(&sectors[sno].tt[i]);
   1695          /* Find the rank for sectors[sno].tt[i]. */
   1696          r = n_tops-1;
   1697          while (True) {
   1698             if (r == -1)
   1699                break;
   1700              if (tops[r].addr == 0) {
   1701                r--;
   1702                continue;
   1703              }
   1704              if ( score(&sectors[sno].tt[i]) > tops[r].score ) {
   1705                 r--;
   1706                 continue;
   1707              }
   1708              break;
   1709          }
   1710          r++;
   1711          vg_assert(r >= 0 && r <= n_tops);
   1712          /* This bb should be placed at r, and bbs above it shifted
   1713             upwards one slot. */
   1714          if (r < n_tops) {
   1715             for (s = n_tops-1; s > r; s--)
   1716                tops[s] = tops[s-1];
   1717             tops[r].addr  = sectors[sno].tt[i].entry;
   1718             tops[r].score = score( &sectors[sno].tt[i] );
   1719          }
   1720       }
   1721    }
   1722 
   1723    return score_total;
   1724 }
   1725 
   1726 /*--------------------------------------------------------------------*/
   1727 /*--- end                                                          ---*/
   1728 /*--------------------------------------------------------------------*/
   1729