Home | History | Annotate | Download | only in coregrind
      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- Management of the translation table and cache.               ---*/
      4 /*---                                                 m_transtab.c ---*/
      5 /*--------------------------------------------------------------------*/
      6 
      7 /*
      8    This file is part of Valgrind, a dynamic binary instrumentation
      9    framework.
     10 
     11    Copyright (C) 2000-2010 Julian Seward
     12       jseward (at) acm.org
     13 
     14    This program is free software; you can redistribute it and/or
     15    modify it under the terms of the GNU General Public License as
     16    published by the Free Software Foundation; either version 2 of the
     17    License, or (at your option) any later version.
     18 
     19    This program is distributed in the hope that it will be useful, but
     20    WITHOUT ANY WARRANTY; without even the implied warranty of
     21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     22    General Public License for more details.
     23 
     24    You should have received a copy of the GNU General Public License
     25    along with this program; if not, write to the Free Software
     26    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     27    02111-1307, USA.
     28 
     29    The GNU General Public License is contained in the file COPYING.
     30 */
     31 
     32 #include "pub_core_basics.h"
     33 #include "pub_core_debuglog.h"
     34 #include "pub_core_machine.h"    // For VG(machine_get_VexArchInfo)
     35 #include "pub_core_libcbase.h"
     36 #include "pub_core_libcassert.h"
     37 #include "pub_core_libcprint.h"
     38 #include "pub_core_options.h"
     39 #include "pub_core_tooliface.h"  // For VG_(details).avg_translation_sizeB
     40 #include "pub_core_transtab.h"
     41 #include "pub_core_aspacemgr.h"
     42 #include "pub_core_mallocfree.h" // VG_(out_of_memory_NORETURN)
     43 
     44 // JRS FIXME get rid of this somehow
     45 #if defined(VGP_arm_linux)
     46 # include "pub_core_vkiscnums.h" // __ARM_NR_cacheflush
     47 # include "pub_core_syscall.h"   // VG_(do_syscallN)
     48 #endif
     49 
     50 
     51 /* #define DEBUG_TRANSTAB */
     52 
     53 
     54 /*-------------------------------------------------------------*/
     55 /*--- Management of the FIFO-based translation table+cache. ---*/
     56 /*-------------------------------------------------------------*/
     57 
     58 /*------------------ CONSTANTS ------------------*/
     59 
     60 /* Number of sectors the TC is divided into.  If you need a larger
     61    overall translation cache, increase this value. */
     62 #define N_SECTORS 8
     63 
     64 /* Number of TC entries in each sector.  This needs to be a prime
     65    number to work properly, it must be <= 65535 (so that a TT index
     66    fits in a UShort, leaving room for 0xFFFF(EC2TTE_DELETED) to denote
     67    'deleted') and it is strongly recommended not to change this.
     68    65521 is the largest prime <= 65535. */
     69 #define N_TTES_PER_SECTOR /*30011*/ /*40009*/ 65521
     70 
     71 /* Because each sector contains a hash table of TTEntries, we need to
     72    specify the maximum allowable loading, after which the sector is
     73    deemed full. */
     74 #define SECTOR_TT_LIMIT_PERCENT 65
     75 
     76 /* The sector is deemed full when this many entries are in it. */
     77 #define N_TTES_PER_SECTOR_USABLE \
     78            ((N_TTES_PER_SECTOR * SECTOR_TT_LIMIT_PERCENT) / 100)
     79 
     80 /* Equivalence classes for fast address range deletion.  There are 1 +
     81    2^ECLASS_WIDTH bins.  The highest one, ECLASS_MISC, describes an
     82    address range which does not fall cleanly within any specific bin.
     83    Note that ECLASS_SHIFT + ECLASS_WIDTH must be < 32. */
     84 #define ECLASS_SHIFT 11
     85 #define ECLASS_WIDTH 8
     86 #define ECLASS_MISC  (1 << ECLASS_WIDTH)
     87 #define ECLASS_N     (1 + ECLASS_MISC)
     88 
     89 #define EC2TTE_DELETED  0xFFFF /* 16-bit special value */
     90 
     91 
     92 /*------------------ TYPES ------------------*/
     93 
     94 /* A translation-table entry.  This indicates precisely which areas of
     95    guest code are included in the translation, and contains all other
     96    auxiliary info too.  */
     97 typedef
     98    struct {
     99       /* Profiling only: the count and weight (arbitrary meaning) for
    100          this translation.  Weight is a property of the translation
    101          itself and computed once when the translation is created.
    102          Count is an entry count for the translation and is
    103          incremented by 1 every time the translation is used, if we
    104          are profiling. */
    105       UInt     count;
    106       UShort   weight;
    107 
    108       /* Status of the slot.  Note, we need to be able to do lazy
    109          deletion, hence the Deleted state. */
    110       enum { InUse, Deleted, Empty } status;
    111 
    112       /* 64-bit aligned pointer to one or more 64-bit words containing
    113          the corresponding host code (must be in the same sector!)
    114          This is a pointer into the sector's tc (code) area. */
    115       ULong* tcptr;
    116 
    117       /* This is the original guest address that purportedly is the
    118          entry point of the translation.  You might think that .entry
    119          should be the same as .vge->base[0], and most of the time it
    120          is.  However, when doing redirections, that is not the case.
    121          .vge must always correctly describe the guest code sections
    122          from which this translation was made.  However, .entry may or
    123          may not be a lie, depending on whether or not we're doing
    124          redirection. */
    125       Addr64 entry;
    126 
    127       /* This structure describes precisely what ranges of guest code
    128          the translation covers, so we can decide whether or not to
    129          delete it when translations of a given address range are
    130          invalidated. */
    131       VexGuestExtents vge;
    132 
    133       /* Address range summary info: these are pointers back to
    134          eclass[] entries in the containing Sector.  Those entries in
    135          turn point back here -- the two structures are mutually
    136          redundant but both necessary to make fast deletions work.
    137          The eclass info is similar to, and derived from, this entry's
    138          'vge' field, but it is not the same */
    139       UShort n_tte2ec;      // # tte2ec pointers (1 to 3)
    140       UShort tte2ec_ec[3];  // for each, the eclass #
    141       UInt   tte2ec_ix[3];  // and the index within the eclass.
    142       // for i in 0 .. n_tte2ec-1
    143       //    sec->ec2tte[ tte2ec_ec[i] ][ tte2ec_ix[i] ]
    144       // should be the index
    145       // of this TTEntry in the containing Sector's tt array.
    146    }
    147    TTEntry;
    148 
    149 
    150 /* Finally, a sector itself.  Each sector contains an array of
    151    TCEntries, which hold code, and an array of TTEntries, containing
    152    all required administrative info.  Profiling is supported using the
    153    TTEntry .count and .weight fields, if required.  Each sector is
    154    independent in that no cross-sector references are allowed.
    155 
    156    If the sector is not in use, all three pointers are NULL and
    157    tt_n_inuse is zero.
    158 */
    159 typedef
    160    struct {
    161       /* The TCEntry area.  Size of this depends on the average
    162          translation size.  We try and size it so it becomes full
    163          precisely when this sector's translation table (tt) reaches
    164          its load limit (SECTOR_TT_LIMIT_PERCENT). */
    165       ULong* tc;
    166 
    167       /* The TTEntry array.  This is a fixed size, always containing
    168          exactly N_TTES_PER_SECTOR entries. */
    169       TTEntry* tt;
    170 
    171       /* This points to the current allocation point in tc. */
    172       ULong* tc_next;
    173 
    174       /* The count of tt entries with state InUse. */
    175       Int tt_n_inuse;
    176 
    177       /* Expandable arrays of tt indices for each of the ECLASS_N
    178          address range equivalence classes.  These hold indices into
    179          the containing sector's tt array, which in turn should point
    180          back here. */
    181       Int     ec2tte_size[ECLASS_N];
    182       Int     ec2tte_used[ECLASS_N];
    183       UShort* ec2tte[ECLASS_N];
    184    }
    185    Sector;
    186 
    187 
    188 /*------------------ DECLS ------------------*/
    189 
    190 /* The root data structure is an array of sectors.  The index of the
    191    youngest sector is recorded, and new translations are put into that
    192    sector.  When it fills up, we move along to the next sector and
    193    start to fill that up, wrapping around at the end of the array.
    194    That way, once all N_TC_SECTORS have been bought into use for the
    195    first time, and are full, we then re-use the oldest sector,
    196    endlessly.
    197 
    198    When running, youngest sector should be between >= 0 and <
    199    N_TC_SECTORS.  The initial -1 value indicates the TT/TC system is
    200    not yet initialised.
    201 */
    202 static Sector sectors[N_SECTORS];
    203 static Int    youngest_sector = -1;
    204 
    205 /* The number of ULongs in each TCEntry area.  This is computed once
    206    at startup and does not change. */
    207 static Int    tc_sector_szQ;
    208 
    209 
    210 /* A list of sector numbers, in the order which they should be
    211    searched to find translations.  This is an optimisation to be used
    212    when searching for translations and should not affect
    213    correctness.  -1 denotes "no entry". */
    214 static Int sector_search_order[N_SECTORS];
    215 
    216 
    217 /* Fast helper for the TC.  A direct-mapped cache which holds a set of
    218    recently used (guest address, host address) pairs.  This array is
    219    referred to directly from m_dispatch/dispatch-<platform>.S.
    220 
    221    Entries in tt_fast may refer to any valid TC entry, regardless of
    222    which sector it's in.  Consequently we must be very careful to
    223    invalidate this cache when TC entries are changed or disappear.
    224 
    225    A special .guest address - TRANSTAB_BOGUS_GUEST_ADDR -- must be
    226    pointed at to cause that cache entry to miss.  This relies on the
    227    assumption that no guest code actually has that address, hence a
    228    value 0x1 seems good.  m_translate gives the client a synthetic
    229    segfault if it tries to execute at this address.
    230 */
    231 /*
    232 typedef
    233    struct {
    234       Addr guest;
    235       Addr host;
    236    }
    237    FastCacheEntry;
    238 */
    239 /*global*/ __attribute__((aligned(16)))
    240            FastCacheEntry VG_(tt_fast)[VG_TT_FAST_SIZE];
    241 /*
    242 #define TRANSTAB_BOGUS_GUEST_ADDR ((Addr)1)
    243 */
    244 
    245 /* For profiling, we have a parallel array of pointers to .count
    246    fields in TT entries.  Again, these pointers must be invalidated
    247    when translations disappear.  A NULL pointer suffices to indicate
    248    an unused slot.
    249 
    250    When not profiling (the normal case, VG_(clo_profile_flags) == 0),
    251    all tt_fastN entries are set to NULL at startup and never read nor
    252    written after that.
    253 
    254    When profiling (VG_(clo_profile_flags) > 0), tt_fast and tt_fastN
    255    change together: if tt_fast[i].guest is TRANSTAB_BOGUS_GUEST_ADDR
    256    then the corresponding tt_fastN[i] must be null.  If
    257    tt_fast[i].guest is any other value, then tt_fastN[i] *must* point
    258    to the .count field of the corresponding TT entry.
    259 
    260    tt_fast and tt_fastN are referred to from assembly code
    261    (dispatch.S).
    262 */
    263 /*global*/ UInt* VG_(tt_fastN)[VG_TT_FAST_SIZE];
    264 
    265 
    266 /* Make sure we're not used before initialisation. */
    267 static Bool init_done = False;
    268 
    269 
    270 /*------------------ STATS DECLS ------------------*/
    271 
    272 /* Number of fast-cache updates and flushes done. */
    273 ULong n_fast_flushes = 0;
    274 ULong n_fast_updates = 0;
    275 
    276 /* Number of full lookups done. */
    277 ULong n_full_lookups = 0;
    278 ULong n_lookup_probes = 0;
    279 
    280 /* Number/osize/tsize of translations entered; also the number of
    281    those for which self-checking was requested. */
    282 ULong n_in_count    = 0;
    283 ULong n_in_osize    = 0;
    284 ULong n_in_tsize    = 0;
    285 ULong n_in_sc_count = 0;
    286 
    287 /* Number/osize of translations discarded due to lack of space. */
    288 ULong n_dump_count = 0;
    289 ULong n_dump_osize = 0;
    290 
    291 /* Number/osize of translations discarded due to requests to do so. */
    292 ULong n_disc_count = 0;
    293 ULong n_disc_osize = 0;
    294 
    295 
    296 /*-------------------------------------------------------------*/
    297 /*--- Address-range equivalence class stuff                 ---*/
    298 /*-------------------------------------------------------------*/
    299 
    300 /* Return equivalence class number for a range. */
    301 
    302 static Int range_to_eclass ( Addr64 start, UInt len )
    303 {
    304    UInt mask   = (1 << ECLASS_WIDTH) - 1;
    305    UInt lo     = (UInt)start;
    306    UInt hi     = lo + len - 1;
    307    UInt loBits = (lo >> ECLASS_SHIFT) & mask;
    308    UInt hiBits = (hi >> ECLASS_SHIFT) & mask;
    309    if (loBits == hiBits) {
    310       vg_assert(loBits < ECLASS_N-1);
    311       return loBits;
    312    } else {
    313       return ECLASS_MISC;
    314    }
    315 }
    316 
    317 
    318 /* Calculates the equivalence class numbers for any VexGuestExtent.
    319    These are written in *eclasses, which must be big enough to hold 3
    320    Ints.  The number written, between 1 and 3, is returned.  The
    321    eclasses are presented in order, and any duplicates are removed.
    322 */
    323 
    324 static
    325 Int vexGuestExtents_to_eclasses ( /*OUT*/Int* eclasses,
    326                                   VexGuestExtents* vge )
    327 {
    328 #  define SWAP(_lv1,_lv2) \
    329       do { Int t = _lv1; _lv1 = _lv2; _lv2 = t; } while (0)
    330 
    331    Int i, j, n_ec, r;
    332 
    333    vg_assert(vge->n_used >= 1 && vge->n_used <= 3);
    334 
    335    n_ec = 0;
    336    for (i = 0; i < vge->n_used; i++) {
    337       r = range_to_eclass( vge->base[i], (UInt)vge->len[i] );
    338       if (r == ECLASS_MISC)
    339          goto bad;
    340       /* only add if we haven't already seen it */
    341       for (j = 0; j < n_ec; j++)
    342          if (eclasses[j] == r)
    343             break;
    344       if (j == n_ec)
    345          eclasses[n_ec++] = r;
    346    }
    347 
    348    if (n_ec == 1)
    349       return 1;
    350 
    351    if (n_ec == 2) {
    352       /* sort */
    353       if (eclasses[0] > eclasses[1])
    354          SWAP(eclasses[0], eclasses[1]);
    355       return 2;
    356    }
    357 
    358    if (n_ec == 3) {
    359       /* sort */
    360       if (eclasses[0] > eclasses[2])
    361          SWAP(eclasses[0], eclasses[2]);
    362       if (eclasses[0] > eclasses[1])
    363          SWAP(eclasses[0], eclasses[1]);
    364       if (eclasses[1] > eclasses[2])
    365          SWAP(eclasses[1], eclasses[2]);
    366       return 3;
    367    }
    368 
    369    /* NOTREACHED */
    370    vg_assert(0);
    371 
    372   bad:
    373    eclasses[0] = ECLASS_MISC;
    374    return 1;
    375 
    376 #  undef SWAP
    377 }
    378 
    379 
    380 /* Add tteno to the set of entries listed for equivalence class ec in
    381    this sector.  Returns used location in eclass array. */
    382 
    383 static
    384 UInt addEClassNo ( /*MOD*/Sector* sec, Int ec, UShort tteno )
    385 {
    386    Int    old_sz, new_sz, i, r;
    387    UShort *old_ar, *new_ar;
    388 
    389    vg_assert(ec >= 0 && ec < ECLASS_N);
    390    vg_assert(tteno < N_TTES_PER_SECTOR);
    391 
    392    if (0) VG_(printf)("ec %d  gets %d\n", ec, (Int)tteno);
    393 
    394    if (sec->ec2tte_used[ec] >= sec->ec2tte_size[ec]) {
    395 
    396       vg_assert(sec->ec2tte_used[ec] == sec->ec2tte_size[ec]);
    397 
    398       old_sz = sec->ec2tte_size[ec];
    399       old_ar = sec->ec2tte[ec];
    400       new_sz = old_sz==0 ? 8 : old_sz<64 ? 2*old_sz : (3*old_sz)/2;
    401       new_ar = VG_(arena_malloc)(VG_AR_TTAUX, "transtab.aECN.1",
    402                                  new_sz * sizeof(UShort));
    403       for (i = 0; i < old_sz; i++)
    404          new_ar[i] = old_ar[i];
    405       if (old_ar)
    406          VG_(arena_free)(VG_AR_TTAUX, old_ar);
    407       sec->ec2tte_size[ec] = new_sz;
    408       sec->ec2tte[ec] = new_ar;
    409 
    410       if (0) VG_(printf)("expand ec %d to %d\n", ec, new_sz);
    411    }
    412 
    413    /* Common case */
    414    r = sec->ec2tte_used[ec]++;
    415    vg_assert(r >= 0 && r < sec->ec2tte_size[ec]);
    416    sec->ec2tte[ec][r] = tteno;
    417    return (UInt)r;
    418 }
    419 
    420 
    421 /* 'vge' is being added to 'sec' at TT entry 'tteno'.  Add appropriate
    422    eclass entries to 'sec'. */
    423 
    424 static
    425 void upd_eclasses_after_add ( /*MOD*/Sector* sec, Int tteno )
    426 {
    427    Int i, r, eclasses[3];
    428    TTEntry* tte;
    429    vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
    430 
    431    tte = &sec->tt[tteno];
    432    r = vexGuestExtents_to_eclasses( eclasses, &tte->vge );
    433 
    434    vg_assert(r >= 1 && r <= 3);
    435    tte->n_tte2ec = r;
    436 
    437    for (i = 0; i < r; i++) {
    438       tte->tte2ec_ec[i] = eclasses[i];
    439       tte->tte2ec_ix[i] = addEClassNo( sec, eclasses[i], (UShort)tteno );
    440    }
    441 }
    442 
    443 
    444 /* Check the eclass info in 'sec' to ensure it is consistent.  Returns
    445    True if OK, False if something's not right.  Expensive. */
    446 
    447 static Bool sanity_check_eclasses_in_sector ( Sector* sec )
    448 {
    449 #  define BAD(_str) do { whassup = (_str); goto bad; } while (0)
    450 
    451    HChar*   whassup = NULL;
    452    Int      i, j, k, n, ec_num, ec_idx;
    453    TTEntry* tte;
    454    UShort   tteno;
    455    ULong*   tce;
    456 
    457    /* Basic checks on this sector */
    458    if (sec->tt_n_inuse < 0 || sec->tt_n_inuse > N_TTES_PER_SECTOR_USABLE)
    459       BAD("invalid sec->tt_n_inuse");
    460    tce = sec->tc_next;
    461    if (tce < &sec->tc[0] || tce > &sec->tc[tc_sector_szQ])
    462       BAD("sec->tc_next points outside tc");
    463 
    464    /* For each eclass ... */
    465    for (i = 0; i < ECLASS_N; i++) {
    466       if (sec->ec2tte_size[i] == 0 && sec->ec2tte[i] != NULL)
    467          BAD("ec2tte_size/ec2tte mismatch(1)");
    468       if (sec->ec2tte_size[i] != 0 && sec->ec2tte[i] == NULL)
    469          BAD("ec2tte_size/ec2tte mismatch(2)");
    470       if (sec->ec2tte_used[i] < 0
    471           || sec->ec2tte_used[i] > sec->ec2tte_size[i])
    472          BAD("implausible ec2tte_used");
    473       if (sec->ec2tte_used[i] == 0)
    474          continue;
    475 
    476       /* For each tt reference in each eclass .. ensure the reference
    477          is to a valid tt entry, and that the entry's address ranges
    478          really include this eclass. */
    479 
    480       for (j = 0; j < sec->ec2tte_used[i]; j++) {
    481          tteno = sec->ec2tte[i][j];
    482          if (tteno == EC2TTE_DELETED)
    483             continue;
    484          if (tteno >= N_TTES_PER_SECTOR)
    485             BAD("implausible tteno");
    486          tte = &sec->tt[tteno];
    487          if (tte->status != InUse)
    488             BAD("tteno points to non-inuse tte");
    489          if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
    490             BAD("tte->n_tte2ec out of range");
    491          /* Exactly least one of tte->eclasses[0 .. tte->n_eclasses-1]
    492             must equal i.  Inspect tte's eclass info. */
    493          n = 0;
    494          for (k = 0; k < tte->n_tte2ec; k++) {
    495             if (k < tte->n_tte2ec-1
    496                 && tte->tte2ec_ec[k] >= tte->tte2ec_ec[k+1])
    497                BAD("tte->tte2ec_ec[..] out of order");
    498             ec_num = tte->tte2ec_ec[k];
    499             if (ec_num < 0 || ec_num >= ECLASS_N)
    500                BAD("tte->tte2ec_ec[..] out of range");
    501             if (ec_num != i)
    502                continue;
    503             ec_idx = tte->tte2ec_ix[k];
    504             if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[i])
    505                BAD("tte->tte2ec_ix[..] out of range");
    506             if (ec_idx == j)
    507                n++;
    508          }
    509          if (n != 1)
    510             BAD("tteno does not point back at eclass");
    511       }
    512    }
    513 
    514    /* That establishes that for each forward pointer from TTEntrys
    515       there is a corresponding backward pointer from the eclass[]
    516       arrays.  However, it doesn't rule out the possibility of other,
    517       bogus pointers in the eclass[] arrays.  So do those similarly:
    518       scan through them and check the TTEntryies they point at point
    519       back. */
    520 
    521    for (i = 0; i < N_TTES_PER_SECTOR_USABLE; i++) {
    522 
    523       tte = &sec->tt[i];
    524       if (tte->status == Empty || tte->status == Deleted) {
    525          if (tte->n_tte2ec != 0)
    526             BAD("tte->n_eclasses nonzero for unused tte");
    527          continue;
    528       }
    529 
    530       vg_assert(tte->status == InUse);
    531 
    532       if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
    533          BAD("tte->n_eclasses out of range(2)");
    534 
    535       for (j = 0; j < tte->n_tte2ec; j++) {
    536          ec_num = tte->tte2ec_ec[j];
    537          if (ec_num < 0 || ec_num >= ECLASS_N)
    538             BAD("tte->eclass[..] out of range");
    539          ec_idx = tte->tte2ec_ix[j];
    540          if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[ec_num])
    541             BAD("tte->ec_idx[..] out of range(2)");
    542          if (sec->ec2tte[ec_num][ec_idx] != i)
    543             BAD("ec2tte does not point back to tte");
    544       }
    545    }
    546 
    547    return True;
    548 
    549   bad:
    550    if (whassup)
    551       VG_(debugLog)(0, "transtab", "eclass sanity fail: %s\n", whassup);
    552 
    553 #  if 0
    554    VG_(printf)("eclass = %d\n", i);
    555    VG_(printf)("tteno = %d\n", (Int)tteno);
    556    switch (tte->status) {
    557       case InUse:   VG_(printf)("InUse\n"); break;
    558       case Deleted: VG_(printf)("Deleted\n"); break;
    559       case Empty:   VG_(printf)("Empty\n"); break;
    560    }
    561    if (tte->status != Empty) {
    562       for (k = 0; k < tte->vge.n_used; k++)
    563          VG_(printf)("0x%llx %d\n", tte->vge.base[k],
    564                                     (Int)tte->vge.len[k]);
    565    }
    566 #  endif
    567 
    568    return False;
    569 
    570 #  undef BAD
    571 }
    572 
    573 
    574 /* Sanity check absolutely everything.  True == check passed. */
    575 
    576 /* forwards */
    577 static Bool sanity_check_redir_tt_tc ( void );
    578 static Bool sanity_check_fastcache ( void );
    579 
    580 static Bool sanity_check_sector_search_order ( void )
    581 {
    582    Int i, j, nListed;
    583    /* assert the array is the right size */
    584    vg_assert(N_SECTORS == (sizeof(sector_search_order)
    585                            / sizeof(sector_search_order[0])));
    586    /* Check it's of the form  valid_sector_numbers ++ [-1, -1, ..] */
    587    for (i = 0; i < N_SECTORS; i++) {
    588       if (sector_search_order[i] < 0 || sector_search_order[i] >= N_SECTORS)
    589          break;
    590    }
    591    nListed = i;
    592    for (/* */; i < N_SECTORS; i++) {
    593       if (sector_search_order[i] != -1)
    594          break;
    595    }
    596    if (i != N_SECTORS)
    597       return False;
    598    /* Check each sector number only appears once */
    599    for (i = 0; i < N_SECTORS; i++) {
    600       if (sector_search_order[i] == -1)
    601          continue;
    602       for (j = i+1; j < N_SECTORS; j++) {
    603          if (sector_search_order[j] == sector_search_order[i])
    604             return False;
    605       }
    606    }
    607    /* Check that the number of listed sectors equals the number
    608       in use, by counting nListed back down. */
    609    for (i = 0; i < N_SECTORS; i++) {
    610       if (sectors[i].tc != NULL)
    611          nListed--;
    612    }
    613    if (nListed != 0)
    614       return False;
    615    return True;
    616 }
    617 
    618 static Bool sanity_check_all_sectors ( void )
    619 {
    620    Int     sno;
    621    Bool    sane;
    622    Sector* sec;
    623    for (sno = 0; sno < N_SECTORS; sno++) {
    624       sec = &sectors[sno];
    625       if (sec->tc == NULL)
    626          continue;
    627       sane = sanity_check_eclasses_in_sector( sec );
    628       if (!sane)
    629          return False;
    630    }
    631    if ( !sanity_check_redir_tt_tc() )
    632       return False;
    633    if ( !sanity_check_fastcache() )
    634       return False;
    635    if ( !sanity_check_sector_search_order() )
    636       return False;
    637    return True;
    638 }
    639 
    640 
    641 
    642 /*-------------------------------------------------------------*/
    643 /*--- Add/find translations                                 ---*/
    644 /*-------------------------------------------------------------*/
    645 
    646 static UInt vge_osize ( VexGuestExtents* vge )
    647 {
    648    UInt i, n = 0;
    649    for (i = 0; i < vge->n_used; i++)
    650       n += (UInt)vge->len[i];
    651    return n;
    652 }
    653 
    654 static Bool isValidSector ( Int sector )
    655 {
    656    if (sector < 0 || sector >= N_SECTORS)
    657       return False;
    658    return True;
    659 }
    660 
    661 static inline UInt HASH_TT ( Addr64 key )
    662 {
    663    UInt kHi = (UInt)(key >> 32);
    664    UInt kLo = (UInt)key;
    665    UInt k32 = kHi ^ kLo;
    666    UInt ror = 7;
    667    if (ror > 0)
    668       k32 = (k32 >> ror) | (k32 << (32-ror));
    669    return k32 % N_TTES_PER_SECTOR;
    670 }
    671 
    672 static void setFastCacheEntry ( Addr64 key, ULong* tcptr, UInt* count )
    673 {
    674    UInt cno = (UInt)VG_TT_FAST_HASH(key);
    675    VG_(tt_fast)[cno].guest = (Addr)key;
    676    VG_(tt_fast)[cno].host  = (Addr)tcptr;
    677    if (VG_(clo_profile_flags) > 0)
    678       VG_(tt_fastN)[cno] = count;
    679    n_fast_updates++;
    680    /* This shouldn't fail.  It should be assured by m_translate
    681       which should reject any attempt to make translation of code
    682       starting at TRANSTAB_BOGUS_GUEST_ADDR. */
    683    vg_assert(VG_(tt_fast)[cno].guest != TRANSTAB_BOGUS_GUEST_ADDR);
    684 }
    685 
    686 /* Invalidate the fast cache's counter array, VG_(tt_fastN). */
    687 static void invalidateFastNCache ( void )
    688 {
    689    UInt j;
    690    vg_assert(VG_TT_FAST_SIZE > 0 && (VG_TT_FAST_SIZE % 4) == 0);
    691    for (j = 0; j < VG_TT_FAST_SIZE; j += 4) {
    692       VG_(tt_fastN)[j+0] = NULL;
    693       VG_(tt_fastN)[j+1] = NULL;
    694       VG_(tt_fastN)[j+2] = NULL;
    695       VG_(tt_fastN)[j+3] = NULL;
    696    }
    697    vg_assert(j == VG_TT_FAST_SIZE);
    698 }
    699 
    700 /* Invalidate the fast cache VG_(tt_fast).  If profiling, also
    701    invalidate the fast cache's counter array VG_(tt_fastN), otherwise
    702    don't touch it. */
    703 static void invalidateFastCache ( void )
    704 {
    705    UInt j;
    706    /* This loop is popular enough to make it worth unrolling a
    707       bit, at least on ppc32. */
    708    vg_assert(VG_TT_FAST_SIZE > 0 && (VG_TT_FAST_SIZE % 4) == 0);
    709    for (j = 0; j < VG_TT_FAST_SIZE; j += 4) {
    710       VG_(tt_fast)[j+0].guest = TRANSTAB_BOGUS_GUEST_ADDR;
    711       VG_(tt_fast)[j+1].guest = TRANSTAB_BOGUS_GUEST_ADDR;
    712       VG_(tt_fast)[j+2].guest = TRANSTAB_BOGUS_GUEST_ADDR;
    713       VG_(tt_fast)[j+3].guest = TRANSTAB_BOGUS_GUEST_ADDR;
    714    }
    715 
    716    if (VG_(clo_profile_flags) > 0)
    717       invalidateFastNCache();
    718 
    719    vg_assert(j == VG_TT_FAST_SIZE);
    720    n_fast_flushes++;
    721 }
    722 
    723 static Bool sanity_check_fastcache ( void )
    724 {
    725    UInt j;
    726    if (0) VG_(printf)("sanity check fastcache\n");
    727    if (VG_(clo_profile_flags) > 0) {
    728       /* profiling */
    729       for (j = 0; j < VG_TT_FAST_SIZE; j++) {
    730          if (VG_(tt_fastN)[j] == NULL
    731              && VG_(tt_fast)[j].guest != TRANSTAB_BOGUS_GUEST_ADDR)
    732             return False;
    733          if (VG_(tt_fastN)[j] != NULL
    734              && VG_(tt_fast)[j].guest == TRANSTAB_BOGUS_GUEST_ADDR)
    735             return False;
    736       }
    737    } else {
    738       /* not profiling */
    739       for (j = 0; j < VG_TT_FAST_SIZE; j++) {
    740          if (VG_(tt_fastN)[j] != NULL)
    741             return False;
    742       }
    743    }
    744    return True;
    745 }
    746 
    747 static void initialiseSector ( Int sno )
    748 {
    749    Int    i;
    750    SysRes sres;
    751    Sector* sec;
    752    vg_assert(isValidSector(sno));
    753 
    754    { Bool sane = sanity_check_sector_search_order();
    755      vg_assert(sane);
    756    }
    757    sec = &sectors[sno];
    758 
    759    if (sec->tc == NULL) {
    760 
    761       /* Sector has never been used before.  Need to allocate tt and
    762          tc. */
    763       vg_assert(sec->tt == NULL);
    764       vg_assert(sec->tc_next == NULL);
    765       vg_assert(sec->tt_n_inuse == 0);
    766       for (i = 0; i < ECLASS_N; i++) {
    767          vg_assert(sec->ec2tte_size[i] == 0);
    768          vg_assert(sec->ec2tte_used[i] == 0);
    769          vg_assert(sec->ec2tte[i] == NULL);
    770       }
    771 
    772       VG_(debugLog)(1,"transtab", "allocate sector %d\n", sno);
    773 
    774       sres = VG_(am_mmap_anon_float_valgrind)( 8 * tc_sector_szQ );
    775       if (sr_isError(sres)) {
    776          VG_(out_of_memory_NORETURN)("initialiseSector(TC)",
    777                                      8 * tc_sector_szQ );
    778 	 /*NOTREACHED*/
    779       }
    780       sec->tc = (ULong*)(AddrH)sr_Res(sres);
    781 
    782       sres = VG_(am_mmap_anon_float_valgrind)
    783                 ( N_TTES_PER_SECTOR * sizeof(TTEntry) );
    784       if (sr_isError(sres)) {
    785          VG_(out_of_memory_NORETURN)("initialiseSector(TT)",
    786                                      N_TTES_PER_SECTOR * sizeof(TTEntry) );
    787 	 /*NOTREACHED*/
    788       }
    789       sec->tt = (TTEntry*)(AddrH)sr_Res(sres);
    790 
    791       for (i = 0; i < N_TTES_PER_SECTOR; i++) {
    792          sec->tt[i].status   = Empty;
    793          sec->tt[i].n_tte2ec = 0;
    794       }
    795 
    796       /* Add an entry in the sector_search_order */
    797       for (i = 0; i < N_SECTORS; i++) {
    798          if (sector_search_order[i] == -1)
    799             break;
    800       }
    801       vg_assert(i >= 0 && i < N_SECTORS);
    802       sector_search_order[i] = sno;
    803 
    804       if (VG_(clo_verbosity) > 2)
    805          VG_(message)(Vg_DebugMsg, "TT/TC: initialise sector %d\n", sno);
    806 
    807    } else {
    808 
    809       /* Sector has been used before.  Dump the old contents. */
    810       VG_(debugLog)(1,"transtab", "recycle sector %d\n", sno);
    811       vg_assert(sec->tt != NULL);
    812       vg_assert(sec->tc_next != NULL);
    813       n_dump_count += sec->tt_n_inuse;
    814 
    815       /* Visit each just-about-to-be-abandoned translation. */
    816       for (i = 0; i < N_TTES_PER_SECTOR; i++) {
    817          if (sec->tt[i].status == InUse) {
    818             vg_assert(sec->tt[i].n_tte2ec >= 1);
    819             vg_assert(sec->tt[i].n_tte2ec <= 3);
    820             n_dump_osize += vge_osize(&sec->tt[i].vge);
    821             /* Tell the tool too. */
    822             if (VG_(needs).superblock_discards) {
    823                VG_TDICT_CALL( tool_discard_superblock_info,
    824                               sec->tt[i].entry,
    825                               sec->tt[i].vge );
    826             }
    827          } else {
    828             vg_assert(sec->tt[i].n_tte2ec == 0);
    829          }
    830          sec->tt[i].status   = Empty;
    831          sec->tt[i].n_tte2ec = 0;
    832       }
    833 
    834       /* Free up the eclass structures. */
    835       for (i = 0; i < ECLASS_N; i++) {
    836          if (sec->ec2tte_size[i] == 0) {
    837             vg_assert(sec->ec2tte_used[i] == 0);
    838             vg_assert(sec->ec2tte[i] == NULL);
    839          } else {
    840             vg_assert(sec->ec2tte[i] != NULL);
    841             VG_(arena_free)(VG_AR_TTAUX, sec->ec2tte[i]);
    842             sec->ec2tte[i] = NULL;
    843             sec->ec2tte_size[i] = 0;
    844             sec->ec2tte_used[i] = 0;
    845          }
    846       }
    847 
    848       /* Sanity check: ensure it is already in
    849          sector_search_order[]. */
    850       for (i = 0; i < N_SECTORS; i++) {
    851          if (sector_search_order[i] == sno)
    852             break;
    853       }
    854       vg_assert(i >= 0 && i < N_SECTORS);
    855 
    856       if (VG_(clo_verbosity) > 2)
    857          VG_(message)(Vg_DebugMsg, "TT/TC: recycle sector %d\n", sno);
    858    }
    859 
    860    sec->tc_next = sec->tc;
    861    sec->tt_n_inuse = 0;
    862 
    863    invalidateFastCache();
    864 
    865    { Bool sane = sanity_check_sector_search_order();
    866      vg_assert(sane);
    867    }
    868 }
    869 
    870 static void invalidate_icache ( void *ptr, Int nbytes )
    871 {
    872 #  if defined(VGA_ppc32) || defined(VGA_ppc64)
    873    Addr startaddr = (Addr) ptr;
    874    Addr endaddr   = startaddr + nbytes;
    875    Addr cls;
    876    Addr addr;
    877    VexArchInfo vai;
    878 
    879    if (nbytes == 0) return;
    880    vg_assert(nbytes > 0);
    881 
    882    VG_(machine_get_VexArchInfo)( NULL, &vai );
    883    cls = vai.ppc_cache_line_szB;
    884 
    885    /* Stay sane .. */
    886    vg_assert(cls == 32 || cls == 64 || cls == 128);
    887 
    888    startaddr &= ~(cls - 1);
    889    for (addr = startaddr; addr < endaddr; addr += cls) {
    890       __asm__ __volatile__("dcbst 0,%0" : : "r" (addr));
    891    }
    892    __asm__ __volatile__("sync");
    893    for (addr = startaddr; addr < endaddr; addr += cls) {
    894       __asm__ __volatile__("icbi 0,%0" : : "r" (addr));
    895    }
    896    __asm__ __volatile__("sync; isync");
    897 
    898 #  elif defined(VGA_x86)
    899    /* no need to do anything, hardware provides coherence */
    900 
    901 #  elif defined(VGA_amd64)
    902    /* no need to do anything, hardware provides coherence */
    903 
    904 #  elif defined(VGP_arm_linux)
    905    /* ARM cache flushes are privileged, so we must defer to the kernel. */
    906    Addr startaddr = (Addr) ptr;
    907    Addr endaddr   = startaddr + nbytes;
    908    VG_(do_syscall2)(__NR_ARM_cacheflush, startaddr, endaddr);
    909 
    910 #  else
    911 #    error "Unknown ARCH"
    912 #  endif
    913 }
    914 
    915 
    916 /* Add a translation of vge to TT/TC.  The translation is temporarily
    917    in code[0 .. code_len-1].
    918 
    919    pre: youngest_sector points to a valid (although possibly full)
    920    sector.
    921 */
    922 void VG_(add_to_transtab)( VexGuestExtents* vge,
    923                            Addr64           entry,
    924                            AddrH            code,
    925                            UInt             code_len,
    926                            Bool             is_self_checking )
    927 {
    928    Int    tcAvailQ, reqdQ, y, i;
    929    ULong  *tcptr, *tcptr2;
    930    UChar* srcP;
    931    UChar* dstP;
    932 
    933    vg_assert(init_done);
    934    vg_assert(vge->n_used >= 1 && vge->n_used <= 3);
    935 
    936    /* 60000: should agree with N_TMPBUF in m_translate.c. */
    937    vg_assert(code_len > 0 && code_len < 60000);
    938 
    939    if (0)
    940       VG_(printf)("add_to_transtab(entry = 0x%llx, len = %d)\n",
    941                   entry, code_len);
    942 
    943    n_in_count++;
    944    n_in_tsize += code_len;
    945    n_in_osize += vge_osize(vge);
    946    if (is_self_checking)
    947       n_in_sc_count++;
    948 
    949    y = youngest_sector;
    950    vg_assert(isValidSector(y));
    951 
    952    if (sectors[y].tc == NULL)
    953       initialiseSector(y);
    954 
    955    /* Try putting the translation in this sector. */
    956    reqdQ = (code_len + 7) >> 3;
    957 
    958    /* Will it fit in tc? */
    959    tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
    960               - ((ULong*)(sectors[y].tc_next));
    961    vg_assert(tcAvailQ >= 0);
    962    vg_assert(tcAvailQ <= tc_sector_szQ);
    963 
    964    if (tcAvailQ < reqdQ
    965        || sectors[y].tt_n_inuse >= N_TTES_PER_SECTOR_USABLE) {
    966       /* No.  So move on to the next sector.  Either it's never been
    967          used before, in which case it will get its tt/tc allocated
    968          now, or it has been used before, in which case it is set to be
    969          empty, hence throwing out the oldest sector. */
    970       vg_assert(tc_sector_szQ > 0);
    971       VG_(debugLog)(1,"transtab",
    972                       "declare sector %d full "
    973                       "(TT loading %2d%%, TC loading %2d%%)\n",
    974                       y,
    975                       (100 * sectors[y].tt_n_inuse)
    976                          / N_TTES_PER_SECTOR,
    977                       (100 * (tc_sector_szQ - tcAvailQ))
    978                          / tc_sector_szQ);
    979       youngest_sector++;
    980       if (youngest_sector >= N_SECTORS)
    981          youngest_sector = 0;
    982       y = youngest_sector;
    983       initialiseSector(y);
    984    }
    985 
    986    /* Be sure ... */
    987    tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
    988               - ((ULong*)(sectors[y].tc_next));
    989    vg_assert(tcAvailQ >= 0);
    990    vg_assert(tcAvailQ <= tc_sector_szQ);
    991    vg_assert(tcAvailQ >= reqdQ);
    992    vg_assert(sectors[y].tt_n_inuse < N_TTES_PER_SECTOR_USABLE);
    993    vg_assert(sectors[y].tt_n_inuse >= 0);
    994 
    995    /* Copy into tc. */
    996    tcptr = sectors[y].tc_next;
    997    vg_assert(tcptr >= &sectors[y].tc[0]);
    998    vg_assert(tcptr <= &sectors[y].tc[tc_sector_szQ]);
    999 
   1000    dstP = (UChar*)tcptr;
   1001    srcP = (UChar*)code;
   1002    for (i = 0; i < code_len; i++)
   1003       dstP[i] = srcP[i];
   1004    sectors[y].tc_next += reqdQ;
   1005    sectors[y].tt_n_inuse++;
   1006 
   1007    invalidate_icache( dstP, code_len );
   1008 
   1009    /* more paranoia */
   1010    tcptr2 = sectors[y].tc_next;
   1011    vg_assert(tcptr2 >= &sectors[y].tc[0]);
   1012    vg_assert(tcptr2 <= &sectors[y].tc[tc_sector_szQ]);
   1013 
   1014    /* Find an empty tt slot, and use it.  There must be such a slot
   1015       since tt is never allowed to get completely full. */
   1016    i = HASH_TT(entry);
   1017    vg_assert(i >= 0 && i < N_TTES_PER_SECTOR);
   1018    while (True) {
   1019       if (sectors[y].tt[i].status == Empty
   1020           || sectors[y].tt[i].status == Deleted)
   1021          break;
   1022       i++;
   1023       if (i >= N_TTES_PER_SECTOR)
   1024          i = 0;
   1025    }
   1026 
   1027    sectors[y].tt[i].status = InUse;
   1028    sectors[y].tt[i].tcptr  = tcptr;
   1029    sectors[y].tt[i].count  = 0;
   1030    sectors[y].tt[i].weight = 1;
   1031    sectors[y].tt[i].vge    = *vge;
   1032    sectors[y].tt[i].entry  = entry;
   1033 
   1034    /* Update the fast-cache. */
   1035    setFastCacheEntry( entry, tcptr, &sectors[y].tt[i].count );
   1036 
   1037    /* Note the eclass numbers for this translation. */
   1038    upd_eclasses_after_add( &sectors[y], i );
   1039 }
   1040 
   1041 
   1042 /* Search for the translation of the given guest address.  If
   1043    requested, a successful search can also cause the fast-caches to be
   1044    updated.
   1045 */
   1046 Bool VG_(search_transtab) ( /*OUT*/AddrH* result,
   1047                             Addr64        guest_addr,
   1048                             Bool          upd_cache )
   1049 {
   1050    Int i, j, k, kstart, sno;
   1051 
   1052    vg_assert(init_done);
   1053    /* Find the initial probe point just once.  It will be the same in
   1054       all sectors and avoids multiple expensive % operations. */
   1055    n_full_lookups++;
   1056    k      = -1;
   1057    kstart = HASH_TT(guest_addr);
   1058    vg_assert(kstart >= 0 && kstart < N_TTES_PER_SECTOR);
   1059 
   1060    /* Search in all the sectors,using sector_search_order[] as a
   1061       heuristic guide as to what order to visit the sectors. */
   1062    for (i = 0; i < N_SECTORS; i++) {
   1063 
   1064       sno = sector_search_order[i];
   1065       if (UNLIKELY(sno == -1))
   1066          return False; /* run out of sectors to search */
   1067 
   1068       k = kstart;
   1069       for (j = 0; j < N_TTES_PER_SECTOR; j++) {
   1070          n_lookup_probes++;
   1071          if (sectors[sno].tt[k].status == InUse
   1072              && sectors[sno].tt[k].entry == guest_addr) {
   1073             /* found it */
   1074             if (upd_cache)
   1075                setFastCacheEntry(
   1076                   guest_addr, sectors[sno].tt[k].tcptr,
   1077                               &sectors[sno].tt[k].count );
   1078             if (result)
   1079                *result = (AddrH)sectors[sno].tt[k].tcptr;
   1080             /* pull this one one step closer to the front.  For large
   1081                apps this more or less halves the number of required
   1082                probes. */
   1083             if (i > 0) {
   1084                Int tmp = sector_search_order[i-1];
   1085                sector_search_order[i-1] = sector_search_order[i];
   1086                sector_search_order[i] = tmp;
   1087             }
   1088             return True;
   1089          }
   1090          if (sectors[sno].tt[k].status == Empty)
   1091             break; /* not found in this sector */
   1092          k++;
   1093          if (k == N_TTES_PER_SECTOR)
   1094             k = 0;
   1095       }
   1096 
   1097       /* If we fall off the end, all entries are InUse and not
   1098          matching, or Deleted.  In any case we did not find it in this
   1099          sector. */
   1100    }
   1101 
   1102    /* Not found in any sector. */
   1103    return False;
   1104 }
   1105 
   1106 
   1107 /*-------------------------------------------------------------*/
   1108 /*--- Delete translations.                                  ---*/
   1109 /*-------------------------------------------------------------*/
   1110 
   1111 /* forward */
   1112 static void unredir_discard_translations( Addr64, ULong );
   1113 
   1114 /* Stuff for deleting translations which intersect with a given
   1115    address range.  Unfortunately, to make this run at a reasonable
   1116    speed, it is complex. */
   1117 
   1118 static inline
   1119 Bool overlap1 ( Addr64 s1, ULong r1, Addr64 s2, ULong r2 )
   1120 {
   1121    Addr64 e1 = s1 + r1 - 1ULL;
   1122    Addr64 e2 = s2 + r2 - 1ULL;
   1123    if (e1 < s2 || e2 < s1)
   1124       return False;
   1125    return True;
   1126 }
   1127 
   1128 static inline
   1129 Bool overlaps ( Addr64 start, ULong range, VexGuestExtents* vge )
   1130 {
   1131    if (overlap1(start, range, vge->base[0], (UInt)vge->len[0]))
   1132       return True;
   1133    if (vge->n_used < 2)
   1134       return False;
   1135    if (overlap1(start, range, vge->base[1], (UInt)vge->len[1]))
   1136       return True;
   1137    if (vge->n_used < 3)
   1138       return False;
   1139    if (overlap1(start, range, vge->base[2], (UInt)vge->len[2]))
   1140       return True;
   1141    return False;
   1142 }
   1143 
   1144 
   1145 /* Delete a tt entry, and update all the eclass data accordingly. */
   1146 
   1147 static void delete_tte ( /*MOD*/Sector* sec, Int tteno )
   1148 {
   1149    Int      i, ec_num, ec_idx;
   1150    TTEntry* tte;
   1151 
   1152    vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
   1153    tte = &sec->tt[tteno];
   1154    vg_assert(tte->status == InUse);
   1155    vg_assert(tte->n_tte2ec >= 1 && tte->n_tte2ec <= 3);
   1156 
   1157    /* Deal with the ec-to-tte links first. */
   1158    for (i = 0; i < tte->n_tte2ec; i++) {
   1159       ec_num = (Int)tte->tte2ec_ec[i];
   1160       ec_idx = tte->tte2ec_ix[i];
   1161       vg_assert(ec_num >= 0 && ec_num < ECLASS_N);
   1162       vg_assert(ec_idx >= 0);
   1163       vg_assert(ec_idx < sec->ec2tte_used[ec_num]);
   1164       /* Assert that the two links point at each other. */
   1165       vg_assert(sec->ec2tte[ec_num][ec_idx] == (UShort)tteno);
   1166       /* "delete" the pointer back to here. */
   1167       sec->ec2tte[ec_num][ec_idx] = EC2TTE_DELETED;
   1168    }
   1169 
   1170    /* Now fix up this TTEntry. */
   1171    tte->status   = Deleted;
   1172    tte->n_tte2ec = 0;
   1173 
   1174    /* Stats .. */
   1175    sec->tt_n_inuse--;
   1176    n_disc_count++;
   1177    n_disc_osize += vge_osize(&tte->vge);
   1178 
   1179    /* Tell the tool too. */
   1180    if (VG_(needs).superblock_discards) {
   1181       VG_TDICT_CALL( tool_discard_superblock_info,
   1182                      tte->entry,
   1183                      tte->vge );
   1184    }
   1185 }
   1186 
   1187 
   1188 /* Delete translations from sec which intersect specified range, but
   1189    only consider translations in the specified eclass. */
   1190 
   1191 static
   1192 Bool delete_translations_in_sector_eclass ( /*MOD*/Sector* sec,
   1193                                             Addr64 guest_start, ULong range,
   1194                                             Int ec )
   1195 {
   1196    Int      i;
   1197    UShort   tteno;
   1198    Bool     anyDeld = False;
   1199    TTEntry* tte;
   1200 
   1201    vg_assert(ec >= 0 && ec < ECLASS_N);
   1202 
   1203    for (i = 0; i < sec->ec2tte_used[ec]; i++) {
   1204 
   1205       tteno = sec->ec2tte[ec][i];
   1206       if (tteno == EC2TTE_DELETED) {
   1207          /* already deleted */
   1208          continue;
   1209       }
   1210 
   1211       vg_assert(tteno < N_TTES_PER_SECTOR);
   1212 
   1213       tte = &sec->tt[tteno];
   1214       vg_assert(tte->status == InUse);
   1215 
   1216       if (overlaps( guest_start, range, &tte->vge )) {
   1217          anyDeld = True;
   1218          delete_tte( sec, (Int)tteno );
   1219       }
   1220 
   1221    }
   1222 
   1223    return anyDeld;
   1224 }
   1225 
   1226 
   1227 /* Delete translations from sec which intersect specified range, the
   1228    slow way, by inspecting all translations in sec. */
   1229 
   1230 static
   1231 Bool delete_translations_in_sector ( /*MOD*/Sector* sec,
   1232                                      Addr64 guest_start, ULong range )
   1233 {
   1234    Int  i;
   1235    Bool anyDeld = False;
   1236 
   1237    for (i = 0; i < N_TTES_PER_SECTOR; i++) {
   1238       if (sec->tt[i].status == InUse
   1239           && overlaps( guest_start, range, &sec->tt[i].vge )) {
   1240          anyDeld = True;
   1241          delete_tte( sec, i );
   1242       }
   1243    }
   1244 
   1245    return anyDeld;
   1246 }
   1247 
   1248 
   1249 void VG_(discard_translations) ( Addr64 guest_start, ULong range,
   1250                                  HChar* who )
   1251 {
   1252    Sector* sec;
   1253    Int     sno, ec;
   1254    Bool    anyDeleted = False;
   1255 
   1256    vg_assert(init_done);
   1257 
   1258    VG_(debugLog)(2, "transtab",
   1259                     "discard_translations(0x%llx, %lld) req by %s\n",
   1260                     guest_start, range, who );
   1261 
   1262    /* Pre-deletion sanity check */
   1263    if (VG_(clo_sanity_level >= 4)) {
   1264       Bool sane = sanity_check_all_sectors();
   1265       vg_assert(sane);
   1266    }
   1267 
   1268    if (range == 0)
   1269       return;
   1270 
   1271    /* There are two different ways to do this.
   1272 
   1273       If the range fits within a single address-range equivalence
   1274       class, as will be the case for a cache line sized invalidation,
   1275       then we only have to inspect the set of translations listed in
   1276       that equivalence class, and also in the "sin-bin" equivalence
   1277       class ECLASS_MISC.
   1278 
   1279       Otherwise, the invalidation is of a larger range and probably
   1280       results from munmap.  In this case it's (probably!) faster just
   1281       to inspect all translations, dump those we don't want, and
   1282       regenerate the equivalence class information (since modifying it
   1283       in-situ is even more expensive).
   1284    */
   1285 
   1286    /* First off, figure out if the range falls within a single class,
   1287       and if so which one. */
   1288 
   1289    ec = ECLASS_MISC;
   1290    if (range < (1ULL << ECLASS_SHIFT))
   1291       ec = range_to_eclass( guest_start, (UInt)range );
   1292 
   1293    /* if ec is ECLASS_MISC then we aren't looking at just a single
   1294       class, so use the slow scheme.  Else use the fast scheme,
   1295       examining 'ec' and ECLASS_MISC. */
   1296 
   1297    if (ec != ECLASS_MISC) {
   1298 
   1299       VG_(debugLog)(2, "transtab",
   1300                        "                    FAST, ec = %d\n", ec);
   1301 
   1302       /* Fast scheme */
   1303       vg_assert(ec >= 0 && ec < ECLASS_MISC);
   1304 
   1305       for (sno = 0; sno < N_SECTORS; sno++) {
   1306          sec = &sectors[sno];
   1307          if (sec->tc == NULL)
   1308             continue;
   1309          anyDeleted |= delete_translations_in_sector_eclass(
   1310                          sec, guest_start, range, ec );
   1311          anyDeleted |= delete_translations_in_sector_eclass(
   1312                          sec, guest_start, range, ECLASS_MISC );
   1313       }
   1314 
   1315    } else {
   1316 
   1317       /* slow scheme */
   1318 
   1319       VG_(debugLog)(2, "transtab",
   1320                        "                    SLOW, ec = %d\n", ec);
   1321 
   1322       for (sno = 0; sno < N_SECTORS; sno++) {
   1323          sec = &sectors[sno];
   1324          if (sec->tc == NULL)
   1325             continue;
   1326          anyDeleted |= delete_translations_in_sector(
   1327                          sec, guest_start, range );
   1328       }
   1329 
   1330    }
   1331 
   1332    if (anyDeleted)
   1333       invalidateFastCache();
   1334 
   1335    /* don't forget the no-redir cache */
   1336    unredir_discard_translations( guest_start, range );
   1337 
   1338    /* Post-deletion sanity check */
   1339    if (VG_(clo_sanity_level >= 4)) {
   1340       Int      i;
   1341       TTEntry* tte;
   1342       Bool     sane = sanity_check_all_sectors();
   1343       vg_assert(sane);
   1344       /* But now, also check the requested address range isn't
   1345          present anywhere. */
   1346       for (sno = 0; sno < N_SECTORS; sno++) {
   1347          sec = &sectors[sno];
   1348          if (sec->tc == NULL)
   1349             continue;
   1350          for (i = 0; i < N_TTES_PER_SECTOR; i++) {
   1351             tte = &sec->tt[i];
   1352             if (tte->status != InUse)
   1353                continue;
   1354             vg_assert(!overlaps( guest_start, range, &tte->vge ));
   1355          }
   1356       }
   1357    }
   1358 }
   1359 
   1360 
   1361 /*------------------------------------------------------------*/
   1362 /*--- AUXILIARY: the unredirected TT/TC                    ---*/
   1363 /*------------------------------------------------------------*/
   1364 
   1365 /* A very simple translation cache which holds a small number of
   1366    unredirected translations.  This is completely independent of the
   1367    main tt/tc structures.  When unredir_tc or unredir_tt becomes full,
   1368    both structures are simply dumped and we start over.
   1369 
   1370    Since these translations are unredirected, the search key is (by
   1371    definition) the first address entry in the .vge field. */
   1372 
   1373 /* Sized to hold 500 translations of average size 1000 bytes. */
   1374 
   1375 #define UNREDIR_SZB   1000
   1376 
   1377 #define N_UNREDIR_TT  500
   1378 #define N_UNREDIR_TCQ (N_UNREDIR_TT * UNREDIR_SZB / sizeof(ULong))
   1379 
   1380 typedef
   1381    struct {
   1382       VexGuestExtents vge;
   1383       Addr            hcode;
   1384       Bool            inUse;
   1385    }
   1386    UTCEntry;
   1387 
   1388 /* We just allocate forwards in _tc, never deleting. */
   1389 static ULong    *unredir_tc;
   1390 static Int      unredir_tc_used = N_UNREDIR_TCQ;
   1391 
   1392 /* Slots in _tt can come into use and out again (.inUse).
   1393    Nevertheless _tt_highwater is maintained so that invalidations
   1394    don't have to scan all the slots when only a few are in use.
   1395    _tt_highwater holds the index of the highest ever allocated
   1396    slot. */
   1397 static UTCEntry unredir_tt[N_UNREDIR_TT];
   1398 static Int      unredir_tt_highwater;
   1399 
   1400 
   1401 static void init_unredir_tt_tc ( void )
   1402 {
   1403    Int i;
   1404    if (unredir_tc == NULL) {
   1405       SysRes sres = VG_(am_mmap_anon_float_valgrind)
   1406                        ( N_UNREDIR_TT * UNREDIR_SZB );
   1407       if (sr_isError(sres)) {
   1408          VG_(out_of_memory_NORETURN)("init_unredir_tt_tc",
   1409                                      N_UNREDIR_TT * UNREDIR_SZB);
   1410          /*NOTREACHED*/
   1411       }
   1412       unredir_tc = (ULong *)(AddrH)sr_Res(sres);
   1413    }
   1414    unredir_tc_used = 0;
   1415    for (i = 0; i < N_UNREDIR_TT; i++)
   1416       unredir_tt[i].inUse = False;
   1417    unredir_tt_highwater = -1;
   1418 }
   1419 
   1420 /* Do a sanity check; return False on failure. */
   1421 static Bool sanity_check_redir_tt_tc ( void )
   1422 {
   1423    Int i;
   1424    if (unredir_tt_highwater < -1) return False;
   1425    if (unredir_tt_highwater >= N_UNREDIR_TT) return False;
   1426 
   1427    for (i = unredir_tt_highwater+1; i < N_UNREDIR_TT; i++)
   1428       if (unredir_tt[i].inUse)
   1429          return False;
   1430 
   1431    if (unredir_tc_used < 0) return False;
   1432    if (unredir_tc_used > N_UNREDIR_TCQ) return False;
   1433 
   1434    return True;
   1435 }
   1436 
   1437 
   1438 /* Add an UNREDIRECTED translation of vge to TT/TC.  The translation
   1439    is temporarily in code[0 .. code_len-1].
   1440 */
   1441 void VG_(add_to_unredir_transtab)( VexGuestExtents* vge,
   1442                                    Addr64           entry,
   1443                                    AddrH            code,
   1444                                    UInt             code_len )
   1445 {
   1446    Int   i, j, code_szQ;
   1447    HChar *srcP, *dstP;
   1448 
   1449    vg_assert(sanity_check_redir_tt_tc());
   1450 
   1451    /* This is the whole point: it's not redirected! */
   1452    vg_assert(entry == vge->base[0]);
   1453 
   1454    /* How many unredir_tt slots are needed */
   1455    code_szQ = (code_len + 7) / 8;
   1456 
   1457    /* Look for an empty unredir_tc slot */
   1458    for (i = 0; i < N_UNREDIR_TT; i++)
   1459       if (!unredir_tt[i].inUse)
   1460          break;
   1461 
   1462    if (i >= N_UNREDIR_TT || code_szQ > (N_UNREDIR_TCQ - unredir_tc_used)) {
   1463       /* It's full; dump everything we currently have */
   1464       init_unredir_tt_tc();
   1465       i = 0;
   1466    }
   1467 
   1468    vg_assert(unredir_tc_used >= 0);
   1469    vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
   1470    vg_assert(code_szQ > 0);
   1471    vg_assert(code_szQ + unredir_tc_used <= N_UNREDIR_TCQ);
   1472    vg_assert(i >= 0 && i < N_UNREDIR_TT);
   1473    vg_assert(unredir_tt[i].inUse == False);
   1474 
   1475    if (i > unredir_tt_highwater)
   1476       unredir_tt_highwater = i;
   1477 
   1478    dstP = (HChar*)&unredir_tc[unredir_tc_used];
   1479    srcP = (HChar*)code;
   1480    for (j = 0; j < code_len; j++)
   1481       dstP[j] = srcP[j];
   1482 
   1483    invalidate_icache( dstP, code_len );
   1484 
   1485    unredir_tt[i].inUse = True;
   1486    unredir_tt[i].vge   = *vge;
   1487    unredir_tt[i].hcode = (Addr)dstP;
   1488 
   1489    unredir_tc_used += code_szQ;
   1490    vg_assert(unredir_tc_used >= 0);
   1491    vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
   1492 
   1493    vg_assert(&dstP[code_len] <= (HChar*)&unredir_tc[unredir_tc_used]);
   1494 }
   1495 
   1496 Bool VG_(search_unredir_transtab) ( /*OUT*/AddrH* result,
   1497                                     Addr64        guest_addr )
   1498 {
   1499    Int i;
   1500    for (i = 0; i < N_UNREDIR_TT; i++) {
   1501       if (!unredir_tt[i].inUse)
   1502          continue;
   1503       if (unredir_tt[i].vge.base[0] == guest_addr) {
   1504          *result = (AddrH)unredir_tt[i].hcode;
   1505          return True;
   1506       }
   1507    }
   1508    return False;
   1509 }
   1510 
   1511 static void unredir_discard_translations( Addr64 guest_start, ULong range )
   1512 {
   1513    Int i;
   1514 
   1515    vg_assert(sanity_check_redir_tt_tc());
   1516 
   1517    for (i = 0; i <= unredir_tt_highwater; i++) {
   1518       if (unredir_tt[i].inUse
   1519           && overlaps( guest_start, range, &unredir_tt[i].vge))
   1520          unredir_tt[i].inUse = False;
   1521    }
   1522 }
   1523 
   1524 
   1525 /*------------------------------------------------------------*/
   1526 /*--- Initialisation.                                      ---*/
   1527 /*------------------------------------------------------------*/
   1528 
   1529 void VG_(init_tt_tc) ( void )
   1530 {
   1531    Int i, j, avg_codeszQ;
   1532 
   1533    vg_assert(!init_done);
   1534    init_done = True;
   1535 
   1536    /* Otherwise lots of things go wrong... */
   1537    vg_assert(sizeof(ULong) == 8);
   1538    vg_assert(sizeof(Addr64) == 8);
   1539    /* check fast cache entries really are 2 words long */
   1540    vg_assert(sizeof(Addr) == sizeof(void*));
   1541    vg_assert(sizeof(FastCacheEntry) == 2 * sizeof(Addr));
   1542    /* check fast cache entries are packed back-to-back with no spaces */
   1543    vg_assert(sizeof( VG_(tt_fast) ) == VG_TT_FAST_SIZE * sizeof(FastCacheEntry));
   1544    /* check fast cache is aligned as we requested.  Not fatal if it
   1545       isn't, but we might as well make sure. */
   1546    vg_assert(VG_IS_16_ALIGNED( ((Addr) & VG_(tt_fast)[0]) ));
   1547 
   1548    if (VG_(clo_verbosity) > 2)
   1549       VG_(message)(Vg_DebugMsg,
   1550                    "TT/TC: VG_(init_tt_tc) "
   1551                    "(startup of code management)\n");
   1552 
   1553    /* Figure out how big each tc area should be.  */
   1554    avg_codeszQ   = (VG_(details).avg_translation_sizeB + 7) / 8;
   1555    tc_sector_szQ = N_TTES_PER_SECTOR_USABLE * (1 + avg_codeszQ);
   1556 
   1557    /* Ensure the calculated value is not way crazy. */
   1558    vg_assert(tc_sector_szQ >= 2 * N_TTES_PER_SECTOR_USABLE);
   1559    vg_assert(tc_sector_szQ <= 80 * N_TTES_PER_SECTOR_USABLE);
   1560 
   1561    /* Initialise the sectors */
   1562    youngest_sector = 0;
   1563    for (i = 0; i < N_SECTORS; i++) {
   1564       sectors[i].tc = NULL;
   1565       sectors[i].tt = NULL;
   1566       sectors[i].tc_next = NULL;
   1567       sectors[i].tt_n_inuse = 0;
   1568       for (j = 0; j < ECLASS_N; j++) {
   1569          sectors[i].ec2tte_size[j] = 0;
   1570          sectors[i].ec2tte_used[j] = 0;
   1571          sectors[i].ec2tte[j] = NULL;
   1572       }
   1573    }
   1574 
   1575    /* Initialise the sector_search_order hint table. */
   1576    for (i = 0; i < N_SECTORS; i++)
   1577       sector_search_order[i] = -1;
   1578 
   1579    /* Initialise the fast caches.  If not profiling (the usual case),
   1580       we have to explicitly invalidate the fastN cache as
   1581       invalidateFastCache() won't do that for us. */
   1582    invalidateFastCache();
   1583    if (VG_(clo_profile_flags) == 0)
   1584       invalidateFastNCache();
   1585 
   1586    /* and the unredir tt/tc */
   1587    init_unredir_tt_tc();
   1588 
   1589    if (VG_(clo_verbosity) > 2) {
   1590       VG_(message)(Vg_DebugMsg,
   1591          "TT/TC: cache: %d sectors of %d bytes each = %d total\n",
   1592           N_SECTORS, 8 * tc_sector_szQ,
   1593           N_SECTORS * 8 * tc_sector_szQ );
   1594       VG_(message)(Vg_DebugMsg,
   1595          "TT/TC: table: %d total entries, max occupancy %d (%d%%)\n",
   1596          N_SECTORS * N_TTES_PER_SECTOR,
   1597          N_SECTORS * N_TTES_PER_SECTOR_USABLE,
   1598          SECTOR_TT_LIMIT_PERCENT );
   1599    }
   1600 
   1601    VG_(debugLog)(2, "transtab",
   1602       "cache: %d sectors of %d bytes each = %d total\n",
   1603        N_SECTORS, 8 * tc_sector_szQ,
   1604        N_SECTORS * 8 * tc_sector_szQ );
   1605    VG_(debugLog)(2, "transtab",
   1606       "table: %d total entries, max occupancy %d (%d%%)\n",
   1607       N_SECTORS * N_TTES_PER_SECTOR,
   1608       N_SECTORS * N_TTES_PER_SECTOR_USABLE,
   1609       SECTOR_TT_LIMIT_PERCENT );
   1610 }
   1611 
   1612 
   1613 /*------------------------------------------------------------*/
   1614 /*--- Printing out statistics.                             ---*/
   1615 /*------------------------------------------------------------*/
   1616 
   1617 static ULong safe_idiv( ULong a, ULong b )
   1618 {
   1619    return (b == 0 ? 0 : a / b);
   1620 }
   1621 
   1622 UInt VG_(get_bbs_translated) ( void )
   1623 {
   1624    return n_in_count;
   1625 }
   1626 
   1627 void VG_(print_tt_tc_stats) ( void )
   1628 {
   1629    VG_(message)(Vg_DebugMsg,
   1630       "    tt/tc: %'llu tt lookups requiring %'llu probes\n",
   1631       n_full_lookups, n_lookup_probes );
   1632    VG_(message)(Vg_DebugMsg,
   1633       "    tt/tc: %'llu fast-cache updates, %'llu flushes\n",
   1634       n_fast_updates, n_fast_flushes );
   1635 
   1636    VG_(message)(Vg_DebugMsg,
   1637                 " transtab: new        %'lld "
   1638                 "(%'llu -> %'llu; ratio %'llu:10) [%'llu scs]\n",
   1639                 n_in_count, n_in_osize, n_in_tsize,
   1640                 safe_idiv(10*n_in_tsize, n_in_osize),
   1641                 n_in_sc_count);
   1642    VG_(message)(Vg_DebugMsg,
   1643                 " transtab: dumped     %'llu (%'llu -> ?" "?)\n",
   1644                 n_dump_count, n_dump_osize );
   1645    VG_(message)(Vg_DebugMsg,
   1646                 " transtab: discarded  %'llu (%'llu -> ?" "?)\n",
   1647                 n_disc_count, n_disc_osize );
   1648 
   1649    if (0) {
   1650       Int i;
   1651       VG_(printf)("\n");
   1652       for (i = 0; i < ECLASS_N; i++) {
   1653          VG_(printf)(" %4d", sectors[0].ec2tte_used[i]);
   1654          if (i % 16 == 15)
   1655             VG_(printf)("\n");
   1656       }
   1657       VG_(printf)("\n\n");
   1658    }
   1659 }
   1660 
   1661 /*------------------------------------------------------------*/
   1662 /*--- Printing out of profiling results.                   ---*/
   1663 /*------------------------------------------------------------*/
   1664 
   1665 static ULong score ( TTEntry* tte )
   1666 {
   1667    return ((ULong)tte->weight) * ((ULong)tte->count);
   1668 }
   1669 
   1670 ULong VG_(get_BB_profile) ( BBProfEntry tops[], UInt n_tops )
   1671 {
   1672    Int   sno, i, r, s;
   1673    ULong score_total;
   1674 
   1675    /* First, compute the total weighted count, and find the top N
   1676       ttes.  tops contains pointers to the most-used n_tops blocks, in
   1677       descending order (viz, tops[0] is the highest scorer). */
   1678    for (i = 0; i < n_tops; i++) {
   1679       tops[i].addr  = 0;
   1680       tops[i].score = 0;
   1681    }
   1682 
   1683    score_total = 0;
   1684 
   1685    for (sno = 0; sno < N_SECTORS; sno++) {
   1686       if (sectors[sno].tc == NULL)
   1687          continue;
   1688       for (i = 0; i < N_TTES_PER_SECTOR; i++) {
   1689          if (sectors[sno].tt[i].status != InUse)
   1690             continue;
   1691          score_total += score(&sectors[sno].tt[i]);
   1692          /* Find the rank for sectors[sno].tt[i]. */
   1693          r = n_tops-1;
   1694          while (True) {
   1695             if (r == -1)
   1696                break;
   1697              if (tops[r].addr == 0) {
   1698                r--;
   1699                continue;
   1700              }
   1701              if ( score(&sectors[sno].tt[i]) > tops[r].score ) {
   1702                 r--;
   1703                 continue;
   1704              }
   1705              break;
   1706          }
   1707          r++;
   1708          vg_assert(r >= 0 && r <= n_tops);
   1709          /* This bb should be placed at r, and bbs above it shifted
   1710             upwards one slot. */
   1711          if (r < n_tops) {
   1712             for (s = n_tops-1; s > r; s--)
   1713                tops[s] = tops[s-1];
   1714             tops[r].addr  = sectors[sno].tt[i].entry;
   1715             tops[r].score = score( &sectors[sno].tt[i] );
   1716          }
   1717       }
   1718    }
   1719 
   1720    return score_total;
   1721 }
   1722 
   1723 /*--------------------------------------------------------------------*/
   1724 /*--- end                                                          ---*/
   1725 /*--------------------------------------------------------------------*/
   1726