Home | History | Annotate | Download | only in coregrind
      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- Management of the translation table and cache.               ---*/
      4 /*---                                                 m_transtab.c ---*/
      5 /*--------------------------------------------------------------------*/
      6 
      7 /*
      8    This file is part of Valgrind, a dynamic binary instrumentation
      9    framework.
     10 
     11    Copyright (C) 2000-2017 Julian Seward
     12       jseward (at) acm.org
     13 
     14    This program is free software; you can redistribute it and/or
     15    modify it under the terms of the GNU General Public License as
     16    published by the Free Software Foundation; either version 2 of the
     17    License, or (at your option) any later version.
     18 
     19    This program is distributed in the hope that it will be useful, but
     20    WITHOUT ANY WARRANTY; without even the implied warranty of
     21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     22    General Public License for more details.
     23 
     24    You should have received a copy of the GNU General Public License
     25    along with this program; if not, write to the Free Software
     26    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     27    02111-1307, USA.
     28 
     29    The GNU General Public License is contained in the file COPYING.
     30 */
     31 
     32 #include "pub_core_basics.h"
     33 #include "pub_core_debuglog.h"
     34 #include "pub_core_machine.h"    // For VG_(machine_get_VexArchInfo)
     35 #include "pub_core_libcbase.h"
     36 #include "pub_core_vki.h"        // to keep pub_core_libproc.h happy, sigh
     37 #include "pub_core_libcproc.h"   // VG_(invalidate_icache)
     38 #include "pub_core_libcassert.h"
     39 #include "pub_core_libcprint.h"
     40 #include "pub_core_options.h"
     41 #include "pub_core_tooliface.h"  // For VG_(details).avg_translation_sizeB
     42 #include "pub_core_transtab.h"
     43 #include "pub_core_aspacemgr.h"
     44 #include "pub_core_mallocfree.h" // VG_(out_of_memory_NORETURN)
     45 #include "pub_core_xarray.h"
     46 #include "pub_core_dispatch.h"   // For VG_(disp_cp*) addresses
     47 
     48 
     49 #define DEBUG_TRANSTAB 0
     50 
     51 
     52 /*-------------------------------------------------------------*/
     53 /*--- Management of the FIFO-based translation table+cache. ---*/
     54 /*-------------------------------------------------------------*/
     55 
     56 /* Nr of sectors provided via command line parameter. */
     57 UInt VG_(clo_num_transtab_sectors) = N_SECTORS_DEFAULT;
     58 /* Nr of sectors.
     59    Will be set by VG_(init_tt_tc) to VG_(clo_num_transtab_sectors). */
     60 static SECno n_sectors = 0;
     61 
     62 /* Average size of a transtab code entry. 0 means to use the tool
     63    provided default. */
     64 UInt VG_(clo_avg_transtab_entry_size) = 0;
     65 
     66 /*------------------ CONSTANTS ------------------*/
     67 /* Number of entries in hash table of each sector.  This needs to be a prime
     68    number to work properly, it must be <= 65535 (so that a TTE index
     69    fits in a UShort, leaving room for 0xFFFF(EC2TTE_DELETED, HTT_DELETED)
     70    to denote 'deleted') and  0xFFFE (HTT_EMPTY) to denote 'Empty' in the
     71    hash table.
     72    It is strongly recommended not to change this.
     73    65521 is the largest prime <= 65535. */
     74 #define N_HTTES_PER_SECTOR /*10007*/ /*30011*/ /*40009*/ 65521
     75 
     76 #define EC2TTE_DELETED  0xFFFF /* 16-bit special value */
     77 #define HTT_DELETED     EC2TTE_DELETED
     78 #define HTT_EMPTY       0XFFFE
     79 
     80 // HTTno is the Sector->htt hash table index. Must be the same type as TTEno.
     81 typedef UShort HTTno;
     82 
     83 /* Because each sector contains a hash table of TTEntries, we need to
     84    specify the maximum allowable loading, after which the sector is
     85    deemed full. */
     86 #define SECTOR_TT_LIMIT_PERCENT 65
     87 
     88 /* The sector is deemed full when this many entries are in it. */
     89 #define N_TTES_PER_SECTOR \
     90            ((N_HTTES_PER_SECTOR * SECTOR_TT_LIMIT_PERCENT) / 100)
     91 
     92 /* Equivalence classes for fast address range deletion.  There are 1 +
     93    2^ECLASS_WIDTH bins.  The highest one, ECLASS_MISC, describes an
     94    address range which does not fall cleanly within any specific bin.
     95    Note that ECLASS_SHIFT + ECLASS_WIDTH must be < 32.
     96    ECLASS_N must fit in a EclassNo. */
     97 #define ECLASS_SHIFT 13
     98 #define ECLASS_WIDTH 9
     99 #define ECLASS_MISC  (1 << ECLASS_WIDTH)
    100 #define ECLASS_N     (1 + ECLASS_MISC)
    101 STATIC_ASSERT(ECLASS_SHIFT + ECLASS_WIDTH < 32);
    102 
    103 typedef UShort EClassNo;
    104 
    105 /*------------------ TYPES ------------------*/
    106 
    107 /* In edges ("to-me") in the graph created by chaining. */
    108 typedef
    109    struct {
    110       SECno from_sNo;   /* sector number */
    111       TTEno from_tteNo; /* TTE number in given sector */
    112       UInt  from_offs: (sizeof(UInt)*8)-1;  /* code offset from TCEntry::tcptr
    113                                                where the patch is */
    114       Bool  to_fastEP:1; /* Is the patch to a fast or slow entry point? */
    115    }
    116    InEdge;
    117 
    118 
    119 /* Out edges ("from-me") in the graph created by chaining. */
    120 typedef
    121    struct {
    122       SECno to_sNo;    /* sector number */
    123       TTEno to_tteNo;  /* TTE number in given sector */
    124       UInt  from_offs; /* code offset in owning translation where patch is */
    125    }
    126    OutEdge;
    127 
    128 
    129 #define N_FIXED_IN_EDGE_ARR 3
    130 typedef
    131    struct {
    132       Bool     has_var:1; /* True if var is used (then n_fixed must be 0) */
    133       UInt     n_fixed: (sizeof(UInt)*8)-1; /* 0 .. N_FIXED_IN_EDGE_ARR */
    134       union {
    135          InEdge   fixed[N_FIXED_IN_EDGE_ARR];      /* if !has_var */
    136          XArray*  var; /* XArray* of InEdgeArr */  /* if  has_var */
    137       } edges;
    138    }
    139    InEdgeArr;
    140 
    141 #define N_FIXED_OUT_EDGE_ARR 2
    142 typedef
    143    struct {
    144       Bool    has_var:1; /* True if var is used (then n_fixed must be 0) */
    145       UInt    n_fixed: (sizeof(UInt)*8)-1; /* 0 .. N_FIXED_OUT_EDGE_ARR */
    146       union {
    147          OutEdge fixed[N_FIXED_OUT_EDGE_ARR];      /* if !has_var */
    148          XArray* var; /* XArray* of OutEdgeArr */  /* if  has_var */
    149       } edges;
    150    }
    151    OutEdgeArr;
    152 
    153 
    154 /* A translation-table entry.  This indicates precisely which areas of
    155    guest code are included in the translation, and contains all other
    156    auxiliary info too.  These are split into hold and cold parts,
    157    TTEntryH and TTEntryC, so as to be more cache-friendly
    158    (a.k.a. "faster") when searching for translations that intersect
    159    with a certain guest code address range, for deletion.  In the
    160    worst case this can involve a sequential scan through all the hot
    161    parts, so they are packed as compactly as possible -- 32 bytes on a
    162    64-bit platform, 20 bytes on a 32-bit platform.
    163 
    164    Each TTEntry is logically a matching cold-and-hot pair, and in fact
    165    it was originally one structure.  First, the cold part.
    166 */
    167 typedef
    168    struct {
    169       union {
    170          struct {
    171             /* Profiling only: the count and weight (arbitrary meaning) for
    172                this translation.  Weight is a property of the translation
    173                itself and computed once when the translation is created.
    174                Count is an entry count for the translation and is
    175                incremented by 1 every time the translation is used, if we
    176                are profiling. */
    177             ULong    count;
    178             UShort   weight;
    179          } prof; // if status == InUse
    180          TTEno next_empty_tte; // if status != InUse
    181       } usage;
    182 
    183       /* 64-bit aligned pointer to one or more 64-bit words containing
    184          the corresponding host code (must be in the same sector!)
    185          This is a pointer into the sector's tc (code) area. */
    186       ULong* tcptr;
    187 
    188       /* This is the original guest address that purportedly is the
    189          entry point of the translation.  You might think that .entry
    190          should be the same as .vge->base[0], and most of the time it
    191          is.  However, when doing redirections, that is not the case.
    192          .vge must always correctly describe the guest code sections
    193          from which this translation was made.  However, .entry may or
    194          may not be a lie, depending on whether or not we're doing
    195          redirection. */
    196       Addr entry;
    197 
    198       /* Address range summary info: these are pointers back to
    199          eclass[] entries in the containing Sector.  Those entries in
    200          turn point back here -- the two structures are mutually
    201          redundant but both necessary to make fast deletions work.
    202          The eclass info is similar to, and derived from, this entry's
    203          'vge' field, but it is not the same */
    204       UShort   n_tte2ec;      // # tte2ec pointers (1 to 3)
    205       EClassNo tte2ec_ec[3];  // for each, the eclass #
    206       UInt     tte2ec_ix[3];  // and the index within the eclass.
    207       // for i in 0 .. n_tte2ec-1
    208       //    sec->ec2tte[ tte2ec_ec[i] ][ tte2ec_ix[i] ]
    209       // should be the index
    210       // of this TTEntry in the containing Sector's tt array.
    211 
    212       /* Admin information for chaining.  'in_edges' is a set of the
    213          patch points which jump to this translation -- hence are
    214          predecessors in the control flow graph.  'out_edges' points
    215          to successors in the control flow graph -- translations to
    216          which this one has a patched jump.  In short these are just
    217          backwards and forwards edges in the graph of patched-together
    218          blocks.  The 'in_edges' contain slightly more info, enough
    219          that we can undo the chaining of each mentioned patch point.
    220          The 'out_edges' list exists only so that we can visit the
    221          'in_edges' entries of all blocks we're patched through to, in
    222          order to remove ourselves from then when we're deleted. */
    223 
    224       /* A translation can disappear for two reasons:
    225           1. erased (as part of the oldest sector cleanup) when the
    226              youngest sector is full.
    227           2. discarded due to calls to VG_(discard_translations).
    228              VG_(discard_translations) sets the status of the
    229              translation to 'Deleted'.
    230              A.o., the gdbserver discards one or more translations
    231              when a breakpoint is inserted or removed at an Addr,
    232              or when single stepping mode is enabled/disabled
    233              or when a translation is instrumented for gdbserver
    234              (all the target jumps of this translation are
    235               invalidated).
    236 
    237          So, it is possible that the translation A to be patched
    238          (to obtain a patched jump from A to B) is invalidated
    239          after B is translated and before A is patched.
    240          In case a translation is erased or discarded, the patching
    241          cannot be done.  VG_(tt_tc_do_chaining) and find_TTEntry_from_hcode
    242          are checking the 'from' translation still exists before
    243          doing the patching.
    244 
    245          Is it safe to erase or discard the current translation E being
    246          executed ? Amazing, but yes, it is safe.
    247          Here is the explanation:
    248 
    249          The translation E being executed can only be erased if a new
    250          translation N is being done. A new translation is done only
    251          if the host addr is a not yet patched jump to another
    252          translation. In such a case, the guest address of N is
    253          assigned to the PC in the VEX state. Control is returned
    254          to the scheduler. N will be translated. This can erase the
    255          translation E (in case of sector full). VG_(tt_tc_do_chaining)
    256          will not do the chaining to a non found translation E.
    257          The execution will continue at the current guest PC
    258          (i.e. the translation N).
    259          => it is safe to erase the current translation being executed.
    260 
    261          The current translation E being executed can also be discarded
    262          (e.g. by gdbserver). VG_(discard_translations) will mark
    263          this translation E as Deleted, but the translation itself
    264          is not erased. In particular, its host code can only
    265          be overwritten or erased in case a new translation is done.
    266          A new translation will only be done if a not yet translated
    267          jump is to be executed. The execution of the Deleted translation
    268          E will continue till a non patched jump is encountered.
    269          This situation is then similar to the 'erasing' case above :
    270          the current translation E can be erased or overwritten, as the
    271          execution will continue at the new translation N.
    272       */
    273 
    274       /* It is possible, although very unlikely, that a block A has
    275          more than one patched jump to block B.  This could happen if
    276          (eg) A finishes "jcond B; jmp B".
    277 
    278          This means in turn that B's in_edges set can list A more than
    279          once (twice in this example).  However, each such entry must
    280          have a different from_offs, since a patched jump can only
    281          jump to one place at once (it's meaningless for it to have
    282          multiple destinations.)  IOW, the successor and predecessor
    283          edges in the graph are not uniquely determined by a
    284          TTEntry --> TTEntry pair, but rather by a
    285          (TTEntry,offset) --> TTEntry triple.
    286 
    287          If A has multiple edges to B then B will mention A multiple
    288          times in its in_edges.  To make things simpler, we then
    289          require that A mentions B exactly the same number of times in
    290          its out_edges.  Furthermore, a matching out-in pair must have
    291          the same offset (from_offs).  This facilitates sanity
    292          checking, and it facilitates establishing the invariant that
    293          a out_edges set may not have duplicates when using the
    294          equality defined by (TTEntry,offset).  Hence the out_edges
    295          and in_edges sets really do have both have set semantics.
    296 
    297          eg if  A has been patched to B at offsets 42 and 87 (in A)
    298          then   A.out_edges = { (B,42), (B,87) }   (in any order)
    299          and    B.in_edges  = { (A,42), (A,87) }   (in any order)
    300 
    301          Hence for each node pair P->Q in the graph, there's a 1:1
    302          mapping between P.out_edges and Q.in_edges.
    303       */
    304       InEdgeArr  in_edges;
    305       OutEdgeArr out_edges;
    306    }
    307    TTEntryC;
    308 
    309 /* And this is the hot part. */
    310 typedef
    311    struct {
    312       /* This structure describes precisely what ranges of guest code
    313          the translation covers, so we can decide whether or not to
    314          delete it when translations of a given address range are
    315          invalidated.  Originally this was a VexGuestExtents, but that
    316          itself is 32 bytes on a 64-bit target, and we really want to
    317          squeeze in an 8-bit |status| field into the 32 byte field, so
    318          as to get 2 of them in a 64 byte LLC line.  Hence the
    319          VexGuestExtents fields are inlined, the _n_used field is
    320          changed to a UChar (it only ever has the values 1, 2 or 3)
    321          and the 8-bit status field is placed in byte 31 of the
    322          structure. */
    323       /* ORIGINALLY: VexGuestExtents vge; */
    324       Addr   vge_base[3];
    325       UShort vge_len[3];
    326       UChar  vge_n_used;  /* BEWARE: is UShort in VexGuestExtents */
    327 
    328       /* Status of the slot.  Note, we need to be able to do lazy
    329          deletion, hence the Deleted state. */
    330       enum { InUse, Deleted, Empty } status : 8;
    331    }
    332    TTEntryH;
    333 
    334 
    335 /* Impedance matchers, that convert between a VexGuestExtents and a
    336    TTEntryH, ignoring TTEntryH::status, which doesn't exist in a
    337    VexGuestExtents -- it is entirely unrelated. */
    338 
    339 /* Takes a VexGuestExtents and pushes it into a TTEntryH.  The
    340    TTEntryH.status field is left unchanged. */
    341 static
    342 inline void TTEntryH__from_VexGuestExtents ( /*MOD*/TTEntryH* tteH,
    343                                              const VexGuestExtents* vge )
    344 {
    345    tteH->vge_base[0] = vge->base[0];
    346    tteH->vge_base[1] = vge->base[1];
    347    tteH->vge_base[2] = vge->base[2];
    348    tteH->vge_len[0]  = vge->len[0];
    349    tteH->vge_len[1]  = vge->len[1];
    350    tteH->vge_len[2]  = vge->len[2];
    351    tteH->vge_n_used  = (UChar)vge->n_used; /* BEWARE: no range check. */
    352 }
    353 
    354 /* Takes a TTEntryH and pushes the vge_ components into a VexGuestExtents. */
    355 static
    356 inline void TTEntryH__to_VexGuestExtents ( /*MOD*/VexGuestExtents* vge,
    357                                            const TTEntryH* tteH )
    358 {
    359    vge->base[0] = tteH->vge_base[0];
    360    vge->base[1] = tteH->vge_base[1];
    361    vge->base[2] = tteH->vge_base[2];
    362    vge->len[0]  = tteH->vge_len[0] ;
    363    vge->len[1]  = tteH->vge_len[1] ;
    364    vge->len[2]  = tteH->vge_len[2] ;
    365    vge->n_used  = (UShort)tteH->vge_n_used ;
    366 }
    367 
    368 
    369 /* A structure used for mapping host code addresses back to the
    370    relevant TTEntry.  Used when doing chaining, for finding the
    371    TTEntry to which some arbitrary patch address belongs. */
    372 typedef
    373    struct {
    374       UChar* start;
    375       UInt   len;
    376       TTEno  tteNo;
    377    }
    378    HostExtent;
    379 
    380 /* Finally, a sector itself.  Each sector contains an array of
    381    TCEntries, which hold code, and an array of TTEntries, containing
    382    all required administrative info.  Profiling is supported using the
    383    TTEntry usage.prof.count and usage.prof.weight fields, if required.
    384 
    385    If the sector is not in use, all three pointers are NULL and
    386    tt_n_inuse is zero.
    387 */
    388 typedef
    389    struct {
    390       /* The TCEntry area.  Size of this depends on the average
    391          translation size.  We try and size it so it becomes full
    392          precisely when this sector's translation table (tt) reaches
    393          its load limit (SECTOR_TT_LIMIT_PERCENT). */
    394       ULong* tc;
    395 
    396       /* An hash table, mapping guest address to an index in the tt array.
    397          htt is a fixed size, always containing
    398          exactly N_HTTES_PER_SECTOR entries. */
    399       TTEno* htt;
    400 
    401       /* The TTEntry{C,H} arrays.  These are a fixed size, always
    402          containing exactly N_TTES_PER_SECTOR entries. */
    403       TTEntryC* ttC;
    404       TTEntryH* ttH;
    405 
    406       /* This points to the current allocation point in tc. */
    407       ULong* tc_next;
    408 
    409       /* The count of tt entries with state InUse. */
    410       Int tt_n_inuse;
    411 
    412       /* A list of Empty/Deleted entries, chained by tte->next_empty_tte */
    413       TTEno empty_tt_list;
    414 
    415       /* Expandable arrays of tt indices for each of the ECLASS_N
    416          address range equivalence classes.  These hold indices into
    417          the containing sector's tt array, which in turn should point
    418          back here. */
    419       Int     ec2tte_size[ECLASS_N];
    420       Int     ec2tte_used[ECLASS_N];
    421       TTEno*  ec2tte[ECLASS_N];
    422 
    423       /* The host extents.  The [start, +len) ranges are constructed
    424          in strictly non-overlapping order, so we can binary search
    425          them at any time. */
    426       XArray* host_extents; /* XArray* of HostExtent */
    427    }
    428    Sector;
    429 
    430 
    431 /*------------------ DECLS ------------------*/
    432 
    433 /* The root data structure is an array of sectors.  The index of the
    434    youngest sector is recorded, and new translations are put into that
    435    sector.  When it fills up, we move along to the next sector and
    436    start to fill that up, wrapping around at the end of the array.
    437    That way, once all N_TC_SECTORS have been bought into use for the
    438    first time, and are full, we then re-use the oldest sector,
    439    endlessly.
    440 
    441    When running, youngest sector should be between >= 0 and <
    442    N_TC_SECTORS.  The initial  value indicates the TT/TC system is
    443    not yet initialised.
    444 */
    445 static Sector sectors[MAX_N_SECTORS];
    446 static Int    youngest_sector = INV_SNO;
    447 
    448 /* The number of ULongs in each TCEntry area.  This is computed once
    449    at startup and does not change. */
    450 static Int    tc_sector_szQ = 0;
    451 
    452 
    453 /* A list of sector numbers, in the order which they should be
    454    searched to find translations.  This is an optimisation to be used
    455    when searching for translations and should not affect
    456    correctness.  INV_SNO denotes "no entry". */
    457 static SECno sector_search_order[MAX_N_SECTORS];
    458 
    459 
    460 /* Fast helper for the TC.  A direct-mapped cache which holds a set of
    461    recently used (guest address, host address) pairs.  This array is
    462    referred to directly from m_dispatch/dispatch-<platform>.S.
    463 
    464    Entries in tt_fast may refer to any valid TC entry, regardless of
    465    which sector it's in.  Consequently we must be very careful to
    466    invalidate this cache when TC entries are changed or disappear.
    467 
    468    A special .guest address - TRANSTAB_BOGUS_GUEST_ADDR -- must be
    469    pointed at to cause that cache entry to miss.  This relies on the
    470    assumption that no guest code actually has that address, hence a
    471    value 0x1 seems good.  m_translate gives the client a synthetic
    472    segfault if it tries to execute at this address.
    473 */
    474 /*
    475 typedef
    476    struct {
    477       Addr guest;
    478       Addr host;
    479    }
    480    FastCacheEntry;
    481 */
    482 /*global*/ __attribute__((aligned(16)))
    483            FastCacheEntry VG_(tt_fast)[VG_TT_FAST_SIZE];
    484 
    485 /* Make sure we're not used before initialisation. */
    486 static Bool init_done = False;
    487 
    488 
    489 /*------------------ STATS DECLS ------------------*/
    490 
    491 /* Number of fast-cache updates and flushes done. */
    492 static ULong n_fast_flushes = 0;
    493 static ULong n_fast_updates = 0;
    494 
    495 /* Number of full lookups done. */
    496 static ULong n_full_lookups = 0;
    497 static ULong n_lookup_probes = 0;
    498 
    499 /* Number/osize/tsize of translations entered; also the number of
    500    those for which self-checking was requested. */
    501 static ULong n_in_count    = 0;
    502 static ULong n_in_osize    = 0;
    503 static ULong n_in_tsize    = 0;
    504 static ULong n_in_sc_count = 0;
    505 
    506 /* Number/osize of translations discarded due to lack of space. */
    507 static ULong n_dump_count = 0;
    508 static ULong n_dump_osize = 0;
    509 static ULong n_sectors_recycled = 0;
    510 
    511 /* Number/osize of translations discarded due to requests to do so. */
    512 static ULong n_disc_count = 0;
    513 static ULong n_disc_osize = 0;
    514 
    515 
    516 /*-------------------------------------------------------------*/
    517 /*--- Misc                                                  ---*/
    518 /*-------------------------------------------------------------*/
    519 
    520 static void* ttaux_malloc ( const HChar* tag, SizeT n )
    521 {
    522    return VG_(arena_malloc)(VG_AR_TTAUX, tag, n);
    523 }
    524 
    525 static void ttaux_free ( void* p )
    526 {
    527    VG_(arena_free)(VG_AR_TTAUX, p);
    528 }
    529 
    530 
    531 /*-------------------------------------------------------------*/
    532 /*--- Chaining support                                      ---*/
    533 /*-------------------------------------------------------------*/
    534 
    535 static inline TTEntryC* index_tteC ( SECno sNo, TTEno tteNo )
    536 {
    537    vg_assert(sNo < n_sectors);
    538    vg_assert(tteNo < N_TTES_PER_SECTOR);
    539    Sector* s = &sectors[sNo];
    540    vg_assert(s->ttC && s->ttH);
    541    TTEntryC* tteC = &s->ttC[tteNo];
    542    TTEntryH* tteH = &s->ttH[tteNo];
    543    vg_assert(tteH->status == InUse);
    544    return tteC;
    545 }
    546 
    547 static inline TTEntryH* index_tteH ( SECno sNo, TTEno tteNo )
    548 {
    549    vg_assert(sNo < n_sectors);
    550    vg_assert(tteNo < N_TTES_PER_SECTOR);
    551    Sector* s = &sectors[sNo];
    552    vg_assert(s->ttH);
    553    TTEntryH* tteH = &s->ttH[tteNo];
    554    vg_assert(tteH->status == InUse);
    555    return tteH;
    556 }
    557 
    558 static void InEdge__init ( InEdge* ie )
    559 {
    560    ie->from_sNo   = INV_SNO; /* invalid */
    561    ie->from_tteNo = 0;
    562    ie->from_offs  = 0;
    563    ie->to_fastEP  = False;
    564 }
    565 
    566 static void OutEdge__init ( OutEdge* oe )
    567 {
    568    oe->to_sNo    = INV_SNO; /* invalid */
    569    oe->to_tteNo  = 0;
    570    oe->from_offs = 0;
    571 }
    572 
    573 static void TTEntryC__init ( TTEntryC* tteC )
    574 {
    575    VG_(bzero_inline)(tteC, sizeof(*tteC));
    576 }
    577 
    578 static void TTEntryH__init ( TTEntryH* tteH )
    579 {
    580    VG_(bzero_inline)(tteH, sizeof(*tteH));
    581 }
    582 
    583 static UWord InEdgeArr__size ( const InEdgeArr* iea )
    584 {
    585    if (iea->has_var) {
    586       vg_assert(iea->n_fixed == 0);
    587       return VG_(sizeXA)(iea->edges.var);
    588    } else {
    589       vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
    590       return iea->n_fixed;
    591    }
    592 }
    593 
    594 static void InEdgeArr__makeEmpty ( InEdgeArr* iea )
    595 {
    596    if (iea->has_var) {
    597       vg_assert(iea->n_fixed == 0);
    598       VG_(deleteXA)(iea->edges.var);
    599       iea->edges.var = NULL;
    600       iea->has_var = False;
    601    } else {
    602       vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
    603       iea->n_fixed = 0;
    604    }
    605 }
    606 
    607 static
    608 InEdge* InEdgeArr__index ( InEdgeArr* iea, UWord i )
    609 {
    610    if (iea->has_var) {
    611       vg_assert(iea->n_fixed == 0);
    612       return (InEdge*)VG_(indexXA)(iea->edges.var, i);
    613    } else {
    614       vg_assert(i < iea->n_fixed);
    615       return &iea->edges.fixed[i];
    616    }
    617 }
    618 
    619 static
    620 void InEdgeArr__deleteIndex ( InEdgeArr* iea, UWord i )
    621 {
    622    if (iea->has_var) {
    623       vg_assert(iea->n_fixed == 0);
    624       VG_(removeIndexXA)(iea->edges.var, i);
    625    } else {
    626       vg_assert(i < iea->n_fixed);
    627       for (; i+1 < iea->n_fixed; i++) {
    628          iea->edges.fixed[i] = iea->edges.fixed[i+1];
    629       }
    630       iea->n_fixed--;
    631    }
    632 }
    633 
    634 static
    635 void InEdgeArr__add ( InEdgeArr* iea, InEdge* ie )
    636 {
    637    if (iea->has_var) {
    638       vg_assert(iea->n_fixed == 0);
    639       VG_(addToXA)(iea->edges.var, ie);
    640    } else {
    641       vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
    642       if (iea->n_fixed == N_FIXED_IN_EDGE_ARR) {
    643          /* The fixed array is full, so we have to initialise an
    644             XArray and copy the fixed array into it. */
    645          XArray *var = VG_(newXA)(ttaux_malloc, "transtab.IEA__add",
    646                                   ttaux_free,
    647                                   sizeof(InEdge));
    648          VG_(hintSizeXA) (var, iea->n_fixed + 1);
    649          UWord i;
    650          for (i = 0; i < iea->n_fixed; i++) {
    651             VG_(addToXA)(var, &iea->edges.fixed[i]);
    652          }
    653          VG_(addToXA)(var, ie);
    654          iea->n_fixed = 0;
    655          iea->has_var = True;
    656          iea->edges.var = var;
    657       } else {
    658          /* Just add to the fixed array. */
    659          iea->edges.fixed[iea->n_fixed++] = *ie;
    660       }
    661    }
    662 }
    663 
    664 static UWord OutEdgeArr__size ( const OutEdgeArr* oea )
    665 {
    666    if (oea->has_var) {
    667       vg_assert(oea->n_fixed == 0);
    668       return VG_(sizeXA)(oea->edges.var);
    669    } else {
    670       vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
    671       return oea->n_fixed;
    672    }
    673 }
    674 
    675 static void OutEdgeArr__makeEmpty ( OutEdgeArr* oea )
    676 {
    677    if (oea->has_var) {
    678       vg_assert(oea->n_fixed == 0);
    679       VG_(deleteXA)(oea->edges.var);
    680       oea->edges.var = NULL;
    681       oea->has_var = False;
    682    } else {
    683       vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
    684       oea->n_fixed = 0;
    685    }
    686 }
    687 
    688 static
    689 OutEdge* OutEdgeArr__index ( OutEdgeArr* oea, UWord i )
    690 {
    691    if (oea->has_var) {
    692       vg_assert(oea->n_fixed == 0);
    693       return (OutEdge*)VG_(indexXA)(oea->edges.var, i);
    694    } else {
    695       vg_assert(i < oea->n_fixed);
    696       return &oea->edges.fixed[i];
    697    }
    698 }
    699 
    700 static
    701 void OutEdgeArr__deleteIndex ( OutEdgeArr* oea, UWord i )
    702 {
    703    if (oea->has_var) {
    704       vg_assert(oea->n_fixed == 0);
    705       VG_(removeIndexXA)(oea->edges.var, i);
    706    } else {
    707       vg_assert(i < oea->n_fixed);
    708       for (; i+1 < oea->n_fixed; i++) {
    709          oea->edges.fixed[i] = oea->edges.fixed[i+1];
    710       }
    711       oea->n_fixed--;
    712    }
    713 }
    714 
    715 static
    716 void OutEdgeArr__add ( OutEdgeArr* oea, OutEdge* oe )
    717 {
    718    if (oea->has_var) {
    719       vg_assert(oea->n_fixed == 0);
    720       VG_(addToXA)(oea->edges.var, oe);
    721    } else {
    722       vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
    723       if (oea->n_fixed == N_FIXED_OUT_EDGE_ARR) {
    724          /* The fixed array is full, so we have to initialise an
    725             XArray and copy the fixed array into it. */
    726          XArray *var = VG_(newXA)(ttaux_malloc, "transtab.OEA__add",
    727                                   ttaux_free,
    728                                   sizeof(OutEdge));
    729          VG_(hintSizeXA) (var, oea->n_fixed+1);
    730          UWord i;
    731          for (i = 0; i < oea->n_fixed; i++) {
    732             VG_(addToXA)(var, &oea->edges.fixed[i]);
    733          }
    734          VG_(addToXA)(var, oe);
    735          oea->n_fixed = 0;
    736          oea->has_var = True;
    737          oea->edges.var = var;
    738       } else {
    739          /* Just add to the fixed array. */
    740          oea->edges.fixed[oea->n_fixed++] = *oe;
    741       }
    742    }
    743 }
    744 
    745 static
    746 Int HostExtent__cmpOrd ( const void* v1, const void* v2 )
    747 {
    748    const HostExtent* hx1 = v1;
    749    const HostExtent* hx2 = v2;
    750    if (hx1->start + hx1->len <= hx2->start) return -1;
    751    if (hx2->start + hx2->len <= hx1->start) return 1;
    752    return 0; /* partial overlap */
    753 }
    754 
    755 /* True if hx is a dead host extent, i.e. corresponds to host code
    756    of an entry that was invalidated. */
    757 static
    758 Bool HostExtent__is_dead (const HostExtent* hx, const Sector* sec)
    759 {
    760    const TTEno tteNo = hx->tteNo;
    761 #define LDEBUG(m) if (DEBUG_TRANSTAB)                           \
    762       VG_(printf) (m                                            \
    763                    " start 0x%p len %u sector %d ttslot %u"     \
    764                    " tt.entry 0x%lu tt.tcptr 0x%p\n",           \
    765                    hx->start, hx->len, (int)(sec - sectors),    \
    766                    hx->tteNo,                                   \
    767                    sec->ttC[tteNo].entry, sec->ttC[tteNo].tcptr)
    768 
    769    /* Entry might have been invalidated and not re-used yet.*/
    770    if (sec->ttH[tteNo].status == Deleted) {
    771       LDEBUG("found deleted entry");
    772       return True;
    773    }
    774    /* Maybe we found this entry via a host_extents which was
    775       inserted for an entry which was changed to Deleted then
    776       re-used after. If this entry was re-used, then its tcptr
    777       is >= to host_extents start (i.e. the previous tcptr) + len.
    778       This is the case as there is no re-use of host code: a new
    779       entry or re-used entry always gets "higher value" host code. */
    780    if ((UChar*) sec->ttC[tteNo].tcptr >= hx->start + hx->len) {
    781       LDEBUG("found re-used entry");
    782       return True;
    783    }
    784 
    785    return False;
    786 #undef LDEBUG
    787 }
    788 
    789 static __attribute__((noinline))
    790 Bool find_TTEntry_from_hcode( /*OUT*/SECno* from_sNo,
    791                               /*OUT*/TTEno* from_tteNo,
    792                               void* hcode )
    793 {
    794    SECno i;
    795 
    796    /* Search order logic copied from VG_(search_transtab). */
    797    for (i = 0; i < n_sectors; i++) {
    798       SECno sno = sector_search_order[i];
    799       if (UNLIKELY(sno == INV_SNO))
    800          return False; /* run out of sectors to search */
    801 
    802       const Sector* sec = &sectors[sno];
    803       const XArray* /* of HostExtent */ host_extents = sec->host_extents;
    804       vg_assert(host_extents);
    805 
    806       HostExtent key;
    807       VG_(memset)(&key, 0, sizeof(key));
    808       key.start = hcode;
    809       key.len = 1;
    810       Word firstW = -1, lastW = -1;
    811       Bool found  = VG_(lookupXA_UNSAFE)(
    812                        host_extents, &key, &firstW, &lastW,
    813                        HostExtent__cmpOrd );
    814       vg_assert(firstW == lastW); // always true, even if not found
    815       if (found) {
    816          HostExtent* hx = VG_(indexXA)(host_extents, firstW);
    817          TTEno tteNo = hx->tteNo;
    818          /* Do some additional sanity checks. */
    819          vg_assert(tteNo < N_TTES_PER_SECTOR);
    820 
    821          /* if this hx entry corresponds to dead host code, we must
    822             tell this code has not been found, as it cannot be patched. */
    823          if (HostExtent__is_dead (hx, sec))
    824             return False;
    825 
    826          vg_assert(sec->ttH[tteNo].status == InUse);
    827          /* Can only half check that the found TTEntry contains hcode,
    828             due to not having a length value for the hcode in the
    829             TTEntry. */
    830          vg_assert((UChar*)sec->ttC[tteNo].tcptr <= (UChar*)hcode);
    831          /* Looks plausible */
    832          *from_sNo   = sno;
    833          *from_tteNo = tteNo;
    834          return True;
    835       }
    836    }
    837    return False;
    838 }
    839 
    840 
    841 /* Figure out whether or not hcode is jitted code present in the main
    842    code cache (but not in the no-redir cache).  Used for sanity
    843    checking. */
    844 static Bool is_in_the_main_TC ( const void* hcode )
    845 {
    846    SECno i, sno;
    847    for (i = 0; i < n_sectors; i++) {
    848       sno = sector_search_order[i];
    849       if (sno == INV_SNO)
    850          break; /* run out of sectors to search */
    851       if ((const UChar*)hcode >= (const UChar*)sectors[sno].tc
    852           && (const UChar*)hcode <= (const UChar*)sectors[sno].tc_next
    853                               + sizeof(ULong) - 1)
    854          return True;
    855    }
    856    return False;
    857 }
    858 
    859 
    860 /* Fulfill a chaining request, and record admin info so we
    861    can undo it later, if required.
    862 */
    863 void VG_(tt_tc_do_chaining) ( void* from__patch_addr,
    864                               SECno to_sNo,
    865                               TTEno to_tteNo,
    866                               Bool  to_fastEP )
    867 {
    868    /* Get the CPU info established at startup. */
    869    VexArch     arch_host = VexArch_INVALID;
    870    VexArchInfo archinfo_host;
    871    VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
    872    VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
    873    VexEndness endness_host = archinfo_host.endness;
    874 
    875    // host_code is where we're patching to.  So it needs to
    876    // take into account, whether we're jumping to the slow
    877    // or fast entry point.  By definition, the fast entry point
    878    // is exactly one event check's worth of code along from
    879    // the slow (tcptr) entry point.
    880    TTEntryC* to_tteC   = index_tteC(to_sNo, to_tteNo);
    881    void*     host_code = ((UChar*)to_tteC->tcptr)
    882                          + (to_fastEP ? LibVEX_evCheckSzB(arch_host) : 0);
    883 
    884    // stay sane -- the patch point (dst) is in this sector's code cache
    885    vg_assert( (UChar*)host_code >= (UChar*)sectors[to_sNo].tc );
    886    vg_assert( (UChar*)host_code <= (UChar*)sectors[to_sNo].tc_next
    887                                    + sizeof(ULong) - 1 );
    888 
    889    /* Find the TTEntry for the from__ code.  This isn't simple since
    890       we only know the patch address, which is going to be somewhere
    891       inside the from_ block. */
    892    SECno from_sNo   = INV_SNO;
    893    TTEno from_tteNo = INV_TTE;
    894    Bool from_found
    895       = find_TTEntry_from_hcode( &from_sNo, &from_tteNo,
    896                                  from__patch_addr );
    897    if (!from_found) {
    898       // The from code might have been discarded due to sector re-use
    899       // or marked Deleted due to translation invalidation.
    900       // In such a case, don't do the chaining.
    901       VG_(debugLog)(1,"transtab",
    902                     "host code %p not found (discarded? sector recycled?)"
    903                     " => no chaining done\n",
    904                     from__patch_addr);
    905       return;
    906    }
    907 
    908    TTEntryC* from_tteC = index_tteC(from_sNo, from_tteNo);
    909 
    910    /* Get VEX to do the patching itself.  We have to hand it off
    911       since it is host-dependent. */
    912    VexInvalRange vir
    913       = LibVEX_Chain(
    914            arch_host, endness_host,
    915            from__patch_addr,
    916            VG_(fnptr_to_fnentry)(
    917               to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP)
    918                         : &VG_(disp_cp_chain_me_to_slowEP)),
    919            (void*)host_code
    920         );
    921    VG_(invalidate_icache)( (void*)vir.start, vir.len );
    922 
    923    /* Now do the tricky bit -- update the ch_succs and ch_preds info
    924       for the two translations involved, so we can undo the chaining
    925       later, which we will have to do if the to_ block gets removed
    926       for whatever reason. */
    927 
    928    /* This is the new from_ -> to_ link to add. */
    929    InEdge ie;
    930    InEdge__init(&ie);
    931    ie.from_sNo   = from_sNo;
    932    ie.from_tteNo = from_tteNo;
    933    ie.to_fastEP  = to_fastEP;
    934    HWord from_offs = (HWord)( (UChar*)from__patch_addr
    935                               - (UChar*)from_tteC->tcptr );
    936    vg_assert(from_offs < 100000/* let's say */);
    937    ie.from_offs  = (UInt)from_offs;
    938 
    939    /* This is the new to_ -> from_ backlink to add. */
    940    OutEdge oe;
    941    OutEdge__init(&oe);
    942    oe.to_sNo    = to_sNo;
    943    oe.to_tteNo  = to_tteNo;
    944    oe.from_offs = (UInt)from_offs;
    945 
    946    /* Add .. */
    947    InEdgeArr__add(&to_tteC->in_edges, &ie);
    948    OutEdgeArr__add(&from_tteC->out_edges, &oe);
    949 }
    950 
    951 
    952 /* Unchain one patch, as described by the specified InEdge.  For
    953    sanity check purposes only (to check that the patched location is
    954    as expected) it also requires the fast and slow entry point
    955    addresses of the destination block (that is, the block that owns
    956    this InEdge). */
    957 __attribute__((noinline))
    958 static void unchain_one ( VexArch arch_host, VexEndness endness_host,
    959                           InEdge* ie,
    960                           void* to_fastEPaddr, void* to_slowEPaddr )
    961 {
    962    vg_assert(ie);
    963    TTEntryC* tteC
    964       = index_tteC(ie->from_sNo, ie->from_tteNo);
    965    UChar* place_to_patch
    966       = ((UChar*)tteC->tcptr) + ie->from_offs;
    967    UChar* disp_cp_chain_me
    968       = VG_(fnptr_to_fnentry)(
    969            ie->to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP)
    970                          : &VG_(disp_cp_chain_me_to_slowEP)
    971         );
    972    UChar* place_to_jump_to_EXPECTED
    973       = ie->to_fastEP ? to_fastEPaddr : to_slowEPaddr;
    974 
    975    // stay sane: both src and dst for this unchaining are
    976    // in the main code cache
    977    vg_assert( is_in_the_main_TC(place_to_patch) ); // src
    978    vg_assert( is_in_the_main_TC(place_to_jump_to_EXPECTED) ); // dst
    979    // dst check is ok because LibVEX_UnChain checks that
    980    // place_to_jump_to_EXPECTED really is the current dst, and
    981    // asserts if it isn't.
    982    VexInvalRange vir
    983        = LibVEX_UnChain( arch_host, endness_host, place_to_patch,
    984                          place_to_jump_to_EXPECTED, disp_cp_chain_me );
    985    VG_(invalidate_icache)( (void*)vir.start, vir.len );
    986 }
    987 
    988 
    989 /* The specified block is about to be deleted.  Update the preds and
    990    succs of its associated blocks accordingly.  This includes undoing
    991    any chained jumps to this block. */
    992 static
    993 void unchain_in_preparation_for_deletion ( VexArch arch_host,
    994                                            VexEndness endness_host,
    995                                            SECno here_sNo, TTEno here_tteNo )
    996 {
    997    if (DEBUG_TRANSTAB)
    998       VG_(printf)("QQQ unchain_in_prep %u.%u...\n", here_sNo, here_tteNo);
    999    UWord     i, j, n, m;
   1000    Int       evCheckSzB = LibVEX_evCheckSzB(arch_host);
   1001    TTEntryC* here_tteC  = index_tteC(here_sNo, here_tteNo);
   1002    TTEntryH* here_tteH  = index_tteH(here_sNo, here_tteNo);
   1003    if (DEBUG_TRANSTAB)
   1004       VG_(printf)("... QQQ tt.entry 0x%lu tt.tcptr 0x%p\n",
   1005                   here_tteC->entry, here_tteC->tcptr);
   1006    vg_assert(here_tteH->status == InUse);
   1007 
   1008    /* Visit all InEdges owned by here_tte. */
   1009    n = InEdgeArr__size(&here_tteC->in_edges);
   1010    for (i = 0; i < n; i++) {
   1011       InEdge* ie = InEdgeArr__index(&here_tteC->in_edges, i);
   1012       // Undo the chaining.
   1013       UChar* here_slow_EP = (UChar*)here_tteC->tcptr;
   1014       UChar* here_fast_EP = here_slow_EP + evCheckSzB;
   1015       unchain_one(arch_host, endness_host, ie, here_fast_EP, here_slow_EP);
   1016       // Find the corresponding entry in the "from" node's out_edges,
   1017       // and remove it.
   1018       TTEntryC* from_tteC = index_tteC(ie->from_sNo, ie->from_tteNo);
   1019       m = OutEdgeArr__size(&from_tteC->out_edges);
   1020       vg_assert(m > 0); // it must have at least one entry
   1021       for (j = 0; j < m; j++) {
   1022          OutEdge* oe = OutEdgeArr__index(&from_tteC->out_edges, j);
   1023          if (oe->to_sNo == here_sNo && oe->to_tteNo == here_tteNo
   1024              && oe->from_offs == ie->from_offs)
   1025            break;
   1026       }
   1027       vg_assert(j < m); // "oe must be findable"
   1028       OutEdgeArr__deleteIndex(&from_tteC->out_edges, j);
   1029    }
   1030 
   1031    /* Visit all OutEdges owned by here_tte. */
   1032    n = OutEdgeArr__size(&here_tteC->out_edges);
   1033    for (i = 0; i < n; i++) {
   1034       OutEdge* oe = OutEdgeArr__index(&here_tteC->out_edges, i);
   1035       // Find the corresponding entry in the "to" node's in_edges,
   1036       // and remove it.
   1037       TTEntryC* to_tteC = index_tteC(oe->to_sNo, oe->to_tteNo);
   1038       m = InEdgeArr__size(&to_tteC->in_edges);
   1039       vg_assert(m > 0); // it must have at least one entry
   1040       for (j = 0; j < m; j++) {
   1041          InEdge* ie = InEdgeArr__index(&to_tteC->in_edges, j);
   1042          if (ie->from_sNo == here_sNo && ie->from_tteNo == here_tteNo
   1043              && ie->from_offs == oe->from_offs)
   1044            break;
   1045       }
   1046       vg_assert(j < m); // "ie must be findable"
   1047       InEdgeArr__deleteIndex(&to_tteC->in_edges, j);
   1048    }
   1049 
   1050    InEdgeArr__makeEmpty(&here_tteC->in_edges);
   1051    OutEdgeArr__makeEmpty(&here_tteC->out_edges);
   1052 }
   1053 
   1054 
   1055 /*-------------------------------------------------------------*/
   1056 /*--- Address-range equivalence class stuff                 ---*/
   1057 /*-------------------------------------------------------------*/
   1058 
   1059 /* Return equivalence class number for a range. */
   1060 
   1061 static EClassNo range_to_eclass ( Addr start, UInt len )
   1062 {
   1063    UInt mask   = (1 << ECLASS_WIDTH) - 1;
   1064    UInt lo     = (UInt)start;
   1065    UInt hi     = lo + len - 1;
   1066    UInt loBits = (lo >> ECLASS_SHIFT) & mask;
   1067    UInt hiBits = (hi >> ECLASS_SHIFT) & mask;
   1068    if (loBits == hiBits) {
   1069       vg_assert(loBits < ECLASS_N-1);
   1070       return loBits;
   1071    } else {
   1072       return ECLASS_MISC;
   1073    }
   1074 }
   1075 
   1076 
   1077 /* Calculates the equivalence class numbers for any VexGuestExtent.
   1078    These are written in *eclasses, which must be big enough to hold 3
   1079    Ints.  The number written, between 1 and 3, is returned.  The
   1080    eclasses are presented in order, and any duplicates are removed.
   1081 */
   1082 
   1083 static
   1084 Int vexGuestExtents_to_eclasses ( /*OUT*/EClassNo* eclasses,
   1085                                   const TTEntryH* tteH )
   1086 {
   1087 
   1088 #  define SWAP(_lv1,_lv2) \
   1089       do { Int t = _lv1; _lv1 = _lv2; _lv2 = t; } while (0)
   1090 
   1091    UInt i, j, n_ec;
   1092    EClassNo r;
   1093 
   1094    vg_assert(tteH->vge_n_used >= 1 && tteH->vge_n_used <= 3);
   1095 
   1096    n_ec = 0;
   1097    for (i = 0; i < tteH->vge_n_used; i++) {
   1098       r = range_to_eclass( tteH->vge_base[i], tteH->vge_len[i] );
   1099       if (r == ECLASS_MISC)
   1100          goto bad;
   1101       /* only add if we haven't already seen it */
   1102       for (j = 0; j < n_ec; j++)
   1103          if (eclasses[j] == r)
   1104             break;
   1105       if (j == n_ec)
   1106          eclasses[n_ec++] = r;
   1107    }
   1108 
   1109    if (n_ec == 1)
   1110       return 1;
   1111 
   1112    if (n_ec == 2) {
   1113       /* sort */
   1114       if (eclasses[0] > eclasses[1])
   1115          SWAP(eclasses[0], eclasses[1]);
   1116       return 2;
   1117    }
   1118 
   1119    if (n_ec == 3) {
   1120       /* sort */
   1121       if (eclasses[0] > eclasses[2])
   1122          SWAP(eclasses[0], eclasses[2]);
   1123       if (eclasses[0] > eclasses[1])
   1124          SWAP(eclasses[0], eclasses[1]);
   1125       if (eclasses[1] > eclasses[2])
   1126          SWAP(eclasses[1], eclasses[2]);
   1127       return 3;
   1128    }
   1129 
   1130    /* NOTREACHED */
   1131    vg_assert(0);
   1132 
   1133   bad:
   1134    eclasses[0] = ECLASS_MISC;
   1135    return 1;
   1136 
   1137 #  undef SWAP
   1138 }
   1139 
   1140 
   1141 /* Add tteno to the set of entries listed for equivalence class ec in
   1142    this sector.  Returns used location in eclass array. */
   1143 
   1144 static
   1145 UInt addEClassNo ( /*MOD*/Sector* sec, EClassNo ec, TTEno tteno )
   1146 {
   1147    Int    old_sz, new_sz, i, r;
   1148    TTEno  *old_ar, *new_ar;
   1149 
   1150    vg_assert(ec >= 0 && ec < ECLASS_N);
   1151    vg_assert(tteno < N_TTES_PER_SECTOR);
   1152 
   1153    if (DEBUG_TRANSTAB) VG_(printf)("ec %d  gets %d\n", ec, (Int)tteno);
   1154 
   1155    if (sec->ec2tte_used[ec] >= sec->ec2tte_size[ec]) {
   1156 
   1157       vg_assert(sec->ec2tte_used[ec] == sec->ec2tte_size[ec]);
   1158 
   1159       old_sz = sec->ec2tte_size[ec];
   1160       old_ar = sec->ec2tte[ec];
   1161       new_sz = old_sz==0 ? 8 : old_sz<64 ? 2*old_sz : (3*old_sz)/2;
   1162       new_ar = ttaux_malloc("transtab.aECN.1",
   1163                             new_sz * sizeof(TTEno));
   1164       for (i = 0; i < old_sz; i++)
   1165          new_ar[i] = old_ar[i];
   1166       if (old_ar)
   1167          ttaux_free(old_ar);
   1168       sec->ec2tte_size[ec] = new_sz;
   1169       sec->ec2tte[ec] = new_ar;
   1170 
   1171       if (DEBUG_TRANSTAB) VG_(printf)("expand ec %d to %d\n", ec, new_sz);
   1172    }
   1173 
   1174    /* Common case */
   1175    r = sec->ec2tte_used[ec]++;
   1176    vg_assert(r >= 0 && r < sec->ec2tte_size[ec]);
   1177    sec->ec2tte[ec][r] = tteno;
   1178    return (UInt)r;
   1179 }
   1180 
   1181 
   1182 /* 'vge' is being added to 'sec' at TT entry 'tteno'.  Add appropriate
   1183    eclass entries to 'sec'. */
   1184 
   1185 static
   1186 void upd_eclasses_after_add ( /*MOD*/Sector* sec, TTEno tteno )
   1187 {
   1188    Int i, r;
   1189    EClassNo eclasses[3];
   1190    vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
   1191 
   1192    TTEntryH* tteH = &sec->ttH[tteno];
   1193    r = vexGuestExtents_to_eclasses( eclasses, tteH );
   1194    vg_assert(r >= 1 && r <= 3);
   1195 
   1196    TTEntryC* tteC = &sec->ttC[tteno];
   1197    tteC->n_tte2ec = r;
   1198    for (i = 0; i < r; i++) {
   1199       tteC->tte2ec_ec[i] = eclasses[i];
   1200       tteC->tte2ec_ix[i] = addEClassNo( sec, eclasses[i], tteno );
   1201    }
   1202 }
   1203 
   1204 
   1205 /* Check the eclass info in 'sec' to ensure it is consistent.  Returns
   1206    True if OK, False if something's not right.  Expensive. */
   1207 
   1208 static Bool sanity_check_eclasses_in_sector ( const Sector* sec )
   1209 {
   1210 #  define BAD(_str) do { whassup = (_str); goto bad; } while (0)
   1211 
   1212    const HChar* whassup = NULL;
   1213    Int      j, k, n, ec_idx;
   1214    EClassNo i;
   1215    EClassNo ec_num;
   1216    TTEno    tteno;
   1217    ULong*   tce;
   1218 
   1219    /* Basic checks on this sector */
   1220    if (sec->tt_n_inuse < 0 || sec->tt_n_inuse > N_TTES_PER_SECTOR)
   1221       BAD("invalid sec->tt_n_inuse");
   1222    tce = sec->tc_next;
   1223    if (tce < &sec->tc[0] || tce > &sec->tc[tc_sector_szQ])
   1224       BAD("sec->tc_next points outside tc");
   1225 
   1226    /* For each eclass ... */
   1227    for (i = 0; i < ECLASS_N; i++) {
   1228       if (sec->ec2tte_size[i] == 0 && sec->ec2tte[i] != NULL)
   1229          BAD("ec2tte_size/ec2tte mismatch(1)");
   1230       if (sec->ec2tte_size[i] != 0 && sec->ec2tte[i] == NULL)
   1231          BAD("ec2tte_size/ec2tte mismatch(2)");
   1232       if (sec->ec2tte_used[i] < 0
   1233           || sec->ec2tte_used[i] > sec->ec2tte_size[i])
   1234          BAD("implausible ec2tte_used");
   1235       if (sec->ec2tte_used[i] == 0)
   1236          continue;
   1237 
   1238       /* For each tt reference in each eclass .. ensure the reference
   1239          is to a valid tt entry, and that the entry's address ranges
   1240          really include this eclass. */
   1241 
   1242       for (j = 0; j < sec->ec2tte_used[i]; j++) {
   1243          tteno = sec->ec2tte[i][j];
   1244          if (tteno == EC2TTE_DELETED)
   1245             continue;
   1246          if (tteno >= N_TTES_PER_SECTOR)
   1247             BAD("implausible tteno");
   1248          TTEntryC* tteC = &sec->ttC[tteno];
   1249          TTEntryH* tteH = &sec->ttH[tteno];
   1250          if (tteH->status != InUse)
   1251             BAD("tteno points to non-inuse tte");
   1252          if (tteC->n_tte2ec < 1 || tteC->n_tte2ec > 3)
   1253             BAD("tteC->n_tte2ec out of range");
   1254          /* Exactly least one of tte->eclasses[0 .. tte->n_eclasses-1]
   1255             must equal i.  Inspect tte's eclass info. */
   1256          n = 0;
   1257          for (k = 0; k < tteC->n_tte2ec; k++) {
   1258             if (k < tteC->n_tte2ec-1
   1259                 && tteC->tte2ec_ec[k] >= tteC->tte2ec_ec[k+1])
   1260                BAD("tteC->tte2ec_ec[..] out of order");
   1261             ec_num = tteC->tte2ec_ec[k];
   1262             if (ec_num < 0 || ec_num >= ECLASS_N)
   1263                BAD("tteC->tte2ec_ec[..] out of range");
   1264             if (ec_num != i)
   1265                continue;
   1266             ec_idx = tteC->tte2ec_ix[k];
   1267             if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[i])
   1268                BAD("tteC->tte2ec_ix[..] out of range");
   1269             if (ec_idx == j)
   1270                n++;
   1271          }
   1272          if (n != 1)
   1273             BAD("tteno does not point back at eclass");
   1274       }
   1275    }
   1276 
   1277    /* That establishes that for each forward pointer from TTEntrys
   1278       there is a corresponding backward pointer from the eclass[]
   1279       arrays.  However, it doesn't rule out the possibility of other,
   1280       bogus pointers in the eclass[] arrays.  So do those similarly:
   1281       scan through them and check the TTEntryies they point at point
   1282       back. */
   1283 
   1284    for (tteno = 0; tteno < N_TTES_PER_SECTOR; tteno++) {
   1285 
   1286       TTEntryC* tteC = &sec->ttC[tteno];
   1287       TTEntryH* tteH = &sec->ttH[tteno];
   1288       if (tteH->status == Empty || tteH->status == Deleted) {
   1289          if (tteC->n_tte2ec != 0)
   1290             BAD("tteC->n_tte2ec nonzero for unused tte");
   1291          continue;
   1292       }
   1293 
   1294       vg_assert(tteH->status == InUse);
   1295 
   1296       if (tteC->n_tte2ec < 1 || tteC->n_tte2ec > 3)
   1297          BAD("tteC->n_tte2ec out of range(2)");
   1298 
   1299       for (j = 0; j < tteC->n_tte2ec; j++) {
   1300          ec_num = tteC->tte2ec_ec[j];
   1301          if (ec_num < 0 || ec_num >= ECLASS_N)
   1302             BAD("tteC->tte2ec_ec[..] out of range");
   1303          ec_idx = tteC->tte2ec_ix[j];
   1304          if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[ec_num])
   1305             BAD("tteC->tte2ec_ix[..] out of range(2)");
   1306          if (sec->ec2tte[ec_num][ec_idx] != tteno)
   1307             BAD("ec2tte does not point back to tte");
   1308       }
   1309    }
   1310 
   1311    return True;
   1312 
   1313   bad:
   1314    if (whassup)
   1315       VG_(debugLog)(0, "transtab", "eclass sanity fail: %s\n", whassup);
   1316 
   1317 #  if 0
   1318    VG_(printf)("eclass = %d\n", i);
   1319    VG_(printf)("tteno = %d\n", (Int)tteno);
   1320    switch (tte->status) {
   1321       case InUse:   VG_(printf)("InUse\n"); break;
   1322       case Deleted: VG_(printf)("Deleted\n"); break;
   1323       case Empty:   VG_(printf)("Empty\n"); break;
   1324    }
   1325    if (tte->status != Empty) {
   1326       for (k = 0; k < tte->vge.n_used; k++)
   1327          VG_(printf)("0x%lx %u\n", tte->vge.base[k], (UInt)tte->vge.len[k]);
   1328    }
   1329 #  endif
   1330 
   1331    return False;
   1332 
   1333 #  undef BAD
   1334 }
   1335 
   1336 
   1337 /* Sanity check absolutely everything.  True == check passed. */
   1338 
   1339 /* forwards */
   1340 static Bool sanity_check_redir_tt_tc ( void );
   1341 
   1342 static Bool sanity_check_sector_search_order ( void )
   1343 {
   1344    SECno i, j, nListed;
   1345    /* assert the array is the right size */
   1346    vg_assert(MAX_N_SECTORS == (sizeof(sector_search_order)
   1347                                / sizeof(sector_search_order[0])));
   1348    /* Check it's of the form  valid_sector_numbers ++ [INV_SNO, INV_SNO, ..] */
   1349    for (i = 0; i < n_sectors; i++) {
   1350       if (sector_search_order[i] == INV_SNO
   1351           || sector_search_order[i] >= n_sectors)
   1352          break;
   1353    }
   1354    nListed = i;
   1355    for (/* */; i < n_sectors; i++) {
   1356       if (sector_search_order[i] != INV_SNO)
   1357          break;
   1358    }
   1359    if (i != n_sectors)
   1360       return False;
   1361    /* Check each sector number only appears once */
   1362    for (i = 0; i < n_sectors; i++) {
   1363       if (sector_search_order[i] == INV_SNO)
   1364          continue;
   1365       for (j = i+1; j < n_sectors; j++) {
   1366          if (sector_search_order[j] == sector_search_order[i])
   1367             return False;
   1368       }
   1369    }
   1370    /* Check that the number of listed sectors equals the number
   1371       in use, by counting nListed back down. */
   1372    for (i = 0; i < n_sectors; i++) {
   1373       if (sectors[i].tc != NULL)
   1374          nListed--;
   1375    }
   1376    if (nListed != 0)
   1377       return False;
   1378    return True;
   1379 }
   1380 
   1381 static Bool sanity_check_all_sectors ( void )
   1382 {
   1383    SECno   sno;
   1384    Bool    sane;
   1385    Sector* sec;
   1386    for (sno = 0; sno < n_sectors; sno++) {
   1387       Int i;
   1388       Int nr_not_dead_hx = 0;
   1389       Int szhxa;
   1390       sec = &sectors[sno];
   1391       if (sec->tc == NULL)
   1392          continue;
   1393       sane = sanity_check_eclasses_in_sector( sec );
   1394       if (!sane)
   1395          return False;
   1396       szhxa = VG_(sizeXA)(sec->host_extents);
   1397       for (i = 0; i < szhxa; i++) {
   1398          const HostExtent* hx = VG_(indexXA)(sec->host_extents, i);
   1399          if (!HostExtent__is_dead (hx, sec))
   1400             nr_not_dead_hx++;
   1401       }
   1402       if (nr_not_dead_hx != sec->tt_n_inuse) {
   1403          VG_(debugLog)(0, "transtab",
   1404                        "nr_not_dead_hx %d sanity fail "
   1405                        "(expected == in use %d)\n",
   1406                        nr_not_dead_hx, sec->tt_n_inuse);
   1407          return False;
   1408       }
   1409    }
   1410 
   1411    if ( !sanity_check_redir_tt_tc() )
   1412       return False;
   1413    if ( !sanity_check_sector_search_order() )
   1414       return False;
   1415    return True;
   1416 }
   1417 
   1418 
   1419 
   1420 /*-------------------------------------------------------------*/
   1421 /*--- Add/find translations                                 ---*/
   1422 /*-------------------------------------------------------------*/
   1423 
   1424 static UInt vge_osize ( const VexGuestExtents* vge )
   1425 {
   1426    UInt i, n = 0;
   1427    for (i = 0; i < vge->n_used; i++)
   1428       n += (UInt)vge->len[i];
   1429    return n;
   1430 }
   1431 
   1432 static UInt TTEntryH__osize ( const TTEntryH* tteH )
   1433 {
   1434    UInt i, n = 0;
   1435    for (i = 0; i < tteH->vge_n_used; i++)
   1436       n += (UInt)tteH->vge_len[i];
   1437    return n;
   1438 }
   1439 
   1440 static Bool isValidSector ( SECno sector )
   1441 {
   1442    if (sector == INV_SNO || sector >= n_sectors)
   1443       return False;
   1444    return True;
   1445 }
   1446 
   1447 static inline HTTno HASH_TT ( Addr key )
   1448 {
   1449    UInt kHi = sizeof(Addr) == 4 ? 0 : (key >> 32);
   1450    UInt kLo = (UInt)key;
   1451    UInt k32 = kHi ^ kLo;
   1452    UInt ror = 7;
   1453    if (ror > 0)
   1454       k32 = (k32 >> ror) | (k32 << (32-ror));
   1455    return (HTTno)(k32 % N_HTTES_PER_SECTOR);
   1456 }
   1457 
   1458 static void setFastCacheEntry ( Addr key, ULong* tcptr )
   1459 {
   1460    UInt cno = (UInt)VG_TT_FAST_HASH(key);
   1461    VG_(tt_fast)[cno].guest = key;
   1462    VG_(tt_fast)[cno].host  = (Addr)tcptr;
   1463    n_fast_updates++;
   1464    /* This shouldn't fail.  It should be assured by m_translate
   1465       which should reject any attempt to make translation of code
   1466       starting at TRANSTAB_BOGUS_GUEST_ADDR. */
   1467    vg_assert(VG_(tt_fast)[cno].guest != TRANSTAB_BOGUS_GUEST_ADDR);
   1468 }
   1469 
   1470 /* Invalidate the fast cache VG_(tt_fast). */
   1471 static void invalidateFastCache ( void )
   1472 {
   1473    UInt j;
   1474    /* This loop is popular enough to make it worth unrolling a
   1475       bit, at least on ppc32. */
   1476    vg_assert(VG_TT_FAST_SIZE > 0 && (VG_TT_FAST_SIZE % 4) == 0);
   1477    for (j = 0; j < VG_TT_FAST_SIZE; j += 4) {
   1478       VG_(tt_fast)[j+0].guest = TRANSTAB_BOGUS_GUEST_ADDR;
   1479       VG_(tt_fast)[j+1].guest = TRANSTAB_BOGUS_GUEST_ADDR;
   1480       VG_(tt_fast)[j+2].guest = TRANSTAB_BOGUS_GUEST_ADDR;
   1481       VG_(tt_fast)[j+3].guest = TRANSTAB_BOGUS_GUEST_ADDR;
   1482    }
   1483 
   1484    vg_assert(j == VG_TT_FAST_SIZE);
   1485    n_fast_flushes++;
   1486 }
   1487 
   1488 
   1489 static TTEno get_empty_tt_slot(SECno sNo)
   1490 {
   1491    TTEno i;
   1492 
   1493    i = sectors[sNo].empty_tt_list;
   1494    sectors[sNo].empty_tt_list = sectors[sNo].ttC[i].usage.next_empty_tte;
   1495 
   1496    vg_assert (i >= 0 && i < N_TTES_PER_SECTOR);
   1497 
   1498    return i;
   1499 }
   1500 
   1501 static void add_to_empty_tt_list (SECno sNo, TTEno tteno)
   1502 {
   1503    sectors[sNo].ttC[tteno].usage.next_empty_tte = sectors[sNo].empty_tt_list;
   1504    sectors[sNo].empty_tt_list = tteno;
   1505 }
   1506 
   1507 static void initialiseSector ( SECno sno )
   1508 {
   1509    UInt i;
   1510    SysRes  sres;
   1511    Sector* sec;
   1512    vg_assert(isValidSector(sno));
   1513 
   1514    { Bool sane = sanity_check_sector_search_order();
   1515      vg_assert(sane);
   1516    }
   1517    sec = &sectors[sno];
   1518 
   1519    if (sec->tc == NULL) {
   1520 
   1521       /* Sector has never been used before.  Need to allocate tt and
   1522          tc. */
   1523       vg_assert(sec->ttC == NULL);
   1524       vg_assert(sec->ttH == NULL);
   1525       vg_assert(sec->tc_next == NULL);
   1526       vg_assert(sec->tt_n_inuse == 0);
   1527       for (EClassNo e = 0; e < ECLASS_N; e++) {
   1528          vg_assert(sec->ec2tte_size[e] == 0);
   1529          vg_assert(sec->ec2tte_used[e] == 0);
   1530          vg_assert(sec->ec2tte[e] == NULL);
   1531       }
   1532       vg_assert(sec->host_extents == NULL);
   1533 
   1534       if (VG_(clo_stats) || VG_(debugLog_getLevel)() >= 1)
   1535          VG_(dmsg)("transtab: " "allocate sector %d\n", sno);
   1536 
   1537       sres = VG_(am_mmap_anon_float_valgrind)( 8 * tc_sector_szQ );
   1538       if (sr_isError(sres)) {
   1539          VG_(out_of_memory_NORETURN)("initialiseSector(TC)",
   1540                                      8 * tc_sector_szQ );
   1541 	 /*NOTREACHED*/
   1542       }
   1543       sec->tc = (ULong*)(Addr)sr_Res(sres);
   1544 
   1545       sres = VG_(am_mmap_anon_float_valgrind)
   1546                 ( N_TTES_PER_SECTOR * sizeof(TTEntryC) );
   1547       if (sr_isError(sres)) {
   1548          VG_(out_of_memory_NORETURN)("initialiseSector(TTC)",
   1549                                      N_TTES_PER_SECTOR * sizeof(TTEntryC) );
   1550 	 /*NOTREACHED*/
   1551       }
   1552       sec->ttC = (TTEntryC*)(Addr)sr_Res(sres);
   1553 
   1554       sres = VG_(am_mmap_anon_float_valgrind)
   1555                 ( N_TTES_PER_SECTOR * sizeof(TTEntryH) );
   1556       if (sr_isError(sres)) {
   1557          VG_(out_of_memory_NORETURN)("initialiseSector(TTH)",
   1558                                      N_TTES_PER_SECTOR * sizeof(TTEntryH) );
   1559 	 /*NOTREACHED*/
   1560       }
   1561       sec->ttH = (TTEntryH*)(Addr)sr_Res(sres);
   1562 
   1563       sec->empty_tt_list = HTT_EMPTY;
   1564       for (TTEno ei = 0; ei < N_TTES_PER_SECTOR; ei++) {
   1565          sec->ttH[ei].status   = Empty;
   1566          sec->ttC[ei].n_tte2ec = 0;
   1567          add_to_empty_tt_list(sno, ei);
   1568       }
   1569 
   1570       sres = VG_(am_mmap_anon_float_valgrind)
   1571                 ( N_HTTES_PER_SECTOR * sizeof(TTEno) );
   1572       if (sr_isError(sres)) {
   1573          VG_(out_of_memory_NORETURN)("initialiseSector(HTT)",
   1574                                      N_HTTES_PER_SECTOR * sizeof(TTEno) );
   1575 	 /*NOTREACHED*/
   1576       }
   1577       sec->htt = (TTEno*)(Addr)sr_Res(sres);
   1578       for (HTTno hi = 0; hi < N_HTTES_PER_SECTOR; hi++)
   1579          sec->htt[hi] = HTT_EMPTY;
   1580 
   1581       /* Set up the host_extents array. */
   1582       sec->host_extents
   1583          = VG_(newXA)(ttaux_malloc, "transtab.initialiseSector(host_extents)",
   1584                       ttaux_free,
   1585                       sizeof(HostExtent));
   1586 
   1587       /* Add an entry in the sector_search_order */
   1588       for (i = 0; i < n_sectors; i++) {
   1589          if (sector_search_order[i] == INV_SNO)
   1590             break;
   1591       }
   1592       vg_assert(i >= 0 && i < n_sectors);
   1593       sector_search_order[i] = sno;
   1594 
   1595       if (VG_(clo_verbosity) > 2)
   1596          VG_(message)(Vg_DebugMsg, "TT/TC: initialise sector %d\n", sno);
   1597 
   1598    } else {
   1599 
   1600       /* Sector has been used before.  Dump the old contents. */
   1601       if (VG_(clo_stats) || VG_(debugLog_getLevel)() >= 1)
   1602          VG_(dmsg)("transtab: " "recycle  sector %d\n", sno);
   1603       n_sectors_recycled++;
   1604 
   1605       vg_assert(sec->ttC != NULL);
   1606       vg_assert(sec->ttH != NULL);
   1607       vg_assert(sec->tc_next != NULL);
   1608       n_dump_count += sec->tt_n_inuse;
   1609 
   1610       VexArch     arch_host = VexArch_INVALID;
   1611       VexArchInfo archinfo_host;
   1612       VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
   1613       VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
   1614       VexEndness endness_host = archinfo_host.endness;
   1615 
   1616       /* Visit each just-about-to-be-abandoned translation. */
   1617       if (DEBUG_TRANSTAB) VG_(printf)("QQQ unlink-entire-sector: %d START\n",
   1618                                       sno);
   1619       sec->empty_tt_list = HTT_EMPTY;
   1620       for (TTEno ei = 0; ei < N_TTES_PER_SECTOR; ei++) {
   1621          if (sec->ttH[ei].status == InUse) {
   1622             vg_assert(sec->ttC[ei].n_tte2ec >= 1);
   1623             vg_assert(sec->ttC[ei].n_tte2ec <= 3);
   1624             n_dump_osize += TTEntryH__osize(&sec->ttH[ei]);
   1625             /* Tell the tool too. */
   1626             if (VG_(needs).superblock_discards) {
   1627                VexGuestExtents vge_tmp;
   1628                TTEntryH__to_VexGuestExtents( &vge_tmp, &sec->ttH[ei] );
   1629                VG_TDICT_CALL( tool_discard_superblock_info,
   1630                               sec->ttC[ei].entry, vge_tmp );
   1631             }
   1632             unchain_in_preparation_for_deletion(arch_host,
   1633                                                 endness_host, sno, ei);
   1634          } else {
   1635             vg_assert(sec->ttC[ei].n_tte2ec == 0);
   1636          }
   1637          sec->ttH[ei].status   = Empty;
   1638          sec->ttC[ei].n_tte2ec = 0;
   1639          add_to_empty_tt_list(sno, ei);
   1640       }
   1641       for (HTTno hi = 0; hi < N_HTTES_PER_SECTOR; hi++)
   1642          sec->htt[hi] = HTT_EMPTY;
   1643 
   1644       if (DEBUG_TRANSTAB) VG_(printf)("QQQ unlink-entire-sector: %d END\n",
   1645                                       sno);
   1646 
   1647       /* Free up the eclass structures. */
   1648       for (EClassNo e = 0; e < ECLASS_N; e++) {
   1649          if (sec->ec2tte_size[e] == 0) {
   1650             vg_assert(sec->ec2tte_used[e] == 0);
   1651             vg_assert(sec->ec2tte[e] == NULL);
   1652          } else {
   1653             vg_assert(sec->ec2tte[e] != NULL);
   1654             ttaux_free(sec->ec2tte[e]);
   1655             sec->ec2tte[e] = NULL;
   1656             sec->ec2tte_size[e] = 0;
   1657             sec->ec2tte_used[e] = 0;
   1658          }
   1659       }
   1660 
   1661       /* Empty out the host extents array. */
   1662       vg_assert(sec->host_extents != NULL);
   1663       VG_(dropTailXA)(sec->host_extents, VG_(sizeXA)(sec->host_extents));
   1664       vg_assert(VG_(sizeXA)(sec->host_extents) == 0);
   1665 
   1666       /* Sanity check: ensure it is already in
   1667          sector_search_order[]. */
   1668       SECno ix;
   1669       for (ix = 0; ix < n_sectors; ix++) {
   1670          if (sector_search_order[ix] == sno)
   1671             break;
   1672       }
   1673       vg_assert(ix >= 0 && ix < n_sectors);
   1674 
   1675       if (VG_(clo_verbosity) > 2)
   1676          VG_(message)(Vg_DebugMsg, "TT/TC: recycle sector %d\n", sno);
   1677    }
   1678 
   1679    sec->tc_next = sec->tc;
   1680    sec->tt_n_inuse = 0;
   1681 
   1682    invalidateFastCache();
   1683 
   1684    { Bool sane = sanity_check_sector_search_order();
   1685      vg_assert(sane);
   1686    }
   1687 }
   1688 
   1689 /* Add a translation of vge to TT/TC.  The translation is temporarily
   1690    in code[0 .. code_len-1].
   1691 
   1692    pre: youngest_sector points to a valid (although possibly full)
   1693    sector.
   1694 */
   1695 void VG_(add_to_transtab)( const VexGuestExtents* vge,
   1696                            Addr             entry,
   1697                            Addr             code,
   1698                            UInt             code_len,
   1699                            Bool             is_self_checking,
   1700                            Int              offs_profInc,
   1701                            UInt             n_guest_instrs )
   1702 {
   1703    Int    tcAvailQ, reqdQ, y;
   1704    ULong  *tcptr, *tcptr2;
   1705    UChar* srcP;
   1706    UChar* dstP;
   1707 
   1708    vg_assert(init_done);
   1709    vg_assert(vge->n_used >= 1 && vge->n_used <= 3);
   1710 
   1711    /* 60000: should agree with N_TMPBUF in m_translate.c. */
   1712    vg_assert(code_len > 0 && code_len < 60000);
   1713 
   1714    /* Generally stay sane */
   1715    vg_assert(n_guest_instrs < 200); /* it can be zero, tho */
   1716 
   1717    if (DEBUG_TRANSTAB)
   1718       VG_(printf)("add_to_transtab(entry = 0x%lx, len = %u) ...\n",
   1719                   entry, code_len);
   1720 
   1721    n_in_count++;
   1722    n_in_tsize += code_len;
   1723    n_in_osize += vge_osize(vge);
   1724    if (is_self_checking)
   1725       n_in_sc_count++;
   1726 
   1727    y = youngest_sector;
   1728    vg_assert(isValidSector(y));
   1729 
   1730    if (sectors[y].tc == NULL)
   1731       initialiseSector(y);
   1732 
   1733    /* Try putting the translation in this sector. */
   1734    reqdQ = (code_len + 7) >> 3;
   1735 
   1736    /* Will it fit in tc? */
   1737    tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
   1738               - ((ULong*)(sectors[y].tc_next));
   1739    vg_assert(tcAvailQ >= 0);
   1740    vg_assert(tcAvailQ <= tc_sector_szQ);
   1741 
   1742    if (tcAvailQ < reqdQ
   1743        || sectors[y].tt_n_inuse >= N_TTES_PER_SECTOR) {
   1744       /* No.  So move on to the next sector.  Either it's never been
   1745          used before, in which case it will get its tt/tc allocated
   1746          now, or it has been used before, in which case it is set to be
   1747          empty, hence throwing out the oldest sector. */
   1748       vg_assert(tc_sector_szQ > 0);
   1749       Int tt_loading_pct = (100 * sectors[y].tt_n_inuse)
   1750                            / N_HTTES_PER_SECTOR;
   1751       Int tc_loading_pct = (100 * (tc_sector_szQ - tcAvailQ))
   1752                            / tc_sector_szQ;
   1753       if (VG_(clo_stats) || VG_(debugLog_getLevel)() >= 1) {
   1754          VG_(dmsg)("transtab: "
   1755                    "declare  sector %d full "
   1756                    "(TT loading %2d%%, TC loading %2d%%, avg tce size %d)\n",
   1757                    y, tt_loading_pct, tc_loading_pct,
   1758                    8 * (tc_sector_szQ - tcAvailQ)/sectors[y].tt_n_inuse);
   1759       }
   1760       youngest_sector++;
   1761       if (youngest_sector >= n_sectors)
   1762          youngest_sector = 0;
   1763       y = youngest_sector;
   1764       initialiseSector(y);
   1765    }
   1766 
   1767    /* Be sure ... */
   1768    tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
   1769               - ((ULong*)(sectors[y].tc_next));
   1770    vg_assert(tcAvailQ >= 0);
   1771    vg_assert(tcAvailQ <= tc_sector_szQ);
   1772    vg_assert(tcAvailQ >= reqdQ);
   1773    vg_assert(sectors[y].tt_n_inuse < N_TTES_PER_SECTOR);
   1774    vg_assert(sectors[y].tt_n_inuse >= 0);
   1775 
   1776    /* Copy into tc. */
   1777    tcptr = sectors[y].tc_next;
   1778    vg_assert(tcptr >= &sectors[y].tc[0]);
   1779    vg_assert(tcptr <= &sectors[y].tc[tc_sector_szQ]);
   1780 
   1781    dstP = (UChar*)tcptr;
   1782    srcP = (UChar*)code;
   1783    VG_(memcpy)(dstP, srcP, code_len);
   1784    sectors[y].tc_next += reqdQ;
   1785    sectors[y].tt_n_inuse++;
   1786 
   1787    /* more paranoia */
   1788    tcptr2 = sectors[y].tc_next;
   1789    vg_assert(tcptr2 >= &sectors[y].tc[0]);
   1790    vg_assert(tcptr2 <= &sectors[y].tc[tc_sector_szQ]);
   1791 
   1792    /* Find an empty tt slot, and use it.  There must be such a slot
   1793       since tt is never allowed to get completely full. */
   1794    TTEno tteix = get_empty_tt_slot(y);
   1795    TTEntryC__init(&sectors[y].ttC[tteix]);
   1796    TTEntryH__init(&sectors[y].ttH[tteix]);
   1797    sectors[y].ttC[tteix].tcptr  = tcptr;
   1798    sectors[y].ttC[tteix].usage.prof.count  = 0;
   1799    sectors[y].ttC[tteix].usage.prof.weight =
   1800       n_guest_instrs == 0 ? 1 : n_guest_instrs;
   1801    sectors[y].ttC[tteix].entry  = entry;
   1802    TTEntryH__from_VexGuestExtents( &sectors[y].ttH[tteix], vge );
   1803    sectors[y].ttH[tteix].status = InUse;
   1804 
   1805    // Point an htt entry to the tt slot
   1806    HTTno htti = HASH_TT(entry);
   1807    vg_assert(htti >= 0 && htti < N_HTTES_PER_SECTOR);
   1808    while (True) {
   1809       if (sectors[y].htt[htti] == HTT_EMPTY
   1810           || sectors[y].htt[htti] == HTT_DELETED)
   1811          break;
   1812       htti++;
   1813       if (htti >= N_HTTES_PER_SECTOR)
   1814          htti = 0;
   1815    }
   1816    sectors[y].htt[htti] = tteix;
   1817 
   1818    /* Patch in the profile counter location, if necessary. */
   1819    if (offs_profInc != -1) {
   1820       vg_assert(offs_profInc >= 0 && offs_profInc < code_len);
   1821       VexArch     arch_host = VexArch_INVALID;
   1822       VexArchInfo archinfo_host;
   1823       VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
   1824       VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
   1825       VexEndness endness_host = archinfo_host.endness;
   1826       VexInvalRange vir
   1827          = LibVEX_PatchProfInc( arch_host, endness_host,
   1828                                 dstP + offs_profInc,
   1829                                 &sectors[y].ttC[tteix].usage.prof.count );
   1830       VG_(invalidate_icache)( (void*)vir.start, vir.len );
   1831    }
   1832 
   1833    VG_(invalidate_icache)( dstP, code_len );
   1834 
   1835    /* Add this entry to the host_extents map, checking that we're
   1836       adding in order. */
   1837    { HostExtent hx;
   1838      hx.start = (UChar*)tcptr;
   1839      hx.len   = code_len;
   1840      hx.tteNo = tteix;
   1841      vg_assert(hx.len > 0); /* bsearch fails w/ zero length entries */
   1842      XArray* hx_array = sectors[y].host_extents;
   1843      vg_assert(hx_array);
   1844      Word n = VG_(sizeXA)(hx_array);
   1845      if (n > 0) {
   1846         HostExtent* hx_prev = (HostExtent*)VG_(indexXA)(hx_array, n-1);
   1847         vg_assert(hx_prev->start + hx_prev->len <= hx.start);
   1848      }
   1849      VG_(addToXA)(hx_array, &hx);
   1850      if (DEBUG_TRANSTAB)
   1851         VG_(printf)("... hx.start 0x%p hx.len %u sector %d ttslot %d\n",
   1852                     hx.start, hx.len, y, tteix);
   1853    }
   1854 
   1855    /* Update the fast-cache. */
   1856    setFastCacheEntry( entry, tcptr );
   1857 
   1858    /* Note the eclass numbers for this translation. */
   1859    upd_eclasses_after_add( &sectors[y], tteix );
   1860 }
   1861 
   1862 
   1863 /* Search for the translation of the given guest address.  If
   1864    requested, a successful search can also cause the fast-caches to be
   1865    updated.
   1866 */
   1867 Bool VG_(search_transtab) ( /*OUT*/Addr*  res_hcode,
   1868                             /*OUT*/SECno* res_sNo,
   1869                             /*OUT*/TTEno* res_tteNo,
   1870                             Addr          guest_addr,
   1871                             Bool          upd_cache )
   1872 {
   1873    SECno i, sno;
   1874    HTTno j, k, kstart;
   1875    TTEno tti;
   1876 
   1877    vg_assert(init_done);
   1878    /* Find the initial probe point just once.  It will be the same in
   1879       all sectors and avoids multiple expensive % operations. */
   1880    n_full_lookups++;
   1881    kstart = HASH_TT(guest_addr);
   1882    vg_assert(kstart >= 0 && kstart < N_HTTES_PER_SECTOR);
   1883 
   1884    /* Search in all the sectors,using sector_search_order[] as a
   1885       heuristic guide as to what order to visit the sectors. */
   1886    for (i = 0; i < n_sectors; i++) {
   1887 
   1888       sno = sector_search_order[i];
   1889       if (UNLIKELY(sno == INV_SNO))
   1890          return False; /* run out of sectors to search */
   1891 
   1892       k = kstart;
   1893       for (j = 0; j < N_HTTES_PER_SECTOR; j++) {
   1894          n_lookup_probes++;
   1895          tti = sectors[sno].htt[k];
   1896          if (tti < N_TTES_PER_SECTOR
   1897              && sectors[sno].ttC[tti].entry == guest_addr) {
   1898             /* found it */
   1899             if (upd_cache)
   1900                setFastCacheEntry(
   1901                   guest_addr, sectors[sno].ttC[tti].tcptr );
   1902             if (res_hcode)
   1903                *res_hcode = (Addr)sectors[sno].ttC[tti].tcptr;
   1904             if (res_sNo)
   1905                *res_sNo = sno;
   1906             if (res_tteNo)
   1907                *res_tteNo = tti;
   1908             /* pull this one one step closer to the front.  For large
   1909                apps this more or less halves the number of required
   1910                probes. */
   1911             if (i > 0) {
   1912                Int tmp = sector_search_order[i-1];
   1913                sector_search_order[i-1] = sector_search_order[i];
   1914                sector_search_order[i] = tmp;
   1915             }
   1916             return True;
   1917          }
   1918          // tti is HTT_EMPTY or HTT_DELETED or not the entry of guest_addr
   1919          if (sectors[sno].htt[k] == HTT_EMPTY)
   1920             break; /* not found in this sector */
   1921          k++;
   1922          if (k == N_HTTES_PER_SECTOR)
   1923             k = 0;
   1924       }
   1925 
   1926       /* If we fall off the end, all entries are InUse and not
   1927          matching, or Deleted.  In any case we did not find it in this
   1928          sector. */
   1929    }
   1930 
   1931    /* Not found in any sector. */
   1932    return False;
   1933 }
   1934 
   1935 
   1936 /*-------------------------------------------------------------*/
   1937 /*--- Delete translations.                                  ---*/
   1938 /*-------------------------------------------------------------*/
   1939 
   1940 /* forward */
   1941 static void unredir_discard_translations( Addr, ULong );
   1942 
   1943 /* Stuff for deleting translations which intersect with a given
   1944    address range.  Unfortunately, to make this run at a reasonable
   1945    speed, it is complex. */
   1946 
   1947 static inline
   1948 Bool overlap1 ( Addr s1, ULong r1, Addr s2, ULong r2 )
   1949 {
   1950    Addr e1 = s1 + r1 - 1;
   1951    Addr e2 = s2 + r2 - 1;
   1952    if (e1 < s2 || e2 < s1)
   1953       return False;
   1954    return True;
   1955 }
   1956 
   1957 static inline
   1958 Bool overlaps ( Addr start, ULong range, const TTEntryH* tteH )
   1959 {
   1960    if (overlap1(start, range, tteH->vge_base[0], tteH->vge_len[0]))
   1961       return True;
   1962    if (tteH->vge_n_used < 2)
   1963       return False;
   1964    if (overlap1(start, range, tteH->vge_base[1], tteH->vge_len[1]))
   1965       return True;
   1966    if (tteH->vge_n_used < 3)
   1967       return False;
   1968    if (overlap1(start, range, tteH->vge_base[2], tteH->vge_len[2]))
   1969       return True;
   1970    return False;
   1971 }
   1972 
   1973 
   1974 /* Delete a tt entry, and update all the eclass data accordingly. */
   1975 
   1976 static void delete_tte ( /*MOD*/Sector* sec, SECno secNo, TTEno tteno,
   1977                          VexArch arch_host, VexEndness endness_host )
   1978 {
   1979    Int      i, ec_idx;
   1980    EClassNo ec_num;
   1981 
   1982    /* sec and secNo are mutually redundant; cross-check. */
   1983    vg_assert(sec == &sectors[secNo]);
   1984 
   1985    vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
   1986    TTEntryC* tteC = &sec->ttC[tteno];
   1987    TTEntryH* tteH = &sec->ttH[tteno];
   1988    vg_assert(tteH->status == InUse);
   1989    vg_assert(tteC->n_tte2ec >= 1 && tteC->n_tte2ec <= 3);
   1990 
   1991    /* Unchain .. */
   1992    unchain_in_preparation_for_deletion(arch_host, endness_host, secNo, tteno);
   1993 
   1994    /* Deal with the ec-to-tte links first. */
   1995    for (i = 0; i < tteC->n_tte2ec; i++) {
   1996       ec_num = tteC->tte2ec_ec[i];
   1997       ec_idx = tteC->tte2ec_ix[i];
   1998       vg_assert(ec_num >= 0 && ec_num < ECLASS_N);
   1999       vg_assert(ec_idx >= 0);
   2000       vg_assert(ec_idx < sec->ec2tte_used[ec_num]);
   2001       /* Assert that the two links point at each other. */
   2002       vg_assert(sec->ec2tte[ec_num][ec_idx] == tteno);
   2003       /* "delete" the pointer back to here. */
   2004       sec->ec2tte[ec_num][ec_idx] = EC2TTE_DELETED;
   2005    }
   2006 
   2007    /* Now fix up this TTEntry. */
   2008    /* Mark the entry as deleted in htt.
   2009       Note: we could avoid the below hash table search by
   2010       adding a reference from tte to its hash position in tt. */
   2011    HTTno j;
   2012    HTTno k = HASH_TT(tteC->entry);
   2013    vg_assert(k >= 0 && k < N_HTTES_PER_SECTOR);
   2014    for (j = 0; j < N_HTTES_PER_SECTOR; j++) {
   2015       if (sec->htt[k] == tteno)
   2016          break;
   2017       k++;
   2018       if (k == N_HTTES_PER_SECTOR)
   2019          k = 0;
   2020    }
   2021    vg_assert(j < N_HTTES_PER_SECTOR);
   2022    sec->htt[k]    = HTT_DELETED;
   2023    tteH->status   = Deleted;
   2024    tteC->n_tte2ec = 0;
   2025    add_to_empty_tt_list(secNo, tteno);
   2026 
   2027    /* Stats .. */
   2028    sec->tt_n_inuse--;
   2029    n_disc_count++;
   2030    n_disc_osize += TTEntryH__osize(tteH);
   2031 
   2032    /* Tell the tool too. */
   2033    if (VG_(needs).superblock_discards) {
   2034       VexGuestExtents vge_tmp;
   2035       TTEntryH__to_VexGuestExtents( &vge_tmp, tteH );
   2036       VG_TDICT_CALL( tool_discard_superblock_info, tteC->entry, vge_tmp );
   2037    }
   2038 }
   2039 
   2040 
   2041 /* Delete translations from sec which intersect specified range, but
   2042    only consider translations in the specified eclass. */
   2043 
   2044 static
   2045 Bool delete_translations_in_sector_eclass ( /*MOD*/Sector* sec, SECno secNo,
   2046                                             Addr guest_start, ULong range,
   2047                                             EClassNo ec,
   2048                                             VexArch arch_host,
   2049                                             VexEndness endness_host )
   2050 {
   2051    Int      i;
   2052    TTEno    tteno;
   2053    Bool     anyDeld = False;
   2054 
   2055    vg_assert(ec >= 0 && ec < ECLASS_N);
   2056 
   2057    for (i = 0; i < sec->ec2tte_used[ec]; i++) {
   2058 
   2059       tteno = sec->ec2tte[ec][i];
   2060       if (tteno == EC2TTE_DELETED) {
   2061          /* already deleted */
   2062          continue;
   2063       }
   2064 
   2065       vg_assert(tteno < N_TTES_PER_SECTOR);
   2066 
   2067       TTEntryH* tteH = &sec->ttH[tteno];
   2068       vg_assert(tteH->status == InUse);
   2069 
   2070       if (overlaps( guest_start, range, tteH )) {
   2071          anyDeld = True;
   2072          delete_tte( sec, secNo, tteno, arch_host, endness_host );
   2073       }
   2074 
   2075    }
   2076 
   2077    return anyDeld;
   2078 }
   2079 
   2080 
   2081 /* Delete translations from sec which intersect specified range, the
   2082    slow way, by inspecting all translations in sec. */
   2083 
   2084 static
   2085 Bool delete_translations_in_sector ( /*MOD*/Sector* sec, SECno secNo,
   2086                                      Addr guest_start, ULong range,
   2087                                      VexArch arch_host,
   2088                                      VexEndness endness_host )
   2089 {
   2090    TTEno i;
   2091    Bool  anyDeld = False;
   2092 
   2093    for (i = 0; i < N_TTES_PER_SECTOR; i++) {
   2094       /* The entire and only purpose of splitting TTEntry into cold
   2095          and hot parts (TTEntryC and TTEntryH) is to make this search
   2096          loop run as fast as possible, by avoiding having to pull all
   2097          of the cold data up the memory hierarchy. */
   2098       if (UNLIKELY(sec->ttH[i].status == InUse
   2099                    && overlaps( guest_start, range, &sec->ttH[i] ))) {
   2100          anyDeld = True;
   2101          delete_tte( sec, secNo, i, arch_host, endness_host );
   2102       }
   2103    }
   2104 
   2105    return anyDeld;
   2106 }
   2107 
   2108 
   2109 void VG_(discard_translations) ( Addr guest_start, ULong range,
   2110                                  const HChar* who )
   2111 {
   2112    Sector* sec;
   2113    SECno   sno;
   2114    EClassNo ec;
   2115    Bool    anyDeleted = False;
   2116 
   2117    vg_assert(init_done);
   2118 
   2119    VG_(debugLog)(2, "transtab",
   2120                     "discard_translations(0x%lx, %llu) req by %s\n",
   2121                     guest_start, range, who );
   2122 
   2123    /* Pre-deletion sanity check */
   2124    if (VG_(clo_sanity_level) >= 4) {
   2125       Bool sane = sanity_check_all_sectors();
   2126       vg_assert(sane);
   2127    }
   2128 
   2129    if (range == 0)
   2130       return;
   2131 
   2132    VexArch     arch_host = VexArch_INVALID;
   2133    VexArchInfo archinfo_host;
   2134    VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
   2135    VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
   2136    VexEndness endness_host = archinfo_host.endness;
   2137 
   2138    /* There are two different ways to do this.
   2139 
   2140       If the range fits within a single address-range equivalence
   2141       class, as will be the case for a cache line sized invalidation,
   2142       then we only have to inspect the set of translations listed in
   2143       that equivalence class, and also in the "sin-bin" equivalence
   2144       class ECLASS_MISC.
   2145 
   2146       Otherwise, the invalidation is of a larger range and probably
   2147       results from munmap.  In this case it's (probably!) faster just
   2148       to inspect all translations, dump those we don't want, and
   2149       regenerate the equivalence class information (since modifying it
   2150       in-situ is even more expensive).
   2151    */
   2152 
   2153    /* First off, figure out if the range falls within a single class,
   2154       and if so which one. */
   2155 
   2156    ec = ECLASS_MISC;
   2157    if (range <= (1ULL << ECLASS_SHIFT))
   2158       ec = range_to_eclass( guest_start, (UInt)range );
   2159 
   2160    /* if ec is ECLASS_MISC then we aren't looking at just a single
   2161       class, so use the slow scheme.  Else use the fast scheme,
   2162       examining 'ec' and ECLASS_MISC. */
   2163 
   2164    if (ec != ECLASS_MISC) {
   2165 
   2166       VG_(debugLog)(2, "transtab",
   2167                        "                    FAST, ec = %d\n", ec);
   2168 
   2169       /* Fast scheme */
   2170       vg_assert(ec >= 0 && ec < ECLASS_MISC);
   2171 
   2172       for (sno = 0; sno < n_sectors; sno++) {
   2173          sec = &sectors[sno];
   2174          if (sec->tc == NULL)
   2175             continue;
   2176          anyDeleted |= delete_translations_in_sector_eclass(
   2177                           sec, sno, guest_start, range, ec,
   2178                           arch_host, endness_host
   2179                        );
   2180          anyDeleted |= delete_translations_in_sector_eclass(
   2181                           sec, sno, guest_start, range, ECLASS_MISC,
   2182                           arch_host, endness_host
   2183                        );
   2184       }
   2185 
   2186    } else {
   2187 
   2188       /* slow scheme */
   2189 
   2190       VG_(debugLog)(2, "transtab",
   2191                        "                    SLOW, ec = %d\n", ec);
   2192 
   2193       for (sno = 0; sno < n_sectors; sno++) {
   2194          sec = &sectors[sno];
   2195          if (sec->tc == NULL)
   2196             continue;
   2197          anyDeleted |= delete_translations_in_sector(
   2198                           sec, sno, guest_start, range,
   2199                           arch_host, endness_host
   2200                        );
   2201       }
   2202 
   2203    }
   2204 
   2205    if (anyDeleted)
   2206       invalidateFastCache();
   2207 
   2208    /* don't forget the no-redir cache */
   2209    unredir_discard_translations( guest_start, range );
   2210 
   2211    /* Post-deletion sanity check */
   2212    if (VG_(clo_sanity_level) >= 4) {
   2213       TTEno i;
   2214       Bool  sane = sanity_check_all_sectors();
   2215       vg_assert(sane);
   2216       /* But now, also check the requested address range isn't
   2217          present anywhere. */
   2218       for (sno = 0; sno < n_sectors; sno++) {
   2219          sec = &sectors[sno];
   2220          if (sec->tc == NULL)
   2221             continue;
   2222          for (i = 0; i < N_TTES_PER_SECTOR; i++) {
   2223             TTEntryH* tteH = &sec->ttH[i];
   2224             if (tteH->status != InUse)
   2225                continue;
   2226             vg_assert(!overlaps( guest_start, range, tteH ));
   2227          }
   2228       }
   2229    }
   2230 }
   2231 
   2232 /* Whether or not tools may discard translations. */
   2233 Bool  VG_(ok_to_discard_translations) = False;
   2234 
   2235 /* This function is exported to tools which can use it to discard
   2236    translations, provided it is safe to do so. */
   2237 void VG_(discard_translations_safely) ( Addr  start, SizeT len,
   2238                                         const HChar* who )
   2239 {
   2240    vg_assert(VG_(ok_to_discard_translations));
   2241    VG_(discard_translations)(start, len, who);
   2242 }
   2243 
   2244 /*------------------------------------------------------------*/
   2245 /*--- AUXILIARY: the unredirected TT/TC                    ---*/
   2246 /*------------------------------------------------------------*/
   2247 
   2248 /* A very simple translation cache which holds a small number of
   2249    unredirected translations.  This is completely independent of the
   2250    main tt/tc structures.  When unredir_tc or unredir_tt becomes full,
   2251    both structures are simply dumped and we start over.
   2252 
   2253    Since these translations are unredirected, the search key is (by
   2254    definition) the first address entry in the .vge field. */
   2255 
   2256 /* Sized to hold 500 translations of average size 1000 bytes. */
   2257 
   2258 #define UNREDIR_SZB   1000
   2259 
   2260 #define N_UNREDIR_TT  500
   2261 #define N_UNREDIR_TCQ (N_UNREDIR_TT * UNREDIR_SZB / sizeof(ULong))
   2262 
   2263 typedef
   2264    struct {
   2265       VexGuestExtents vge;
   2266       Addr            hcode;
   2267       Bool            inUse;
   2268    }
   2269    UTCEntry;
   2270 
   2271 /* We just allocate forwards in _tc, never deleting. */
   2272 static ULong    *unredir_tc;
   2273 static Int      unredir_tc_used = N_UNREDIR_TCQ;
   2274 
   2275 /* Slots in _tt can come into use and out again (.inUse).
   2276    Nevertheless _tt_highwater is maintained so that invalidations
   2277    don't have to scan all the slots when only a few are in use.
   2278    _tt_highwater holds the index of the highest ever allocated
   2279    slot. */
   2280 static UTCEntry unredir_tt[N_UNREDIR_TT];
   2281 static Int      unredir_tt_highwater;
   2282 
   2283 
   2284 static void init_unredir_tt_tc ( void )
   2285 {
   2286    Int i;
   2287    if (unredir_tc == NULL) {
   2288       SysRes sres = VG_(am_mmap_anon_float_valgrind)
   2289                        ( N_UNREDIR_TT * UNREDIR_SZB );
   2290       if (sr_isError(sres)) {
   2291          VG_(out_of_memory_NORETURN)("init_unredir_tt_tc",
   2292                                      N_UNREDIR_TT * UNREDIR_SZB);
   2293          /*NOTREACHED*/
   2294       }
   2295       unredir_tc = (ULong *)(Addr)sr_Res(sres);
   2296    }
   2297    unredir_tc_used = 0;
   2298    for (i = 0; i < N_UNREDIR_TT; i++)
   2299       unredir_tt[i].inUse = False;
   2300    unredir_tt_highwater = -1;
   2301 }
   2302 
   2303 /* Do a sanity check; return False on failure. */
   2304 static Bool sanity_check_redir_tt_tc ( void )
   2305 {
   2306    Int i;
   2307    if (unredir_tt_highwater < -1) return False;
   2308    if (unredir_tt_highwater >= N_UNREDIR_TT) return False;
   2309 
   2310    for (i = unredir_tt_highwater+1; i < N_UNREDIR_TT; i++)
   2311       if (unredir_tt[i].inUse)
   2312          return False;
   2313 
   2314    if (unredir_tc_used < 0) return False;
   2315    if (unredir_tc_used > N_UNREDIR_TCQ) return False;
   2316 
   2317    return True;
   2318 }
   2319 
   2320 
   2321 /* Add an UNREDIRECTED translation of vge to TT/TC.  The translation
   2322    is temporarily in code[0 .. code_len-1].
   2323 */
   2324 void VG_(add_to_unredir_transtab)( const VexGuestExtents* vge,
   2325                                    Addr             entry,
   2326                                    Addr             code,
   2327                                    UInt             code_len )
   2328 {
   2329    Int   i, j, code_szQ;
   2330    HChar *srcP, *dstP;
   2331 
   2332    vg_assert(sanity_check_redir_tt_tc());
   2333 
   2334    /* This is the whole point: it's not redirected! */
   2335    vg_assert(entry == vge->base[0]);
   2336 
   2337    /* How many unredir_tt slots are needed */
   2338    code_szQ = (code_len + 7) / 8;
   2339 
   2340    /* Look for an empty unredir_tc slot */
   2341    for (i = 0; i < N_UNREDIR_TT; i++)
   2342       if (!unredir_tt[i].inUse)
   2343          break;
   2344 
   2345    if (i >= N_UNREDIR_TT || code_szQ > (N_UNREDIR_TCQ - unredir_tc_used)) {
   2346       /* It's full; dump everything we currently have */
   2347       init_unredir_tt_tc();
   2348       i = 0;
   2349    }
   2350 
   2351    vg_assert(unredir_tc_used >= 0);
   2352    vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
   2353    vg_assert(code_szQ > 0);
   2354    vg_assert(code_szQ + unredir_tc_used <= N_UNREDIR_TCQ);
   2355    vg_assert(i >= 0 && i < N_UNREDIR_TT);
   2356    vg_assert(unredir_tt[i].inUse == False);
   2357 
   2358    if (i > unredir_tt_highwater)
   2359       unredir_tt_highwater = i;
   2360 
   2361    dstP = (HChar*)&unredir_tc[unredir_tc_used];
   2362    srcP = (HChar*)code;
   2363    for (j = 0; j < code_len; j++)
   2364       dstP[j] = srcP[j];
   2365 
   2366    VG_(invalidate_icache)( dstP, code_len );
   2367 
   2368    unredir_tt[i].inUse = True;
   2369    unredir_tt[i].vge   = *vge;
   2370    unredir_tt[i].hcode = (Addr)dstP;
   2371 
   2372    unredir_tc_used += code_szQ;
   2373    vg_assert(unredir_tc_used >= 0);
   2374    vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
   2375 
   2376    vg_assert(&dstP[code_len] <= (HChar*)&unredir_tc[unredir_tc_used]);
   2377 }
   2378 
   2379 Bool VG_(search_unredir_transtab) ( /*OUT*/Addr*  result,
   2380                                     Addr          guest_addr )
   2381 {
   2382    Int i;
   2383    for (i = 0; i < N_UNREDIR_TT; i++) {
   2384       if (!unredir_tt[i].inUse)
   2385          continue;
   2386       if (unredir_tt[i].vge.base[0] == guest_addr) {
   2387          *result = unredir_tt[i].hcode;
   2388          return True;
   2389       }
   2390    }
   2391    return False;
   2392 }
   2393 
   2394 static void unredir_discard_translations( Addr guest_start, ULong range )
   2395 {
   2396    Int i;
   2397 
   2398    vg_assert(sanity_check_redir_tt_tc());
   2399 
   2400    for (i = 0; i <= unredir_tt_highwater; i++) {
   2401       if (unredir_tt[i].inUse) {
   2402          /* Fake up a TTEntryH just so we can pass it to overlaps()
   2403             without having to make a new version of overlaps() just for
   2404             this rare case. */
   2405          TTEntryH tmp_tteH;
   2406          TTEntryH__from_VexGuestExtents( &tmp_tteH, &unredir_tt[i].vge );
   2407          tmp_tteH.status = Empty; /* Completely irrelevant; pure paranoia. */
   2408          if (overlaps( guest_start, range, &tmp_tteH )) {
   2409             unredir_tt[i].inUse = False;
   2410          }
   2411       }
   2412    }
   2413 }
   2414 
   2415 
   2416 /*------------------------------------------------------------*/
   2417 /*--- Initialisation.                                      ---*/
   2418 /*------------------------------------------------------------*/
   2419 
   2420 void VG_(init_tt_tc) ( void )
   2421 {
   2422    Int i, avg_codeszQ;
   2423 
   2424    vg_assert(!init_done);
   2425    init_done = True;
   2426 
   2427    /* Otherwise lots of things go wrong... */
   2428    vg_assert(sizeof(ULong) == 8);
   2429    vg_assert(sizeof(TTEno) == sizeof(HTTno));
   2430    vg_assert(sizeof(TTEno) == 2);
   2431    vg_assert(N_TTES_PER_SECTOR <= N_HTTES_PER_SECTOR);
   2432    vg_assert(N_HTTES_PER_SECTOR < INV_TTE);
   2433    vg_assert(N_HTTES_PER_SECTOR < EC2TTE_DELETED);
   2434    vg_assert(N_HTTES_PER_SECTOR < HTT_EMPTY);
   2435    /* check fast cache entries really are 2 words long */
   2436    vg_assert(sizeof(Addr) == sizeof(void*));
   2437    vg_assert(sizeof(FastCacheEntry) == 2 * sizeof(Addr));
   2438    /* check fast cache entries are packed back-to-back with no spaces */
   2439    vg_assert(sizeof( VG_(tt_fast) )
   2440              == VG_TT_FAST_SIZE * sizeof(FastCacheEntry));
   2441    /* check fast cache is aligned as we requested.  Not fatal if it
   2442       isn't, but we might as well make sure. */
   2443    vg_assert(VG_IS_16_ALIGNED( ((Addr) & VG_(tt_fast)[0]) ));
   2444 
   2445    /* The TTEntryH size is critical for keeping the LLC miss rate down
   2446       when doing a lot of discarding.  Hence check it here.  We also
   2447       have a lot of TTEntryCs so let's check that too. */
   2448    if (sizeof(HWord) == 8) {
   2449       vg_assert(sizeof(TTEntryH) <= 32);
   2450       vg_assert(sizeof(TTEntryC) <= 112);
   2451    }
   2452    else if (sizeof(HWord) == 4) {
   2453       vg_assert(sizeof(TTEntryH) <= 20);
   2454 #     if defined(VGP_ppc32_linux) || defined(VGP_mips32_linux) \
   2455          || defined(VGP_arm_linux)
   2456       /* On PPC32, MIPS32, ARM32 platforms, alignof(ULong) == 8, so the
   2457          structure is larger than on other 32 bit targets. */
   2458       vg_assert(sizeof(TTEntryC) <= 96);
   2459 #     else
   2460       vg_assert(sizeof(TTEntryC) <= 88);
   2461 #     endif
   2462    }
   2463    else {
   2464       vg_assert(0);
   2465    }
   2466 
   2467    /* All the hassle about converting between TTEntryH and
   2468       VexGuestExtents was so as to ensure the following. */
   2469    vg_assert(sizeof(TTEntryH) == sizeof(VexGuestExtents));
   2470 
   2471    if (VG_(clo_verbosity) > 2)
   2472       VG_(message)(Vg_DebugMsg,
   2473                    "TT/TC: VG_(init_tt_tc) "
   2474                    "(startup of code management)\n");
   2475 
   2476    /* Figure out how big each tc area should be.  */
   2477    if (VG_(clo_avg_transtab_entry_size) == 0)
   2478       avg_codeszQ   = (VG_(details).avg_translation_sizeB + 7) / 8;
   2479    else
   2480       avg_codeszQ   = (VG_(clo_avg_transtab_entry_size) + 7) / 8;
   2481    tc_sector_szQ = N_TTES_PER_SECTOR * (1 + avg_codeszQ);
   2482 
   2483    /* Ensure the calculated value is not way crazy. */
   2484    vg_assert(tc_sector_szQ >= 2 * N_TTES_PER_SECTOR);
   2485    vg_assert(tc_sector_szQ <= 100 * N_TTES_PER_SECTOR);
   2486 
   2487    n_sectors = VG_(clo_num_transtab_sectors);
   2488    vg_assert(n_sectors >= MIN_N_SECTORS);
   2489    vg_assert(n_sectors <= MAX_N_SECTORS);
   2490 
   2491    /* Initialise the sectors, even the ones we aren't going to use.
   2492       Set all fields to zero. */
   2493    youngest_sector = 0;
   2494    for (i = 0; i < MAX_N_SECTORS; i++)
   2495       VG_(memset)(&sectors[i], 0, sizeof(sectors[i]));
   2496 
   2497    /* Initialise the sector_search_order hint table, including the
   2498       entries we aren't going to use. */
   2499    for (i = 0; i < MAX_N_SECTORS; i++)
   2500       sector_search_order[i] = INV_SNO;
   2501 
   2502    /* Initialise the fast cache. */
   2503    invalidateFastCache();
   2504 
   2505    /* and the unredir tt/tc */
   2506    init_unredir_tt_tc();
   2507 
   2508    if (VG_(clo_verbosity) > 2 || VG_(clo_stats)
   2509        || VG_(debugLog_getLevel) () >= 2) {
   2510       VG_(message)(Vg_DebugMsg,
   2511          "TT/TC: cache: %s--avg-transtab-entry-size=%u, "
   2512          "%stool provided default %u\n",
   2513          VG_(clo_avg_transtab_entry_size) == 0 ? "ignoring " : "using ",
   2514          VG_(clo_avg_transtab_entry_size),
   2515          VG_(clo_avg_transtab_entry_size) == 0 ? "using " : "ignoring ",
   2516          VG_(details).avg_translation_sizeB);
   2517       VG_(message)(Vg_DebugMsg,
   2518          "TT/TC: cache: %d sectors of %'d bytes each = %'d total TC\n",
   2519           n_sectors, 8 * tc_sector_szQ,
   2520           n_sectors * 8 * tc_sector_szQ );
   2521       VG_(message)(Vg_DebugMsg,
   2522          "TT/TC: table: %'d tables[%d] of C %'d + H %'d bytes each "
   2523          "= %'d total TT\n",
   2524           n_sectors, N_TTES_PER_SECTOR,
   2525           (int)(N_TTES_PER_SECTOR * sizeof(TTEntryC)),
   2526           (int)(N_TTES_PER_SECTOR * sizeof(TTEntryH)),
   2527           (int)(n_sectors * N_TTES_PER_SECTOR
   2528                           * (sizeof(TTEntryC) + sizeof(TTEntryH))));
   2529       VG_(message)(Vg_DebugMsg,
   2530          "TT/TC: table: %d tt entries each = %'d total tt entries\n",
   2531          N_TTES_PER_SECTOR, n_sectors * N_TTES_PER_SECTOR);
   2532       VG_(message)(Vg_DebugMsg,
   2533          "TT/TC: table: %d htt[%d] of %'d bytes each = %'d total HTT"
   2534                        " (htt[%d] %d%% max occup)\n",
   2535          n_sectors, N_HTTES_PER_SECTOR,
   2536          (int)(N_HTTES_PER_SECTOR * sizeof(TTEno)),
   2537          (int)(n_sectors * N_HTTES_PER_SECTOR * sizeof(TTEno)),
   2538          N_HTTES_PER_SECTOR, SECTOR_TT_LIMIT_PERCENT);
   2539    }
   2540 
   2541    if (0) {
   2542       VG_(printf)("XXXX sizeof(VexGuestExtents) = %d\n",
   2543                   (Int)sizeof(VexGuestExtents));
   2544       VG_(printf)("XXXX sizeof(InEdge)     = %d\n", (Int)sizeof(InEdge));
   2545       VG_(printf)("XXXX sizeof(OutEdge)    = %d\n", (Int)sizeof(OutEdge));
   2546       VG_(printf)("XXXX sizeof(InEdgeArr)  = %d\n", (Int)sizeof(InEdgeArr));
   2547       VG_(printf)("XXXX sizeof(OutEdgeArr) = %d\n", (Int)sizeof(OutEdgeArr));
   2548       VG_(printf)("XXXX sizeof(TTEntryC)   = %d\n", (Int)sizeof(TTEntryC));
   2549       VG_(printf)("XXXX sizeof(TTEntryH)   = %d\n", (Int)sizeof(TTEntryH));
   2550    }
   2551 }
   2552 
   2553 
   2554 /*------------------------------------------------------------*/
   2555 /*--- Printing out statistics.                             ---*/
   2556 /*------------------------------------------------------------*/
   2557 
   2558 static Double safe_idiv( ULong a, ULong b )
   2559 {
   2560    return (b == 0 ? 0 : (Double)a / (Double)b);
   2561 }
   2562 
   2563 UInt VG_(get_bbs_translated) ( void )
   2564 {
   2565    return n_in_count;
   2566 }
   2567 
   2568 void VG_(print_tt_tc_stats) ( void )
   2569 {
   2570    VG_(message)(Vg_DebugMsg,
   2571       "    tt/tc: %'llu tt lookups requiring %'llu probes\n",
   2572       n_full_lookups, n_lookup_probes );
   2573    VG_(message)(Vg_DebugMsg,
   2574       "    tt/tc: %'llu fast-cache updates, %'llu flushes\n",
   2575       n_fast_updates, n_fast_flushes );
   2576 
   2577    VG_(message)(Vg_DebugMsg,
   2578                 " transtab: new        %'llu "
   2579                 "(%'llu -> %'llu; ratio %3.1f) [%'llu scs] "
   2580                 "avg tce size %llu\n",
   2581                 n_in_count, n_in_osize, n_in_tsize,
   2582                 safe_idiv(n_in_tsize, n_in_osize),
   2583                 n_in_sc_count,
   2584                 n_in_tsize / (n_in_count ? n_in_count : 1));
   2585    VG_(message)(Vg_DebugMsg,
   2586                 " transtab: dumped     %'llu (%'llu -> ?" "?) "
   2587                 "(sectors recycled %'llu)\n",
   2588                 n_dump_count, n_dump_osize, n_sectors_recycled );
   2589    VG_(message)(Vg_DebugMsg,
   2590                 " transtab: discarded  %'llu (%'llu -> ?" "?)\n",
   2591                 n_disc_count, n_disc_osize );
   2592 
   2593    if (DEBUG_TRANSTAB) {
   2594       VG_(printf)("\n");
   2595       for (EClassNo e = 0; e < ECLASS_N; e++) {
   2596          VG_(printf)(" %4d", sectors[0].ec2tte_used[e]);
   2597          if (e % 16 == 15)
   2598             VG_(printf)("\n");
   2599       }
   2600       VG_(printf)("\n\n");
   2601    }
   2602 }
   2603 
   2604 /*------------------------------------------------------------*/
   2605 /*--- Printing out of profiling results.                   ---*/
   2606 /*------------------------------------------------------------*/
   2607 
   2608 static ULong score ( const TTEntryC* tteC )
   2609 {
   2610    return ((ULong)tteC->usage.prof.weight) * ((ULong)tteC->usage.prof.count);
   2611 }
   2612 
   2613 ULong VG_(get_SB_profile) ( SBProfEntry tops[], UInt n_tops )
   2614 {
   2615    SECno sno;
   2616    Int   r, s;
   2617    ULong score_total;
   2618    TTEno i;
   2619 
   2620    /* First, compute the total weighted count, and find the top N
   2621       ttes.  tops contains pointers to the most-used n_tops blocks, in
   2622       descending order (viz, tops[0] is the highest scorer). */
   2623    for (s = 0; s < n_tops; s++) {
   2624       tops[s].addr  = 0;
   2625       tops[s].score = 0;
   2626    }
   2627 
   2628    score_total = 0;
   2629 
   2630    for (sno = 0; sno < n_sectors; sno++) {
   2631       if (sectors[sno].tc == NULL)
   2632          continue;
   2633       for (i = 0; i < N_TTES_PER_SECTOR; i++) {
   2634          if (sectors[sno].ttH[i].status != InUse)
   2635             continue;
   2636          score_total += score(&sectors[sno].ttC[i]);
   2637          /* Find the rank for sectors[sno].tt{C,H}[i]. */
   2638          r = n_tops-1;
   2639          while (True) {
   2640             if (r == -1)
   2641                break;
   2642              if (tops[r].addr == 0) {
   2643                r--;
   2644                continue;
   2645              }
   2646              if ( score(&sectors[sno].ttC[i]) > tops[r].score ) {
   2647                 r--;
   2648                 continue;
   2649              }
   2650              break;
   2651          }
   2652          r++;
   2653          vg_assert(r >= 0 && r <= n_tops);
   2654          /* This bb should be placed at r, and bbs above it shifted
   2655             upwards one slot. */
   2656          if (r < n_tops) {
   2657             for (s = n_tops-1; s > r; s--)
   2658                tops[s] = tops[s-1];
   2659             tops[r].addr  = sectors[sno].ttC[i].entry;
   2660             tops[r].score = score( &sectors[sno].ttC[i] );
   2661          }
   2662       }
   2663    }
   2664 
   2665    /* Now zero out all the counter fields, so that we can make
   2666       multiple calls here and just get the values since the last call,
   2667       each time, rather than values accumulated for the whole run. */
   2668    for (sno = 0; sno < n_sectors; sno++) {
   2669       if (sectors[sno].tc == NULL)
   2670          continue;
   2671       for (i = 0; i < N_TTES_PER_SECTOR; i++) {
   2672          if (sectors[sno].ttH[i].status != InUse)
   2673             continue;
   2674          sectors[sno].ttC[i].usage.prof.count = 0;
   2675       }
   2676    }
   2677 
   2678    return score_total;
   2679 }
   2680 
   2681 /*--------------------------------------------------------------------*/
   2682 /*--- end                                                          ---*/
   2683 /*--------------------------------------------------------------------*/
   2684