/* (code-viewer navigation residue removed: "Home | History | Annotate | Download | only in coregrind") */
      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- Management of the translation table and cache.               ---*/
      4 /*---                                                 m_transtab.c ---*/
      5 /*--------------------------------------------------------------------*/
      6 
      7 /*
      8    This file is part of Valgrind, a dynamic binary instrumentation
      9    framework.
     10 
     11    Copyright (C) 2000-2012 Julian Seward
     12       jseward (at) acm.org
     13 
     14    This program is free software; you can redistribute it and/or
     15    modify it under the terms of the GNU General Public License as
     16    published by the Free Software Foundation; either version 2 of the
     17    License, or (at your option) any later version.
     18 
     19    This program is distributed in the hope that it will be useful, but
     20    WITHOUT ANY WARRANTY; without even the implied warranty of
     21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     22    General Public License for more details.
     23 
     24    You should have received a copy of the GNU General Public License
     25    along with this program; if not, write to the Free Software
     26    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     27    02111-1307, USA.
     28 
     29    The GNU General Public License is contained in the file COPYING.
     30 */
     31 
     32 #include "pub_core_basics.h"
     33 #include "pub_core_debuglog.h"
     34 #include "pub_core_machine.h"    // For VG_(machine_get_VexArchInfo)
     35 #include "pub_core_libcbase.h"
     36 #include "pub_core_vki.h"        // to keep pub_core_libproc.h happy, sigh
     37 #include "pub_core_libcproc.h"   // VG_(invalidate_icache)
     38 #include "pub_core_libcassert.h"
     39 #include "pub_core_libcprint.h"
     40 #include "pub_core_options.h"
     41 #include "pub_core_tooliface.h"  // For VG_(details).avg_translation_sizeB
     42 #include "pub_core_transtab.h"
     43 #include "pub_core_aspacemgr.h"
     44 #include "pub_core_mallocfree.h" // VG_(out_of_memory_NORETURN)
     45 #include "pub_core_xarray.h"
     46 #include "pub_core_dispatch.h"   // For VG_(disp_cp*) addresses
     47 
     48 
     49 /* #define DEBUG_TRANSTAB */
     50 
     51 
     52 /*-------------------------------------------------------------*/
     53 /*--- Management of the FIFO-based translation table+cache. ---*/
     54 /*-------------------------------------------------------------*/
     55 
     56 /*------------------ CONSTANTS ------------------*/
     57 
     58 /* Number of sectors the TC is divided into.  If you need a larger
     59    overall translation cache, increase this value. */
     60 #define N_SECTORS 8
     61 
     62 /* Number of TC entries in each sector.  This needs to be a prime
     63    number to work properly, it must be <= 65535 (so that a TT index
     64    fits in a UShort, leaving room for 0xFFFF(EC2TTE_DELETED) to denote
     65    'deleted') and it is strongly recommended not to change this.
     66    65521 is the largest prime <= 65535. */
     67 #define N_TTES_PER_SECTOR /*10007*/ /*30011*/ /*40009*/ 65521
     68 
     69 /* Because each sector contains a hash table of TTEntries, we need to
     70    specify the maximum allowable loading, after which the sector is
     71    deemed full. */
     72 #define SECTOR_TT_LIMIT_PERCENT 65
     73 
     74 /* The sector is deemed full when this many entries are in it. */
     75 #define N_TTES_PER_SECTOR_USABLE \
     76            ((N_TTES_PER_SECTOR * SECTOR_TT_LIMIT_PERCENT) / 100)
     77 
     78 /* Equivalence classes for fast address range deletion.  There are 1 +
     79    2^ECLASS_WIDTH bins.  The highest one, ECLASS_MISC, describes an
     80    address range which does not fall cleanly within any specific bin.
     81    Note that ECLASS_SHIFT + ECLASS_WIDTH must be < 32. */
     82 #define ECLASS_SHIFT 11
     83 #define ECLASS_WIDTH 8
     84 #define ECLASS_MISC  (1 << ECLASS_WIDTH)
     85 #define ECLASS_N     (1 + ECLASS_MISC)
     86 
     87 #define EC2TTE_DELETED  0xFFFF /* 16-bit special value */
     88 
     89 
     90 /*------------------ TYPES ------------------*/
     91 
/* In edges ("to-me") in the graph created by chaining: identifies one
   patched jump elsewhere that targets this translation, with enough
   info to undo that patch later. */
typedef
   struct {
      UInt from_sNo;   /* sector number */
      UInt from_tteNo; /* TTE number in given sector */
      UInt from_offs;  /* code offset from TCEntry::tcptr where the patch is */
      Bool to_fastEP;  /* Is the patch to a fast or slow entry point? */
   }
   InEdge;
    101 
    102 
/* Out edges ("from-me") in the graph created by chaining: identifies
   one translation that this translation has a patched jump to. */
typedef
   struct {
      UInt to_sNo;    /* sector number */
      UInt to_tteNo;  /* TTE number in given sector */
      UInt from_offs; /* code offset in owning translation where patch is */
   }
   OutEdge;
    111 
    112 
#define N_FIXED_IN_EDGE_ARR 3
/* A small-size-optimised set of InEdges: up to N_FIXED_IN_EDGE_ARR
   entries live in the inline 'fixed' array; once that overflows, all
   entries are moved to the heap-allocated XArray 'var' and n_fixed is
   reset to zero.  Exactly one of the two representations is active at
   any time (var == NULL <=> fixed representation in use). */
typedef
   struct {
      UInt     n_fixed; /* 0 .. N_FIXED_IN_EDGE_ARR */
      InEdge   fixed[N_FIXED_IN_EDGE_ARR];
      XArray*  var; /* XArray* of InEdge; NULL whilst fixed[] is in use */
   }
   InEdgeArr;
    121 
#define N_FIXED_OUT_EDGE_ARR 2
/* A small-size-optimised set of OutEdges; same scheme as InEdgeArr:
   inline fixed[] until it overflows, then everything moves to 'var'. */
typedef
   struct {
      UInt    n_fixed; /* 0 .. N_FIXED_OUT_EDGE_ARR */
      OutEdge fixed[N_FIXED_OUT_EDGE_ARR];
      XArray* var; /* XArray* of OutEdge; NULL whilst fixed[] is in use */
   }
   OutEdgeArr;
    130 
    131 
/* A translation-table entry.  This indicates precisely which areas of
   guest code are included in the translation, and contains all other
   auxiliary info too.  */
typedef
   struct {
      /* Profiling only: the count and weight (arbitrary meaning) for
         this translation.  Weight is a property of the translation
         itself and computed once when the translation is created.
         Count is an entry count for the translation and is
         incremented by 1 every time the translation is used, if we
         are profiling. */
      ULong    count;
      UShort   weight;

      /* Status of the slot.  Note, we need to be able to do lazy
         deletion, hence the Deleted state. */
      enum { InUse, Deleted, Empty } status;

      /* 64-bit aligned pointer to one or more 64-bit words containing
         the corresponding host code (must be in the same sector!)
         This is a pointer into the sector's tc (code) area. */
      ULong* tcptr;

      /* This is the original guest address that purportedly is the
         entry point of the translation.  You might think that .entry
         should be the same as .vge->base[0], and most of the time it
         is.  However, when doing redirections, that is not the case.
         .vge must always correctly describe the guest code sections
         from which this translation was made.  However, .entry may or
         may not be a lie, depending on whether or not we're doing
         redirection. */
      Addr64 entry;

      /* This structure describes precisely what ranges of guest code
         the translation covers, so we can decide whether or not to
         delete it when translations of a given address range are
         invalidated. */
      VexGuestExtents vge;

      /* Address range summary info: these are pointers back to
         eclass[] entries in the containing Sector.  Those entries in
         turn point back here -- the two structures are mutually
         redundant but both necessary to make fast deletions work.
         The eclass info is similar to, and derived from, this entry's
         'vge' field, but it is not the same */
      UShort n_tte2ec;      // # tte2ec pointers (1 to 3)
      UShort tte2ec_ec[3];  // for each, the eclass #
      UInt   tte2ec_ix[3];  // and the index within the eclass.
      // for i in 0 .. n_tte2ec-1
      //    sec->ec2tte[ tte2ec_ec[i] ][ tte2ec_ix[i] ]
      // should be the index
      // of this TTEntry in the containing Sector's tt array.

      /* Admin information for chaining.  'in_edges' is a set of the
         patch points which jump to this translation -- hence are
         predecessors in the control flow graph.  'out_edges' points
         to successors in the control flow graph -- translations to
         which this one has a patched jump.  In short these are just
         backwards and forwards edges in the graph of patched-together
         blocks.  The 'in_edges' contain slightly more info, enough
         that we can undo the chaining of each mentioned patch point.
         The 'out_edges' list exists only so that we can visit the
         'in_edges' entries of all blocks we're patched through to, in
         order to remove ourselves from them when we're deleted. */

      /* A translation can disappear for two reasons:
          1. erased (as part of the oldest sector cleanup) when the
             youngest sector is full.
          2. discarded due to calls to VG_(discard_translations).
             VG_(discard_translations) sets the status of the
             translation to 'Deleted'.
             A.o., the gdbserver discards one or more translations
             when a breakpoint is inserted or removed at an Addr,
             or when single stepping mode is enabled/disabled
             or when a translation is instrumented for gdbserver
             (all the target jumps of this translation are
              invalidated).

         So, it is possible that the translation A to be patched
         (to obtain a patched jump from A to B) is invalidated
         after B is translated and before A is patched.
         In case a translation is erased or discarded, the patching
         cannot be done.  VG_(tt_tc_do_chaining) and find_TTEntry_from_hcode
         are checking the 'from' translation still exists before
         doing the patching.

         Is it safe to erase or discard the current translation E being
         executed ? Amazing, but yes, it is safe.
         Here is the explanation:

         The translation E being executed can only be erased if a new
         translation N is being done. A new translation is done only
         if the host addr is a not yet patched jump to another
         translation. In such a case, the guest address of N is
         assigned to the PC in the VEX state. Control is returned
         to the scheduler. N will be translated. This can erase the
         translation E (in case of sector full). VG_(tt_tc_do_chaining)
         will not do the chaining to a non found translation E.
         The execution will continue at the current guest PC
         (i.e. the translation N).
         => it is safe to erase the current translation being executed.

         The current translation E being executed can also be discarded
         (e.g. by gdbserver). VG_(discard_translations) will mark
         this translation E as Deleted, but the translation itself
         is not erased. In particular, its host code can only
         be overwritten or erased in case a new translation is done.
         A new translation will only be done if a not yet translated
         jump is to be executed. The execution of the Deleted translation
         E will continue till a non patched jump is encountered.
         This situation is then similar to the 'erasing' case above :
         the current translation E can be erased or overwritten, as the
         execution will continue at the new translation N.

      */

      /* It is possible, although very unlikely, that a block A has
         more than one patched jump to block B.  This could happen if
         (eg) A finishes "jcond B; jmp B".

         This means in turn that B's in_edges set can list A more than
         once (twice in this example).  However, each such entry must
         have a different from_offs, since a patched jump can only
         jump to one place at once (it's meaningless for it to have
         multiple destinations.)  IOW, the successor and predecessor
         edges in the graph are not uniquely determined by a
         TTEntry --> TTEntry pair, but rather by a
         (TTEntry,offset) --> TTEntry triple.

         If A has multiple edges to B then B will mention A multiple
         times in its in_edges.  To make things simpler, we then
         require that A mentions B exactly the same number of times in
         its out_edges.  Furthermore, a matching out-in pair must have
         the same offset (from_offs).  This facilitates sanity
         checking, and it facilitates establishing the invariant that
         an out_edges set may not have duplicates when using the
         equality defined by (TTEntry,offset).  Hence the out_edges
         and in_edges sets really do both have set semantics.

         eg if  A has been patched to B at offsets 42 and 87 (in A)
         then   A.out_edges = { (B,42), (B,87) }   (in any order)
         and    B.in_edges  = { (A,42), (A,87) }   (in any order)

         Hence for each node pair P->Q in the graph, there's a 1:1
         mapping between P.out_edges and Q.in_edges.
      */
      InEdgeArr  in_edges;
      OutEdgeArr out_edges;
   }
   TTEntry;
    282 
    283 
/* A structure used for mapping host code addresses back to the
   relevant TTEntry.  Used when doing chaining, for finding the
   TTEntry to which some arbitrary patch address belongs.  The
   [start, start+len) ranges are kept non-overlapping (see
   Sector::host_extents), which makes binary search possible. */
typedef
   struct {
      UChar* start;  /* first byte of host code for this entry */
      UInt   len;    /* length in bytes of the host code */
      UInt   tteNo;  /* index of the owning TTEntry in its sector's tt[] */
   }
   HostExtent;
    294 
/* Finally, a sector itself.  Each sector contains an array of
   TCEntries, which hold code, and an array of TTEntries, containing
   all required administrative info.  Profiling is supported using the
   TTEntry .count and .weight fields, if required.

   If the sector is not in use, all three pointers are NULL and
   tt_n_inuse is zero.
*/
typedef
   struct {
      /* The TCEntry area.  Size of this depends on the average
         translation size.  We try and size it so it becomes full
         precisely when this sector's translation table (tt) reaches
         its load limit (SECTOR_TT_LIMIT_PERCENT). */
      ULong* tc;

      /* The TTEntry array.  This is a fixed size, always containing
         exactly N_TTES_PER_SECTOR entries. */
      TTEntry* tt;

      /* This points to the current allocation point in tc. */
      ULong* tc_next;

      /* The count of tt entries with state InUse. */
      Int tt_n_inuse;

      /* Expandable arrays of tt indices for each of the ECLASS_N
         address range equivalence classes.  These hold indices into
         the containing sector's tt array, which in turn should point
         back here. */
      Int     ec2tte_size[ECLASS_N];  /* allocated capacity per class */
      Int     ec2tte_used[ECLASS_N];  /* number of slots in use per class */
      UShort* ec2tte[ECLASS_N];       /* the index arrays themselves */

      /* The host extents.  The [start, +len) ranges are constructed
         in strictly non-overlapping order, so we can binary search
         them at any time. */
      XArray* host_extents; /* XArray* of HostExtent */
   }
   Sector;
    335 
    336 
    337 /*------------------ DECLS ------------------*/
    338 
/* The root data structure is an array of sectors.  The index of the
   youngest sector is recorded, and new translations are put into that
   sector.  When it fills up, we move along to the next sector and
   start to fill that up, wrapping around at the end of the array.
   That way, once all N_SECTORS have been brought into use for the
   first time, and are full, we then re-use the oldest sector,
   endlessly.

   When running, youngest_sector should be >= 0 and <
   N_SECTORS.  The initial -1 value indicates the TT/TC system is
   not yet initialised.
*/
static Sector sectors[N_SECTORS];
static Int    youngest_sector = -1;

/* The number of ULongs in each TCEntry area.  This is computed once
   at startup and does not change. */
static Int    tc_sector_szQ;
    357 
    358 
    359 /* A list of sector numbers, in the order which they should be
    360    searched to find translations.  This is an optimisation to be used
    361    when searching for translations and should not affect
    362    correctness.  -1 denotes "no entry". */
    363 static Int sector_search_order[N_SECTORS];
    364 
    365 
    366 /* Fast helper for the TC.  A direct-mapped cache which holds a set of
    367    recently used (guest address, host address) pairs.  This array is
    368    referred to directly from m_dispatch/dispatch-<platform>.S.
    369 
    370    Entries in tt_fast may refer to any valid TC entry, regardless of
    371    which sector it's in.  Consequently we must be very careful to
    372    invalidate this cache when TC entries are changed or disappear.
    373 
    374    A special .guest address - TRANSTAB_BOGUS_GUEST_ADDR -- must be
    375    pointed at to cause that cache entry to miss.  This relies on the
    376    assumption that no guest code actually has that address, hence a
    377    value 0x1 seems good.  m_translate gives the client a synthetic
    378    segfault if it tries to execute at this address.
    379 */
    380 /*
    381 typedef
    382    struct {
    383       Addr guest;
    384       Addr host;
    385    }
    386    FastCacheEntry;
    387 */
/* The fast-lookup cache itself: a direct-mapped array of
   (guest addr, host addr) pairs, accessed directly from the
   assembly dispatcher.  16-byte alignment keeps each entry within
   a single cache line / allows aligned paired loads. */
/*global*/ __attribute__((aligned(16)))
           FastCacheEntry VG_(tt_fast)[VG_TT_FAST_SIZE];

/* Make sure we're not used before initialisation. */
static Bool init_done = False;
    393 
    394 
/*------------------ STATS DECLS ------------------*/

/* Number of fast-cache updates and flushes done. */
static ULong n_fast_flushes = 0;
static ULong n_fast_updates = 0;

/* Number of full lookups done. */
static ULong n_full_lookups = 0;
static ULong n_lookup_probes = 0;

/* Number/osize/tsize of translations entered; also the number of
   those for which self-checking was requested.
   (osize = original/guest code size, tsize = translated/host size.) */
static ULong n_in_count    = 0;
static ULong n_in_osize    = 0;
static ULong n_in_tsize    = 0;
static ULong n_in_sc_count = 0;

/* Number/osize of translations discarded due to lack of space. */
static ULong n_dump_count = 0;
static ULong n_dump_osize = 0;

/* Number/osize of translations discarded due to requests to do so. */
static ULong n_disc_count = 0;
static ULong n_disc_osize = 0;
    419 
    420 
    421 /*-------------------------------------------------------------*/
    422 /*--- Misc                                                  ---*/
    423 /*-------------------------------------------------------------*/
    424 
/* Allocate n bytes from the TTAUX arena; 'tag' labels the allocation
   for memory accounting. */
static void* ttaux_malloc ( HChar* tag, SizeT n )
{
   return VG_(arena_malloc)(VG_AR_TTAUX, tag, n);
}
    429 
/* Release a block previously obtained from ttaux_malloc. */
static void ttaux_free ( void* p )
{
   VG_(arena_free)(VG_AR_TTAUX, p);
}
    434 
    435 
    436 /*-------------------------------------------------------------*/
    437 /*--- Chaining support                                      ---*/
    438 /*-------------------------------------------------------------*/
    439 
    440 static inline TTEntry* index_tte ( UInt sNo, UInt tteNo )
    441 {
    442    vg_assert(sNo < N_SECTORS);
    443    vg_assert(tteNo < N_TTES_PER_SECTOR);
    444    Sector* s = &sectors[sNo];
    445    vg_assert(s->tt);
    446    TTEntry* tte = &s->tt[tteNo];
    447    vg_assert(tte->status == InUse);
    448    return tte;
    449 }
    450 
    451 static void InEdge__init ( InEdge* ie )
    452 {
    453    ie->from_sNo   = -1; /* invalid */
    454    ie->from_tteNo = 0;
    455    ie->from_offs  = 0;
    456    ie->to_fastEP  = False;
    457 }
    458 
    459 static void OutEdge__init ( OutEdge* oe )
    460 {
    461    oe->to_sNo    = -1; /* invalid */
    462    oe->to_tteNo  = 0;
    463    oe->from_offs = 0;
    464 }
    465 
/* Zero-fill all fields of *tte. */
static void TTEntry__init ( TTEntry* tte )
{
   VG_(memset)(tte, 0, sizeof(*tte));
}
    470 
    471 static UWord InEdgeArr__size ( InEdgeArr* iea )
    472 {
    473    if (iea->var) {
    474       vg_assert(iea->n_fixed == 0);
    475       return VG_(sizeXA)(iea->var);
    476    } else {
    477       vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
    478       return iea->n_fixed;
    479    }
    480 }
    481 
    482 static void InEdgeArr__makeEmpty ( InEdgeArr* iea )
    483 {
    484    if (iea->var) {
    485       vg_assert(iea->n_fixed == 0);
    486       VG_(deleteXA)(iea->var);
    487       iea->var = NULL;
    488    } else {
    489       vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
    490       iea->n_fixed = 0;
    491    }
    492 }
    493 
    494 static
    495 InEdge* InEdgeArr__index ( InEdgeArr* iea, UWord i )
    496 {
    497    if (iea->var) {
    498       vg_assert(iea->n_fixed == 0);
    499       return (InEdge*)VG_(indexXA)(iea->var, i);
    500    } else {
    501       vg_assert(i < iea->n_fixed);
    502       return &iea->fixed[i];
    503    }
    504 }
    505 
    506 static
    507 void InEdgeArr__deleteIndex ( InEdgeArr* iea, UWord i )
    508 {
    509    if (iea->var) {
    510       vg_assert(iea->n_fixed == 0);
    511       VG_(removeIndexXA)(iea->var, i);
    512    } else {
    513       vg_assert(i < iea->n_fixed);
    514       for (; i+1 < iea->n_fixed; i++) {
    515          iea->fixed[i] = iea->fixed[i+1];
    516       }
    517       iea->n_fixed--;
    518    }
    519 }
    520 
    521 static
    522 void InEdgeArr__add ( InEdgeArr* iea, InEdge* ie )
    523 {
    524    if (iea->var) {
    525       vg_assert(iea->n_fixed == 0);
    526       VG_(addToXA)(iea->var, ie);
    527    } else {
    528       vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
    529       if (iea->n_fixed == N_FIXED_IN_EDGE_ARR) {
    530          /* The fixed array is full, so we have to initialise an
    531             XArray and copy the fixed array into it. */
    532          iea->var = VG_(newXA)(ttaux_malloc, "transtab.IEA__add",
    533                                ttaux_free,
    534                                sizeof(InEdge));
    535          UWord i;
    536          for (i = 0; i < iea->n_fixed; i++) {
    537             VG_(addToXA)(iea->var, &iea->fixed[i]);
    538          }
    539          VG_(addToXA)(iea->var, ie);
    540          iea->n_fixed = 0;
    541       } else {
    542          /* Just add to the fixed array. */
    543          iea->fixed[iea->n_fixed++] = *ie;
    544       }
    545    }
    546 }
    547 
    548 static UWord OutEdgeArr__size ( OutEdgeArr* oea )
    549 {
    550    if (oea->var) {
    551       vg_assert(oea->n_fixed == 0);
    552       return VG_(sizeXA)(oea->var);
    553    } else {
    554       vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
    555       return oea->n_fixed;
    556    }
    557 }
    558 
    559 static void OutEdgeArr__makeEmpty ( OutEdgeArr* oea )
    560 {
    561    if (oea->var) {
    562       vg_assert(oea->n_fixed == 0);
    563       VG_(deleteXA)(oea->var);
    564       oea->var = NULL;
    565    } else {
    566       vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
    567       oea->n_fixed = 0;
    568    }
    569 }
    570 
    571 static
    572 OutEdge* OutEdgeArr__index ( OutEdgeArr* oea, UWord i )
    573 {
    574    if (oea->var) {
    575       vg_assert(oea->n_fixed == 0);
    576       return (OutEdge*)VG_(indexXA)(oea->var, i);
    577    } else {
    578       vg_assert(i < oea->n_fixed);
    579       return &oea->fixed[i];
    580    }
    581 }
    582 
    583 static
    584 void OutEdgeArr__deleteIndex ( OutEdgeArr* oea, UWord i )
    585 {
    586    if (oea->var) {
    587       vg_assert(oea->n_fixed == 0);
    588       VG_(removeIndexXA)(oea->var, i);
    589    } else {
    590       vg_assert(i < oea->n_fixed);
    591       for (; i+1 < oea->n_fixed; i++) {
    592          oea->fixed[i] = oea->fixed[i+1];
    593       }
    594       oea->n_fixed--;
    595    }
    596 }
    597 
    598 static
    599 void OutEdgeArr__add ( OutEdgeArr* oea, OutEdge* oe )
    600 {
    601    if (oea->var) {
    602       vg_assert(oea->n_fixed == 0);
    603       VG_(addToXA)(oea->var, oe);
    604    } else {
    605       vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
    606       if (oea->n_fixed == N_FIXED_OUT_EDGE_ARR) {
    607          /* The fixed array is full, so we have to initialise an
    608             XArray and copy the fixed array into it. */
    609          oea->var = VG_(newXA)(ttaux_malloc, "transtab.OEA__add",
    610                                ttaux_free,
    611                                sizeof(OutEdge));
    612          UWord i;
    613          for (i = 0; i < oea->n_fixed; i++) {
    614             VG_(addToXA)(oea->var, &oea->fixed[i]);
    615          }
    616          VG_(addToXA)(oea->var, oe);
    617          oea->n_fixed = 0;
    618       } else {
    619          /* Just add to the fixed array. */
    620          oea->fixed[oea->n_fixed++] = *oe;
    621       }
    622    }
    623 }
    624 
    625 static
    626 Int HostExtent__cmpOrd ( void* v1, void* v2 )
    627 {
    628    HostExtent* hx1 = (HostExtent*)v1;
    629    HostExtent* hx2 = (HostExtent*)v2;
    630    if (hx1->start + hx1->len <= hx2->start) return -1;
    631    if (hx2->start + hx2->len <= hx1->start) return 1;
    632    return 0; /* partial overlap */
    633 }
    634 
    635 static __attribute__((noinline))
    636 Bool find_TTEntry_from_hcode( /*OUT*/UInt* from_sNo,
    637                               /*OUT*/UInt* from_tteNo,
    638                               void* hcode )
    639 {
    640    Int i;
    641 
    642    /* Search order logic copied from VG_(search_transtab). */
    643    for (i = 0; i < N_SECTORS; i++) {
    644       Int sno = sector_search_order[i];
    645       if (UNLIKELY(sno == -1))
    646          return False; /* run out of sectors to search */
    647 
    648       Sector* sec = &sectors[sno];
    649       XArray* /* of HostExtent */ host_extents = sec->host_extents;
    650       vg_assert(host_extents);
    651 
    652       HostExtent key;
    653       VG_(memset)(&key, 0, sizeof(key));
    654       key.start = hcode;
    655       key.len = 1;
    656       Word firstW = -1, lastW = -1;
    657       Bool found  = VG_(lookupXA_UNSAFE)(
    658                        host_extents, &key, &firstW, &lastW,
    659                        (Int(*)(void*,void*))HostExtent__cmpOrd
    660                     );
    661       vg_assert(firstW == lastW); // always true, even if not found
    662       if (found) {
    663          HostExtent* hx = VG_(indexXA)(host_extents, firstW);
    664          UInt tteNo = hx->tteNo;
    665          /* Do some additional sanity checks. */
    666          vg_assert(tteNo <= N_TTES_PER_SECTOR);
    667          /* Entry might have been invalidated. Consider this
    668             as not found. */
    669          if (sec->tt[tteNo].status == Deleted)
    670             return False;
    671          vg_assert(sec->tt[tteNo].status == InUse);
    672          /* Can only half check that the found TTEntry contains hcode,
    673             due to not having a length value for the hcode in the
    674             TTEntry. */
    675          vg_assert((UChar*)sec->tt[tteNo].tcptr <= (UChar*)hcode);
    676          /* Looks plausible */
    677          *from_sNo   = sno;
    678          *from_tteNo = (UInt)tteNo;
    679          return True;
    680       }
    681    }
    682    return False;
    683 }
    684 
    685 
    686 /* Figure out whether or not hcode is jitted code present in the main
    687    code cache (but not in the no-redir cache).  Used for sanity
    688    checking. */
    689 static Bool is_in_the_main_TC ( void* hcode )
    690 {
    691    Int i, sno;
    692    for (i = 0; i < N_SECTORS; i++) {
    693       sno = sector_search_order[i];
    694       if (sno == -1)
    695          break; /* run out of sectors to search */
    696       if ((UChar*)hcode >= (UChar*)sectors[sno].tc
    697           && (UChar*)hcode <= (UChar*)sectors[sno].tc_next
    698                               + sizeof(ULong) - 1)
    699          return True;
    700    }
    701    return False;
    702 }
    703 
    704 
    705 /* Fulfill a chaining request, and record admin info so we
    706    can undo it later, if required.
    707 */
void VG_(tt_tc_do_chaining) ( void* from__patch_addr,
                              UInt  to_sNo,
                              UInt  to_tteNo,
                              Bool  to_fastEP )
{
   /* Get the CPU info established at startup. */
   VexArch vex_arch = VexArch_INVALID;
   VG_(machine_get_VexArchInfo)( &vex_arch, NULL );

   // host_code is where we're patching to.  So it needs to
   // take into account, whether we're jumping to the slow
   // or fast entry point.  By definition, the fast entry point
   // is exactly one event check's worth of code along from
   // the slow (tcptr) entry point.
   TTEntry* to_tte    = index_tte(to_sNo, to_tteNo);
   void*    host_code = ((UChar*)to_tte->tcptr)
                        + (to_fastEP ? LibVEX_evCheckSzB(vex_arch) : 0);

   // stay sane -- the patch point (dst) is in this sector's code cache.
   // The "+ sizeof(ULong) - 1" slack allows for the last translation's
   // code ending anywhere within the final ULong-sized word of the tc.
   vg_assert( (UChar*)host_code >= (UChar*)sectors[to_sNo].tc );
   vg_assert( (UChar*)host_code <= (UChar*)sectors[to_sNo].tc_next
                                   + sizeof(ULong) - 1 );

   /* Find the TTEntry for the from__ code.  This isn't simple since
      we only know the patch address, which is going to be somewhere
      inside the from_ block. */
   UInt from_sNo   = (UInt)-1;
   UInt from_tteNo = (UInt)-1;
   Bool from_found
      = find_TTEntry_from_hcode( &from_sNo, &from_tteNo,
                                 from__patch_addr );
   if (!from_found) {
      // The from code might have been discarded due to sector re-use
      // or marked Deleted due to translation invalidation.
      // In such a case, don't do the chaining.
      VG_(debugLog)(1,"transtab",
                    "host code %p not found (discarded? sector recycled?)"
                    " => no chaining done\n",
                    from__patch_addr);
      return;
   }

   TTEntry* from_tte = index_tte(from_sNo, from_tteNo);

   /* Get VEX to do the patching itself.  We have to hand it off
      since it is host-dependent.  Afterwards, flush the icache over
      the range VEX says it modified. */
   VexInvalRange vir
      = LibVEX_Chain(
           vex_arch,
           from__patch_addr,
           VG_(fnptr_to_fnentry)(
              to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP)
                        : &VG_(disp_cp_chain_me_to_slowEP)),
           (void*)host_code
        );
   VG_(invalidate_icache)( (void*)vir.start, vir.len );

   /* Now do the tricky bit -- update the ch_succs and ch_preds info
      for the two translations involved, so we can undo the chaining
      later, which we will have to do if the to_ block gets removed
      for whatever reason. */

   /* This is the new from_ -> to_ link to add. */
   InEdge ie;
   InEdge__init(&ie);
   ie.from_sNo   = from_sNo;
   ie.from_tteNo = from_tteNo;
   ie.to_fastEP  = to_fastEP;
   /* Record the patch site as an offset within the from_ block, so
      the link survives even though absolute addresses aren't stored. */
   HWord from_offs = (HWord)( (UChar*)from__patch_addr
                              - (UChar*)from_tte->tcptr );
   vg_assert(from_offs < 100000/* let's say */);
   ie.from_offs  = (UInt)from_offs;

   /* This is the new to_ -> from_ backlink to add. */
   OutEdge oe;
   OutEdge__init(&oe);
   oe.to_sNo    = to_sNo;
   oe.to_tteNo  = to_tteNo;
   oe.from_offs = (UInt)from_offs;

   /* Add .. */
   InEdgeArr__add(&to_tte->in_edges, &ie);
   OutEdgeArr__add(&from_tte->out_edges, &oe);
}
    792 
    793 
    794 /* Unchain one patch, as described by the specified InEdge.  For
    795    sanity check purposes only (to check that the patched location is
    796    as expected) it also requires the fast and slow entry point
    797    addresses of the destination block (that is, the block that owns
    798    this InEdge). */
    799 __attribute__((noinline))
    800 static void unchain_one ( VexArch vex_arch,
    801                           InEdge* ie,
    802                           void* to_fastEPaddr, void* to_slowEPaddr )
    803 {
    804    vg_assert(ie);
    805    TTEntry* tte
    806       = index_tte(ie->from_sNo, ie->from_tteNo);
    807    UChar* place_to_patch
    808       = ((HChar*)tte->tcptr) + ie->from_offs;
    809    UChar* disp_cp_chain_me
    810       = VG_(fnptr_to_fnentry)(
    811            ie->to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP)
    812                          : &VG_(disp_cp_chain_me_to_slowEP)
    813         );
    814    UChar* place_to_jump_to_EXPECTED
    815       = ie->to_fastEP ? to_fastEPaddr : to_slowEPaddr;
    816 
    817    // stay sane: both src and dst for this unchaining are
    818    // in the main code cache
    819    vg_assert( is_in_the_main_TC(place_to_patch) ); // src
    820    vg_assert( is_in_the_main_TC(place_to_jump_to_EXPECTED) ); // dst
    821    // dst check is ok because LibVEX_UnChain checks that
    822    // place_to_jump_to_EXPECTED really is the current dst, and
    823    // asserts if it isn't.
    824    VexInvalRange vir
    825        = LibVEX_UnChain( vex_arch, place_to_patch,
    826                          place_to_jump_to_EXPECTED, disp_cp_chain_me );
    827    VG_(invalidate_icache)( (void*)vir.start, vir.len );
    828 }
    829 
    830 
    831 /* The specified block is about to be deleted.  Update the preds and
    832    succs of its associated blocks accordingly.  This includes undoing
    833    any chained jumps to this block. */
static
void unchain_in_preparation_for_deletion ( VexArch vex_arch,
                                           UInt here_sNo, UInt here_tteNo )
{
   if (0)
      VG_(printf)("QQQ unchain_in_prep %u.%u\n", here_sNo, here_tteNo);
   UWord    i, j, n, m;
   Int      evCheckSzB = LibVEX_evCheckSzB(vex_arch);
   TTEntry* here_tte   = index_tte(here_sNo, here_tteNo);
   vg_assert(here_tte->status == InUse);

   /* Visit all InEdges owned by here_tte: these are the jumps INTO
      this block from other blocks, each of which must be physically
      unpatched before this block's code can be thrown away. */
   n = InEdgeArr__size(&here_tte->in_edges);
   for (i = 0; i < n; i++) {
      InEdge* ie = InEdgeArr__index(&here_tte->in_edges, i);
      // Undo the chaining.  The fast EP is by definition one event
      // check's worth of code past the slow (tcptr) EP.
      UChar* here_slow_EP = (UChar*)here_tte->tcptr;
      UChar* here_fast_EP = here_slow_EP + evCheckSzB;
      unchain_one(vex_arch, ie, here_fast_EP, here_slow_EP);
      // Find the corresponding entry in the "from" node's out_edges,
      // and remove it.  Matching on (sNo, tteNo, from_offs) uniquely
      // identifies the edge, since each patch site is one location.
      TTEntry* from_tte = index_tte(ie->from_sNo, ie->from_tteNo);
      m = OutEdgeArr__size(&from_tte->out_edges);
      vg_assert(m > 0); // it must have at least one entry
      for (j = 0; j < m; j++) {
         OutEdge* oe = OutEdgeArr__index(&from_tte->out_edges, j);
         if (oe->to_sNo == here_sNo && oe->to_tteNo == here_tteNo
             && oe->from_offs == ie->from_offs)
           break;
      }
      vg_assert(j < m); // "oe must be findable"
      OutEdgeArr__deleteIndex(&from_tte->out_edges, j);
   }

   /* Visit all OutEdges owned by here_tte: these are the jumps OUT of
      this block.  No code needs unpatching (the patched code is in
      this dying block), but the destination blocks' in_edges lists
      must drop their backlinks to us. */
   n = OutEdgeArr__size(&here_tte->out_edges);
   for (i = 0; i < n; i++) {
      OutEdge* oe = OutEdgeArr__index(&here_tte->out_edges, i);
      // Find the corresponding entry in the "to" node's in_edges,
      // and remove it.
      TTEntry* to_tte = index_tte(oe->to_sNo, oe->to_tteNo);
      m = InEdgeArr__size(&to_tte->in_edges);
      vg_assert(m > 0); // it must have at least one entry
      for (j = 0; j < m; j++) {
         InEdge* ie = InEdgeArr__index(&to_tte->in_edges, j);
         if (ie->from_sNo == here_sNo && ie->from_tteNo == here_tteNo
             && ie->from_offs == oe->from_offs)
           break;
      }
      vg_assert(j < m); // "ie must be findable"
      InEdgeArr__deleteIndex(&to_tte->in_edges, j);
   }

   /* Both lists are now logically empty of live edges; reset them. */
   InEdgeArr__makeEmpty(&here_tte->in_edges);
   OutEdgeArr__makeEmpty(&here_tte->out_edges);
}
    890 
    891 
    892 /*-------------------------------------------------------------*/
    893 /*--- Address-range equivalence class stuff                 ---*/
    894 /*-------------------------------------------------------------*/
    895 
    896 /* Return equivalence class number for a range. */
    897 
    898 static Int range_to_eclass ( Addr64 start, UInt len )
    899 {
    900    UInt mask   = (1 << ECLASS_WIDTH) - 1;
    901    UInt lo     = (UInt)start;
    902    UInt hi     = lo + len - 1;
    903    UInt loBits = (lo >> ECLASS_SHIFT) & mask;
    904    UInt hiBits = (hi >> ECLASS_SHIFT) & mask;
    905    if (loBits == hiBits) {
    906       vg_assert(loBits < ECLASS_N-1);
    907       return loBits;
    908    } else {
    909       return ECLASS_MISC;
    910    }
    911 }
    912 
    913 
    914 /* Calculates the equivalence class numbers for any VexGuestExtent.
    915    These are written in *eclasses, which must be big enough to hold 3
    916    Ints.  The number written, between 1 and 3, is returned.  The
    917    eclasses are presented in order, and any duplicates are removed.
    918 */
    919 
    920 static
    921 Int vexGuestExtents_to_eclasses ( /*OUT*/Int* eclasses,
    922                                   VexGuestExtents* vge )
    923 {
    924 #  define SWAP(_lv1,_lv2) \
    925       do { Int t = _lv1; _lv1 = _lv2; _lv2 = t; } while (0)
    926 
    927    Int i, j, n_ec, r;
    928 
    929    vg_assert(vge->n_used >= 1 && vge->n_used <= 3);
    930 
    931    n_ec = 0;
    932    for (i = 0; i < vge->n_used; i++) {
    933       r = range_to_eclass( vge->base[i], (UInt)vge->len[i] );
    934       if (r == ECLASS_MISC)
    935          goto bad;
    936       /* only add if we haven't already seen it */
    937       for (j = 0; j < n_ec; j++)
    938          if (eclasses[j] == r)
    939             break;
    940       if (j == n_ec)
    941          eclasses[n_ec++] = r;
    942    }
    943 
    944    if (n_ec == 1)
    945       return 1;
    946 
    947    if (n_ec == 2) {
    948       /* sort */
    949       if (eclasses[0] > eclasses[1])
    950          SWAP(eclasses[0], eclasses[1]);
    951       return 2;
    952    }
    953 
    954    if (n_ec == 3) {
    955       /* sort */
    956       if (eclasses[0] > eclasses[2])
    957          SWAP(eclasses[0], eclasses[2]);
    958       if (eclasses[0] > eclasses[1])
    959          SWAP(eclasses[0], eclasses[1]);
    960       if (eclasses[1] > eclasses[2])
    961          SWAP(eclasses[1], eclasses[2]);
    962       return 3;
    963    }
    964 
    965    /* NOTREACHED */
    966    vg_assert(0);
    967 
    968   bad:
    969    eclasses[0] = ECLASS_MISC;
    970    return 1;
    971 
    972 #  undef SWAP
    973 }
    974 
    975 
    976 /* Add tteno to the set of entries listed for equivalence class ec in
    977    this sector.  Returns used location in eclass array. */
    978 
    979 static
    980 UInt addEClassNo ( /*MOD*/Sector* sec, Int ec, UShort tteno )
    981 {
    982    Int    old_sz, new_sz, i, r;
    983    UShort *old_ar, *new_ar;
    984 
    985    vg_assert(ec >= 0 && ec < ECLASS_N);
    986    vg_assert(tteno < N_TTES_PER_SECTOR);
    987 
    988    if (0) VG_(printf)("ec %d  gets %d\n", ec, (Int)tteno);
    989 
    990    if (sec->ec2tte_used[ec] >= sec->ec2tte_size[ec]) {
    991 
    992       vg_assert(sec->ec2tte_used[ec] == sec->ec2tte_size[ec]);
    993 
    994       old_sz = sec->ec2tte_size[ec];
    995       old_ar = sec->ec2tte[ec];
    996       new_sz = old_sz==0 ? 8 : old_sz<64 ? 2*old_sz : (3*old_sz)/2;
    997       new_ar = ttaux_malloc("transtab.aECN.1",
    998                             new_sz * sizeof(UShort));
    999       for (i = 0; i < old_sz; i++)
   1000          new_ar[i] = old_ar[i];
   1001       if (old_ar)
   1002          ttaux_free(old_ar);
   1003       sec->ec2tte_size[ec] = new_sz;
   1004       sec->ec2tte[ec] = new_ar;
   1005 
   1006       if (0) VG_(printf)("expand ec %d to %d\n", ec, new_sz);
   1007    }
   1008 
   1009    /* Common case */
   1010    r = sec->ec2tte_used[ec]++;
   1011    vg_assert(r >= 0 && r < sec->ec2tte_size[ec]);
   1012    sec->ec2tte[ec][r] = tteno;
   1013    return (UInt)r;
   1014 }
   1015 
   1016 
   1017 /* 'vge' is being added to 'sec' at TT entry 'tteno'.  Add appropriate
   1018    eclass entries to 'sec'. */
   1019 
   1020 static
   1021 void upd_eclasses_after_add ( /*MOD*/Sector* sec, Int tteno )
   1022 {
   1023    Int i, r, eclasses[3];
   1024    TTEntry* tte;
   1025    vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
   1026 
   1027    tte = &sec->tt[tteno];
   1028    r = vexGuestExtents_to_eclasses( eclasses, &tte->vge );
   1029 
   1030    vg_assert(r >= 1 && r <= 3);
   1031    tte->n_tte2ec = r;
   1032 
   1033    for (i = 0; i < r; i++) {
   1034       tte->tte2ec_ec[i] = eclasses[i];
   1035       tte->tte2ec_ix[i] = addEClassNo( sec, eclasses[i], (UShort)tteno );
   1036    }
   1037 }
   1038 
   1039 
   1040 /* Check the eclass info in 'sec' to ensure it is consistent.  Returns
   1041    True if OK, False if something's not right.  Expensive. */
   1042 
static Bool sanity_check_eclasses_in_sector ( Sector* sec )
{
#  define BAD(_str) do { whassup = (_str); goto bad; } while (0)

   HChar*   whassup = NULL;
   Int      i, j, k, n, ec_num, ec_idx;
   TTEntry* tte;
   UShort   tteno;
   ULong*   tce;

   /* Basic checks on this sector */
   if (sec->tt_n_inuse < 0 || sec->tt_n_inuse > N_TTES_PER_SECTOR_USABLE)
      BAD("invalid sec->tt_n_inuse");
   tce = sec->tc_next;
   if (tce < &sec->tc[0] || tce > &sec->tc[tc_sector_szQ])
      BAD("sec->tc_next points outside tc");

   /* For each eclass ... check the ec2tte array's own invariants
      (size/pointer consistency, used <= size). */
   for (i = 0; i < ECLASS_N; i++) {
      if (sec->ec2tte_size[i] == 0 && sec->ec2tte[i] != NULL)
         BAD("ec2tte_size/ec2tte mismatch(1)");
      if (sec->ec2tte_size[i] != 0 && sec->ec2tte[i] == NULL)
         BAD("ec2tte_size/ec2tte mismatch(2)");
      if (sec->ec2tte_used[i] < 0
          || sec->ec2tte_used[i] > sec->ec2tte_size[i])
         BAD("implausible ec2tte_used");
      if (sec->ec2tte_used[i] == 0)
         continue;

      /* For each tt reference in each eclass .. ensure the reference
         is to a valid tt entry, and that the entry's address ranges
         really include this eclass. */

      for (j = 0; j < sec->ec2tte_used[i]; j++) {
         tteno = sec->ec2tte[i][j];
         if (tteno == EC2TTE_DELETED)
            continue;
         if (tteno >= N_TTES_PER_SECTOR)
            BAD("implausible tteno");
         tte = &sec->tt[tteno];
         if (tte->status != InUse)
            BAD("tteno points to non-inuse tte");
         if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
            BAD("tte->n_tte2ec out of range");
         /* Exactly one of tte->tte2ec_ec[0 .. tte->n_tte2ec-1]
            must equal i, and its paired tte2ec_ix must point back to
            slot j.  Also require the ec list to be strictly
            ascending.  Inspect tte's eclass info. */
         n = 0;
         for (k = 0; k < tte->n_tte2ec; k++) {
            if (k < tte->n_tte2ec-1
                && tte->tte2ec_ec[k] >= tte->tte2ec_ec[k+1])
               BAD("tte->tte2ec_ec[..] out of order");
            ec_num = tte->tte2ec_ec[k];
            if (ec_num < 0 || ec_num >= ECLASS_N)
               BAD("tte->tte2ec_ec[..] out of range");
            if (ec_num != i)
               continue;
            ec_idx = tte->tte2ec_ix[k];
            if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[i])
               BAD("tte->tte2ec_ix[..] out of range");
            if (ec_idx == j)
               n++;
         }
         if (n != 1)
            BAD("tteno does not point back at eclass");
      }
   }

   /* That establishes that for each forward pointer from TTEntrys
      there is a corresponding backward pointer from the eclass[]
      arrays.  However, it doesn't rule out the possibility of other,
      bogus pointers in the eclass[] arrays.  So do those similarly:
      scan through them and check the TTEntryies they point at point
      back. */

   for (i = 0; i < N_TTES_PER_SECTOR_USABLE; i++) {

      tte = &sec->tt[i];
      if (tte->status == Empty || tte->status == Deleted) {
         if (tte->n_tte2ec != 0)
            BAD("tte->n_eclasses nonzero for unused tte");
         continue;
      }

      vg_assert(tte->status == InUse);

      if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
         BAD("tte->n_eclasses out of range(2)");

      for (j = 0; j < tte->n_tte2ec; j++) {
         ec_num = tte->tte2ec_ec[j];
         if (ec_num < 0 || ec_num >= ECLASS_N)
            BAD("tte->eclass[..] out of range");
         ec_idx = tte->tte2ec_ix[j];
         if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[ec_num])
            BAD("tte->ec_idx[..] out of range(2)");
         if (sec->ec2tte[ec_num][ec_idx] != i)
            BAD("ec2tte does not point back to tte");
      }
   }

   return True;

  bad:
   if (whassup)
      VG_(debugLog)(0, "transtab", "eclass sanity fail: %s\n", whassup);

#  if 0
   VG_(printf)("eclass = %d\n", i);
   VG_(printf)("tteno = %d\n", (Int)tteno);
   switch (tte->status) {
      case InUse:   VG_(printf)("InUse\n"); break;
      case Deleted: VG_(printf)("Deleted\n"); break;
      case Empty:   VG_(printf)("Empty\n"); break;
   }
   if (tte->status != Empty) {
      for (k = 0; k < tte->vge.n_used; k++)
         VG_(printf)("0x%llx %d\n", tte->vge.base[k],
                                    (Int)tte->vge.len[k]);
   }
#  endif

   return False;

#  undef BAD
}
   1168 
   1169 
   1170 /* Sanity check absolutely everything.  True == check passed. */
   1171 
   1172 /* forwards */
   1173 static Bool sanity_check_redir_tt_tc ( void );
   1174 
   1175 static Bool sanity_check_sector_search_order ( void )
   1176 {
   1177    Int i, j, nListed;
   1178    /* assert the array is the right size */
   1179    vg_assert(N_SECTORS == (sizeof(sector_search_order)
   1180                            / sizeof(sector_search_order[0])));
   1181    /* Check it's of the form  valid_sector_numbers ++ [-1, -1, ..] */
   1182    for (i = 0; i < N_SECTORS; i++) {
   1183       if (sector_search_order[i] < 0 || sector_search_order[i] >= N_SECTORS)
   1184          break;
   1185    }
   1186    nListed = i;
   1187    for (/* */; i < N_SECTORS; i++) {
   1188       if (sector_search_order[i] != -1)
   1189          break;
   1190    }
   1191    if (i != N_SECTORS)
   1192       return False;
   1193    /* Check each sector number only appears once */
   1194    for (i = 0; i < N_SECTORS; i++) {
   1195       if (sector_search_order[i] == -1)
   1196          continue;
   1197       for (j = i+1; j < N_SECTORS; j++) {
   1198          if (sector_search_order[j] == sector_search_order[i])
   1199             return False;
   1200       }
   1201    }
   1202    /* Check that the number of listed sectors equals the number
   1203       in use, by counting nListed back down. */
   1204    for (i = 0; i < N_SECTORS; i++) {
   1205       if (sectors[i].tc != NULL)
   1206          nListed--;
   1207    }
   1208    if (nListed != 0)
   1209       return False;
   1210    return True;
   1211 }
   1212 
   1213 static Bool sanity_check_all_sectors ( void )
   1214 {
   1215    Int     sno;
   1216    Bool    sane;
   1217    Sector* sec;
   1218    for (sno = 0; sno < N_SECTORS; sno++) {
   1219       sec = &sectors[sno];
   1220       if (sec->tc == NULL)
   1221          continue;
   1222       sane = sanity_check_eclasses_in_sector( sec );
   1223       if (!sane)
   1224          return False;
   1225    }
   1226    if ( !sanity_check_redir_tt_tc() )
   1227       return False;
   1228    if ( !sanity_check_sector_search_order() )
   1229       return False;
   1230    return True;
   1231 }
   1232 
   1233 
   1234 
   1235 /*-------------------------------------------------------------*/
   1236 /*--- Add/find translations                                 ---*/
   1237 /*-------------------------------------------------------------*/
   1238 
   1239 static UInt vge_osize ( VexGuestExtents* vge )
   1240 {
   1241    UInt i, n = 0;
   1242    for (i = 0; i < vge->n_used; i++)
   1243       n += (UInt)vge->len[i];
   1244    return n;
   1245 }
   1246 
   1247 static Bool isValidSector ( Int sector )
   1248 {
   1249    if (sector < 0 || sector >= N_SECTORS)
   1250       return False;
   1251    return True;
   1252 }
   1253 
   1254 static inline UInt HASH_TT ( Addr64 key )
   1255 {
   1256    UInt kHi = (UInt)(key >> 32);
   1257    UInt kLo = (UInt)key;
   1258    UInt k32 = kHi ^ kLo;
   1259    UInt ror = 7;
   1260    if (ror > 0)
   1261       k32 = (k32 >> ror) | (k32 << (32-ror));
   1262    return k32 % N_TTES_PER_SECTOR;
   1263 }
   1264 
   1265 static void setFastCacheEntry ( Addr64 key, ULong* tcptr )
   1266 {
   1267    UInt cno = (UInt)VG_TT_FAST_HASH(key);
   1268    VG_(tt_fast)[cno].guest = (Addr)key;
   1269    VG_(tt_fast)[cno].host  = (Addr)tcptr;
   1270    n_fast_updates++;
   1271    /* This shouldn't fail.  It should be assured by m_translate
   1272       which should reject any attempt to make translation of code
   1273       starting at TRANSTAB_BOGUS_GUEST_ADDR. */
   1274    vg_assert(VG_(tt_fast)[cno].guest != TRANSTAB_BOGUS_GUEST_ADDR);
   1275 }
   1276 
   1277 /* Invalidate the fast cache VG_(tt_fast). */
   1278 static void invalidateFastCache ( void )
   1279 {
   1280    UInt j;
   1281    /* This loop is popular enough to make it worth unrolling a
   1282       bit, at least on ppc32. */
   1283    vg_assert(VG_TT_FAST_SIZE > 0 && (VG_TT_FAST_SIZE % 4) == 0);
   1284    for (j = 0; j < VG_TT_FAST_SIZE; j += 4) {
   1285       VG_(tt_fast)[j+0].guest = TRANSTAB_BOGUS_GUEST_ADDR;
   1286       VG_(tt_fast)[j+1].guest = TRANSTAB_BOGUS_GUEST_ADDR;
   1287       VG_(tt_fast)[j+2].guest = TRANSTAB_BOGUS_GUEST_ADDR;
   1288       VG_(tt_fast)[j+3].guest = TRANSTAB_BOGUS_GUEST_ADDR;
   1289    }
   1290 
   1291    vg_assert(j == VG_TT_FAST_SIZE);
   1292    n_fast_flushes++;
   1293 }
   1294 
/* Make sector 'sno' ready for use: allocate its tt/tc on first use,
   or flush all existing translations if it is being recycled.  On
   return the sector is empty and tc_next points at the start of tc. */
static void initialiseSector ( Int sno )
{
   Int     i;
   SysRes  sres;
   Sector* sec;
   vg_assert(isValidSector(sno));

   { Bool sane = sanity_check_sector_search_order();
     vg_assert(sane);
   }
   sec = &sectors[sno];

   if (sec->tc == NULL) {

      /* Sector has never been used before.  Need to allocate tt and
         tc. */
      vg_assert(sec->tt == NULL);
      vg_assert(sec->tc_next == NULL);
      vg_assert(sec->tt_n_inuse == 0);
      for (i = 0; i < ECLASS_N; i++) {
         vg_assert(sec->ec2tte_size[i] == 0);
         vg_assert(sec->ec2tte_used[i] == 0);
         vg_assert(sec->ec2tte[i] == NULL);
      }
      vg_assert(sec->host_extents == NULL);

      VG_(debugLog)(1,"transtab", "allocate sector %d\n", sno);

      /* tc_sector_szQ is in ULongs (8 bytes each), hence the 8x. */
      sres = VG_(am_mmap_anon_float_valgrind)( 8 * tc_sector_szQ );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(TC)",
                                     8 * tc_sector_szQ );
         /*NOTREACHED*/
      }
      sec->tc = (ULong*)(AddrH)sr_Res(sres);

      sres = VG_(am_mmap_anon_float_valgrind)
                ( N_TTES_PER_SECTOR * sizeof(TTEntry) );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(TT)",
                                     N_TTES_PER_SECTOR * sizeof(TTEntry) );
         /*NOTREACHED*/
      }
      sec->tt = (TTEntry*)(AddrH)sr_Res(sres);

      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         sec->tt[i].status   = Empty;
         sec->tt[i].n_tte2ec = 0;
      }

      /* Set up the host_extents array. */
      sec->host_extents
         = VG_(newXA)(ttaux_malloc, "transtab.initialiseSector(host_extents)",
                      ttaux_free,
                      sizeof(HostExtent));

      /* Add an entry in the sector_search_order: claim the first
         free (-1) slot. */
      for (i = 0; i < N_SECTORS; i++) {
         if (sector_search_order[i] == -1)
            break;
      }
      vg_assert(i >= 0 && i < N_SECTORS);
      sector_search_order[i] = sno;

      if (VG_(clo_verbosity) > 2)
         VG_(message)(Vg_DebugMsg, "TT/TC: initialise sector %d\n", sno);

   } else {

      /* Sector has been used before.  Dump the old contents. */
      VG_(debugLog)(1,"transtab", "recycle sector %d\n", sno);
      vg_assert(sec->tt != NULL);
      vg_assert(sec->tc_next != NULL);
      n_dump_count += sec->tt_n_inuse;

      VexArch vex_arch = VexArch_INVALID;
      VG_(machine_get_VexArchInfo)( &vex_arch, NULL );

      /* Visit each just-about-to-be-abandoned translation: undo any
         chained jumps touching it, and notify the tool if it asked
         for superblock-discard notifications. */
      if (0) VG_(printf)("QQQ unlink-entire-sector: %d START\n", sno);
      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         if (sec->tt[i].status == InUse) {
            vg_assert(sec->tt[i].n_tte2ec >= 1);
            vg_assert(sec->tt[i].n_tte2ec <= 3);
            n_dump_osize += vge_osize(&sec->tt[i].vge);
            /* Tell the tool too. */
            if (VG_(needs).superblock_discards) {
               VG_TDICT_CALL( tool_discard_superblock_info,
                              sec->tt[i].entry,
                              sec->tt[i].vge );
            }
            unchain_in_preparation_for_deletion(vex_arch, sno, i);
         } else {
            vg_assert(sec->tt[i].n_tte2ec == 0);
         }
         sec->tt[i].status   = Empty;
         sec->tt[i].n_tte2ec = 0;
      }
      if (0) VG_(printf)("QQQ unlink-entire-sector: %d END\n", sno);

      /* Free up the eclass structures. */
      for (i = 0; i < ECLASS_N; i++) {
         if (sec->ec2tte_size[i] == 0) {
            vg_assert(sec->ec2tte_used[i] == 0);
            vg_assert(sec->ec2tte[i] == NULL);
         } else {
            vg_assert(sec->ec2tte[i] != NULL);
            ttaux_free(sec->ec2tte[i]);
            sec->ec2tte[i] = NULL;
            sec->ec2tte_size[i] = 0;
            sec->ec2tte_used[i] = 0;
         }
      }

      /* Empty out the host extents array (keep the array itself for
         reuse; only its contents are dropped). */
      vg_assert(sec->host_extents != NULL);
      VG_(dropTailXA)(sec->host_extents, VG_(sizeXA)(sec->host_extents));
      vg_assert(VG_(sizeXA)(sec->host_extents) == 0);

      /* Sanity check: ensure it is already in
         sector_search_order[]. */
      for (i = 0; i < N_SECTORS; i++) {
         if (sector_search_order[i] == sno)
            break;
      }
      vg_assert(i >= 0 && i < N_SECTORS);

      if (VG_(clo_verbosity) > 2)
         VG_(message)(Vg_DebugMsg, "TT/TC: recycle sector %d\n", sno);
   }

   /* Either way, the sector is now empty. */
   sec->tc_next = sec->tc;
   sec->tt_n_inuse = 0;

   /* Cached (guest, host) pairs may reference dumped translations;
      flush the whole fast cache. */
   invalidateFastCache();

   { Bool sane = sanity_check_sector_search_order();
     vg_assert(sane);
   }
}
   1435 
   1436 
   1437 /* Add a translation of vge to TT/TC.  The translation is temporarily
   1438    in code[0 .. code_len-1].
   1439 
   1440    pre: youngest_sector points to a valid (although possibly full)
   1441    sector.
   1442 */
   1443 void VG_(add_to_transtab)( VexGuestExtents* vge,
   1444                            Addr64           entry,
   1445                            AddrH            code,
   1446                            UInt             code_len,
   1447                            Bool             is_self_checking,
   1448                            Int              offs_profInc,
   1449                            UInt             n_guest_instrs,
   1450                            VexArch          arch_host )
   1451 {
   1452    Int    tcAvailQ, reqdQ, y, i;
   1453    ULong  *tcptr, *tcptr2;
   1454    UChar* srcP;
   1455    UChar* dstP;
   1456 
   1457    vg_assert(init_done);
   1458    vg_assert(vge->n_used >= 1 && vge->n_used <= 3);
   1459 
   1460    /* 60000: should agree with N_TMPBUF in m_translate.c. */
   1461    vg_assert(code_len > 0 && code_len < 60000);
   1462 
   1463    /* Generally stay sane */
   1464    vg_assert(n_guest_instrs < 200); /* it can be zero, tho */
   1465 
   1466    if (0)
   1467       VG_(printf)("add_to_transtab(entry = 0x%llx, len = %d)\n",
   1468                   entry, code_len);
   1469 
   1470    n_in_count++;
   1471    n_in_tsize += code_len;
   1472    n_in_osize += vge_osize(vge);
   1473    if (is_self_checking)
   1474       n_in_sc_count++;
   1475 
   1476    y = youngest_sector;
   1477    vg_assert(isValidSector(y));
   1478 
   1479    if (sectors[y].tc == NULL)
   1480       initialiseSector(y);
   1481 
   1482    /* Try putting the translation in this sector. */
   1483    reqdQ = (code_len + 7) >> 3;
   1484 
   1485    /* Will it fit in tc? */
   1486    tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
   1487               - ((ULong*)(sectors[y].tc_next));
   1488    vg_assert(tcAvailQ >= 0);
   1489    vg_assert(tcAvailQ <= tc_sector_szQ);
   1490 
   1491    if (tcAvailQ < reqdQ
   1492        || sectors[y].tt_n_inuse >= N_TTES_PER_SECTOR_USABLE) {
   1493       /* No.  So move on to the next sector.  Either it's never been
   1494          used before, in which case it will get its tt/tc allocated
   1495          now, or it has been used before, in which case it is set to be
   1496          empty, hence throwing out the oldest sector. */
   1497       vg_assert(tc_sector_szQ > 0);
   1498       VG_(debugLog)(1,"transtab",
   1499                       "declare sector %d full "
   1500                       "(TT loading %2d%%, TC loading %2d%%)\n",
   1501                       y,
   1502                       (100 * sectors[y].tt_n_inuse)
   1503                          / N_TTES_PER_SECTOR,
   1504                       (100 * (tc_sector_szQ - tcAvailQ))
   1505                          / tc_sector_szQ);
   1506       youngest_sector++;
   1507       if (youngest_sector >= N_SECTORS)
   1508          youngest_sector = 0;
   1509       y = youngest_sector;
   1510       initialiseSector(y);
   1511    }
   1512 
   1513    /* Be sure ... */
   1514    tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
   1515               - ((ULong*)(sectors[y].tc_next));
   1516    vg_assert(tcAvailQ >= 0);
   1517    vg_assert(tcAvailQ <= tc_sector_szQ);
   1518    vg_assert(tcAvailQ >= reqdQ);
   1519    vg_assert(sectors[y].tt_n_inuse < N_TTES_PER_SECTOR_USABLE);
   1520    vg_assert(sectors[y].tt_n_inuse >= 0);
   1521 
   1522    /* Copy into tc. */
   1523    tcptr = sectors[y].tc_next;
   1524    vg_assert(tcptr >= &sectors[y].tc[0]);
   1525    vg_assert(tcptr <= &sectors[y].tc[tc_sector_szQ]);
   1526 
   1527    dstP = (UChar*)tcptr;
   1528    srcP = (UChar*)code;
   1529    VG_(memcpy)(dstP, srcP, code_len);
   1530    sectors[y].tc_next += reqdQ;
   1531    sectors[y].tt_n_inuse++;
   1532 
   1533    /* more paranoia */
   1534    tcptr2 = sectors[y].tc_next;
   1535    vg_assert(tcptr2 >= &sectors[y].tc[0]);
   1536    vg_assert(tcptr2 <= &sectors[y].tc[tc_sector_szQ]);
   1537 
   1538    /* Find an empty tt slot, and use it.  There must be such a slot
   1539       since tt is never allowed to get completely full. */
   1540    i = HASH_TT(entry);
   1541    vg_assert(i >= 0 && i < N_TTES_PER_SECTOR);
   1542    while (True) {
   1543       if (sectors[y].tt[i].status == Empty
   1544           || sectors[y].tt[i].status == Deleted)
   1545          break;
   1546       i++;
   1547       if (i >= N_TTES_PER_SECTOR)
   1548          i = 0;
   1549    }
   1550 
   1551    TTEntry__init(&sectors[y].tt[i]);
   1552    sectors[y].tt[i].status = InUse;
   1553    sectors[y].tt[i].tcptr  = tcptr;
   1554    sectors[y].tt[i].count  = 0;
   1555    sectors[y].tt[i].weight = n_guest_instrs == 0 ? 1 : n_guest_instrs;
   1556    sectors[y].tt[i].vge    = *vge;
   1557    sectors[y].tt[i].entry  = entry;
   1558 
   1559    /* Patch in the profile counter location, if necessary. */
   1560    if (offs_profInc != -1) {
   1561       vg_assert(offs_profInc >= 0 && offs_profInc < code_len);
   1562       VexInvalRange vir
   1563          = LibVEX_PatchProfInc( arch_host,
   1564                                 dstP + offs_profInc,
   1565                                 &sectors[y].tt[i].count );
   1566       VG_(invalidate_icache)( (void*)vir.start, vir.len );
   1567    }
   1568 
   1569    VG_(invalidate_icache)( dstP, code_len );
   1570 
   1571    /* Add this entry to the host_extents map, checking that we're
   1572       adding in order. */
   1573    { HostExtent hx;
   1574      hx.start = (UChar*)tcptr;
   1575      hx.len   = code_len;
   1576      hx.tteNo = i;
   1577      vg_assert(hx.len > 0); /* bsearch fails w/ zero length entries */
   1578      XArray* hx_array = sectors[y].host_extents;
   1579      vg_assert(hx_array);
   1580      Word n = VG_(sizeXA)(hx_array);
   1581      if (n > 0) {
   1582         HostExtent* hx_prev = (HostExtent*)VG_(indexXA)(hx_array, n-1);
   1583         vg_assert(hx_prev->start + hx_prev->len <= hx.start);
   1584      }
   1585      VG_(addToXA)(hx_array, &hx);
   1586    }
   1587 
   1588    /* Update the fast-cache. */
   1589    setFastCacheEntry( entry, tcptr );
   1590 
   1591    /* Note the eclass numbers for this translation. */
   1592    upd_eclasses_after_add( &sectors[y], i );
   1593 }
   1594 
   1595 
/* Search for the translation of the given guest address.  If
   requested, a successful search can also cause the fast-caches to be
   updated.  Any of the three OUT parameters may be NULL if the caller
   does not want that particular result.
*/
Bool VG_(search_transtab) ( /*OUT*/AddrH* res_hcode,
                            /*OUT*/UInt*  res_sNo,
                            /*OUT*/UInt*  res_tteNo,
                            Addr64        guest_addr,
                            Bool          upd_cache )
{
   Int i, j, k, kstart, sno;

   vg_assert(init_done);
   /* Find the initial probe point just once.  It will be the same in
      all sectors and avoids multiple expensive % operations. */
   n_full_lookups++;
   k      = -1;
   kstart = HASH_TT(guest_addr);
   vg_assert(kstart >= 0 && kstart < N_TTES_PER_SECTOR);

   /* Search in all the sectors, using sector_search_order[] as a
      heuristic guide as to what order to visit the sectors. */
   for (i = 0; i < N_SECTORS; i++) {

      sno = sector_search_order[i];
      if (UNLIKELY(sno == -1))
         return False; /* run out of sectors to search */

      /* Linear-probe this sector's tt starting at the hash point.
         A Deleted slot must be probed past; an Empty slot ends the
         probe sequence for this sector (see break below). */
      k = kstart;
      for (j = 0; j < N_TTES_PER_SECTOR; j++) {
         n_lookup_probes++;
         if (sectors[sno].tt[k].status == InUse
             && sectors[sno].tt[k].entry == guest_addr) {
            /* found it */
            if (upd_cache)
               setFastCacheEntry(
                  guest_addr, sectors[sno].tt[k].tcptr );
            if (res_hcode)
               *res_hcode = (AddrH)sectors[sno].tt[k].tcptr;
            if (res_sNo)
               *res_sNo = sno;
            if (res_tteNo)
               *res_tteNo = k;
            /* pull this one one step closer to the front.  For large
               apps this more or less halves the number of required
               probes. */
            if (i > 0) {
               Int tmp = sector_search_order[i-1];
               sector_search_order[i-1] = sector_search_order[i];
               sector_search_order[i] = tmp;
            }
            return True;
         }
         if (sectors[sno].tt[k].status == Empty)
            break; /* not found in this sector */
         k++;
         if (k == N_TTES_PER_SECTOR)
            k = 0;   /* wrap around to the start of the table */
      }

      /* If we fall off the end, all entries are InUse and not
         matching, or Deleted.  In any case we did not find it in this
         sector. */
   }

   /* Not found in any sector. */
   return False;
}
   1664 
   1665 
   1666 /*-------------------------------------------------------------*/
   1667 /*--- Delete translations.                                  ---*/
   1668 /*-------------------------------------------------------------*/
   1669 
   1670 /* forward */
   1671 static void unredir_discard_translations( Addr64, ULong );
   1672 
   1673 /* Stuff for deleting translations which intersect with a given
   1674    address range.  Unfortunately, to make this run at a reasonable
   1675    speed, it is complex. */
   1676 
   1677 static inline
   1678 Bool overlap1 ( Addr64 s1, ULong r1, Addr64 s2, ULong r2 )
   1679 {
   1680    Addr64 e1 = s1 + r1 - 1ULL;
   1681    Addr64 e2 = s2 + r2 - 1ULL;
   1682    if (e1 < s2 || e2 < s1)
   1683       return False;
   1684    return True;
   1685 }
   1686 
   1687 static inline
   1688 Bool overlaps ( Addr64 start, ULong range, VexGuestExtents* vge )
   1689 {
   1690    if (overlap1(start, range, vge->base[0], (UInt)vge->len[0]))
   1691       return True;
   1692    if (vge->n_used < 2)
   1693       return False;
   1694    if (overlap1(start, range, vge->base[1], (UInt)vge->len[1]))
   1695       return True;
   1696    if (vge->n_used < 3)
   1697       return False;
   1698    if (overlap1(start, range, vge->base[2], (UInt)vge->len[2]))
   1699       return True;
   1700    return False;
   1701 }
   1702 
   1703 
   1704 /* Delete a tt entry, and update all the eclass data accordingly. */
   1705 
static void delete_tte ( /*MOD*/Sector* sec, UInt secNo, Int tteno,
                         VexArch vex_arch )
{
   Int      i, ec_num, ec_idx;
   TTEntry* tte;

   /* sec and secNo are mutually redundant; cross-check. */
   vg_assert(sec == &sectors[secNo]);

   vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
   tte = &sec->tt[tteno];
   vg_assert(tte->status == InUse);
   vg_assert(tte->n_tte2ec >= 1 && tte->n_tte2ec <= 3);

   /* Unchain .. must happen before the entry is marked dead. */
   unchain_in_preparation_for_deletion(vex_arch, secNo, tteno);

   /* Deal with the ec-to-tte links first. */
   for (i = 0; i < tte->n_tte2ec; i++) {
      ec_num = (Int)tte->tte2ec_ec[i];
      ec_idx = tte->tte2ec_ix[i];
      vg_assert(ec_num >= 0 && ec_num < ECLASS_N);
      vg_assert(ec_idx >= 0);
      vg_assert(ec_idx < sec->ec2tte_used[ec_num]);
      /* Assert that the two links point at each other. */
      vg_assert(sec->ec2tte[ec_num][ec_idx] == (UShort)tteno);
      /* "delete" the pointer back to here. */
      sec->ec2tte[ec_num][ec_idx] = EC2TTE_DELETED;
   }

   /* Now fix up this TTEntry.  Deleted is a tombstone: lookups probe
      past it, and later insertions may reuse the slot. */
   tte->status   = Deleted;
   tte->n_tte2ec = 0;

   /* Stats .. */
   sec->tt_n_inuse--;
   n_disc_count++;
   n_disc_osize += vge_osize(&tte->vge);

   /* Tell the tool too.  The entry/vge fields are still intact even
      though the slot is now marked Deleted. */
   if (VG_(needs).superblock_discards) {
      VG_TDICT_CALL( tool_discard_superblock_info,
                     tte->entry,
                     tte->vge );
   }
}
   1752 
   1753 
   1754 /* Delete translations from sec which intersect specified range, but
   1755    only consider translations in the specified eclass. */
   1756 
   1757 static
   1758 Bool delete_translations_in_sector_eclass ( /*MOD*/Sector* sec, UInt secNo,
   1759                                             Addr64 guest_start, ULong range,
   1760                                             Int ec,
   1761                                             VexArch vex_arch )
   1762 {
   1763    Int      i;
   1764    UShort   tteno;
   1765    Bool     anyDeld = False;
   1766    TTEntry* tte;
   1767 
   1768    vg_assert(ec >= 0 && ec < ECLASS_N);
   1769 
   1770    for (i = 0; i < sec->ec2tte_used[ec]; i++) {
   1771 
   1772       tteno = sec->ec2tte[ec][i];
   1773       if (tteno == EC2TTE_DELETED) {
   1774          /* already deleted */
   1775          continue;
   1776       }
   1777 
   1778       vg_assert(tteno < N_TTES_PER_SECTOR);
   1779 
   1780       tte = &sec->tt[tteno];
   1781       vg_assert(tte->status == InUse);
   1782 
   1783       if (overlaps( guest_start, range, &tte->vge )) {
   1784          anyDeld = True;
   1785          delete_tte( sec, secNo, (Int)tteno, vex_arch );
   1786       }
   1787 
   1788    }
   1789 
   1790    return anyDeld;
   1791 }
   1792 
   1793 
   1794 /* Delete translations from sec which intersect specified range, the
   1795    slow way, by inspecting all translations in sec. */
   1796 
   1797 static
   1798 Bool delete_translations_in_sector ( /*MOD*/Sector* sec, UInt secNo,
   1799                                      Addr64 guest_start, ULong range,
   1800                                      VexArch vex_arch )
   1801 {
   1802    Int  i;
   1803    Bool anyDeld = False;
   1804 
   1805    for (i = 0; i < N_TTES_PER_SECTOR; i++) {
   1806       if (sec->tt[i].status == InUse
   1807           && overlaps( guest_start, range, &sec->tt[i].vge )) {
   1808          anyDeld = True;
   1809          delete_tte( sec, secNo, i, vex_arch );
   1810       }
   1811    }
   1812 
   1813    return anyDeld;
   1814 }
   1815 
   1816 
   1817 void VG_(discard_translations) ( Addr64 guest_start, ULong range,
   1818                                  HChar* who )
   1819 {
   1820    Sector* sec;
   1821    Int     sno, ec;
   1822    Bool    anyDeleted = False;
   1823 
   1824    vg_assert(init_done);
   1825 
   1826    VG_(debugLog)(2, "transtab",
   1827                     "discard_translations(0x%llx, %lld) req by %s\n",
   1828                     guest_start, range, who );
   1829 
   1830    /* Pre-deletion sanity check */
   1831    if (VG_(clo_sanity_level >= 4)) {
   1832       Bool sane = sanity_check_all_sectors();
   1833       vg_assert(sane);
   1834    }
   1835 
   1836    if (range == 0)
   1837       return;
   1838 
   1839    VexArch vex_arch = VexArch_INVALID;
   1840    VG_(machine_get_VexArchInfo)( &vex_arch, NULL );
   1841 
   1842    /* There are two different ways to do this.
   1843 
   1844       If the range fits within a single address-range equivalence
   1845       class, as will be the case for a cache line sized invalidation,
   1846       then we only have to inspect the set of translations listed in
   1847       that equivalence class, and also in the "sin-bin" equivalence
   1848       class ECLASS_MISC.
   1849 
   1850       Otherwise, the invalidation is of a larger range and probably
   1851       results from munmap.  In this case it's (probably!) faster just
   1852       to inspect all translations, dump those we don't want, and
   1853       regenerate the equivalence class information (since modifying it
   1854       in-situ is even more expensive).
   1855    */
   1856 
   1857    /* First off, figure out if the range falls within a single class,
   1858       and if so which one. */
   1859 
   1860    ec = ECLASS_MISC;
   1861    if (range < (1ULL << ECLASS_SHIFT))
   1862       ec = range_to_eclass( guest_start, (UInt)range );
   1863 
   1864    /* if ec is ECLASS_MISC then we aren't looking at just a single
   1865       class, so use the slow scheme.  Else use the fast scheme,
   1866       examining 'ec' and ECLASS_MISC. */
   1867 
   1868    if (ec != ECLASS_MISC) {
   1869 
   1870       VG_(debugLog)(2, "transtab",
   1871                        "                    FAST, ec = %d\n", ec);
   1872 
   1873       /* Fast scheme */
   1874       vg_assert(ec >= 0 && ec < ECLASS_MISC);
   1875 
   1876       for (sno = 0; sno < N_SECTORS; sno++) {
   1877          sec = &sectors[sno];
   1878          if (sec->tc == NULL)
   1879             continue;
   1880          anyDeleted |= delete_translations_in_sector_eclass(
   1881                           sec, sno, guest_start, range, ec,
   1882                           vex_arch
   1883                        );
   1884          anyDeleted |= delete_translations_in_sector_eclass(
   1885                           sec, sno, guest_start, range, ECLASS_MISC,
   1886                           vex_arch
   1887                        );
   1888       }
   1889 
   1890    } else {
   1891 
   1892       /* slow scheme */
   1893 
   1894       VG_(debugLog)(2, "transtab",
   1895                        "                    SLOW, ec = %d\n", ec);
   1896 
   1897       for (sno = 0; sno < N_SECTORS; sno++) {
   1898          sec = &sectors[sno];
   1899          if (sec->tc == NULL)
   1900             continue;
   1901          anyDeleted |= delete_translations_in_sector(
   1902                           sec, sno, guest_start, range, vex_arch );
   1903       }
   1904 
   1905    }
   1906 
   1907    if (anyDeleted)
   1908       invalidateFastCache();
   1909 
   1910    /* don't forget the no-redir cache */
   1911    unredir_discard_translations( guest_start, range );
   1912 
   1913    /* Post-deletion sanity check */
   1914    if (VG_(clo_sanity_level >= 4)) {
   1915       Int      i;
   1916       TTEntry* tte;
   1917       Bool     sane = sanity_check_all_sectors();
   1918       vg_assert(sane);
   1919       /* But now, also check the requested address range isn't
   1920          present anywhere. */
   1921       for (sno = 0; sno < N_SECTORS; sno++) {
   1922          sec = &sectors[sno];
   1923          if (sec->tc == NULL)
   1924             continue;
   1925          for (i = 0; i < N_TTES_PER_SECTOR; i++) {
   1926             tte = &sec->tt[i];
   1927             if (tte->status != InUse)
   1928                continue;
   1929             vg_assert(!overlaps( guest_start, range, &tte->vge ));
   1930          }
   1931       }
   1932    }
   1933 }
   1934 
   1935 
   1936 /*------------------------------------------------------------*/
   1937 /*--- AUXILIARY: the unredirected TT/TC                    ---*/
   1938 /*------------------------------------------------------------*/
   1939 
   1940 /* A very simple translation cache which holds a small number of
   1941    unredirected translations.  This is completely independent of the
   1942    main tt/tc structures.  When unredir_tc or unredir_tt becomes full,
   1943    both structures are simply dumped and we start over.
   1944 
   1945    Since these translations are unredirected, the search key is (by
   1946    definition) the first address entry in the .vge field. */
   1947 
/* Sized to hold 500 translations of average size 1000 bytes. */

#define UNREDIR_SZB   1000

#define N_UNREDIR_TT  500
#define N_UNREDIR_TCQ (N_UNREDIR_TT * UNREDIR_SZB / sizeof(ULong))

/* One unredirected translation: its guest extents (base[0] is the
   search key), the host code address, and a live/dead flag. */
typedef
   struct {
      VexGuestExtents vge;
      Addr            hcode;
      Bool            inUse;
   }
   UTCEntry;

/* We just allocate forwards in _tc, never deleting. */
static ULong    *unredir_tc;
/* Starts at "full" so nothing can be added before init_unredir_tt_tc
   resets it to 0 and maps the cache. */
static Int      unredir_tc_used = N_UNREDIR_TCQ;

/* Slots in _tt can come into use and out again (.inUse).
   Nevertheless _tt_highwater is maintained so that invalidations
   don't have to scan all the slots when only a few are in use.
   _tt_highwater holds the index of the highest ever allocated
   slot. */
static UTCEntry unredir_tt[N_UNREDIR_TT];
static Int      unredir_tt_highwater;
   1974 
   1975 
   1976 static void init_unredir_tt_tc ( void )
   1977 {
   1978    Int i;
   1979    if (unredir_tc == NULL) {
   1980       SysRes sres = VG_(am_mmap_anon_float_valgrind)
   1981                        ( N_UNREDIR_TT * UNREDIR_SZB );
   1982       if (sr_isError(sres)) {
   1983          VG_(out_of_memory_NORETURN)("init_unredir_tt_tc",
   1984                                      N_UNREDIR_TT * UNREDIR_SZB);
   1985          /*NOTREACHED*/
   1986       }
   1987       unredir_tc = (ULong *)(AddrH)sr_Res(sres);
   1988    }
   1989    unredir_tc_used = 0;
   1990    for (i = 0; i < N_UNREDIR_TT; i++)
   1991       unredir_tt[i].inUse = False;
   1992    unredir_tt_highwater = -1;
   1993 }
   1994 
   1995 /* Do a sanity check; return False on failure. */
   1996 static Bool sanity_check_redir_tt_tc ( void )
   1997 {
   1998    Int i;
   1999    if (unredir_tt_highwater < -1) return False;
   2000    if (unredir_tt_highwater >= N_UNREDIR_TT) return False;
   2001 
   2002    for (i = unredir_tt_highwater+1; i < N_UNREDIR_TT; i++)
   2003       if (unredir_tt[i].inUse)
   2004          return False;
   2005 
   2006    if (unredir_tc_used < 0) return False;
   2007    if (unredir_tc_used > N_UNREDIR_TCQ) return False;
   2008 
   2009    return True;
   2010 }
   2011 
   2012 
   2013 /* Add an UNREDIRECTED translation of vge to TT/TC.  The translation
   2014    is temporarily in code[0 .. code_len-1].
   2015 */
void VG_(add_to_unredir_transtab)( VexGuestExtents* vge,
                                   Addr64           entry,
                                   AddrH            code,
                                   UInt             code_len )
{
   Int   i, j, code_szQ;
   HChar *srcP, *dstP;

   vg_assert(sanity_check_redir_tt_tc());

   /* This is the whole point: it's not redirected! */
   vg_assert(entry == vge->base[0]);

   /* How many 8-byte tc quadwords are needed (rounded up). */
   code_szQ = (code_len + 7) / 8;

   /* Look for an empty unredir_tt slot */
   for (i = 0; i < N_UNREDIR_TT; i++)
      if (!unredir_tt[i].inUse)
         break;

   if (i >= N_UNREDIR_TT || code_szQ > (N_UNREDIR_TCQ - unredir_tc_used)) {
      /* It's full; dump everything we currently have and start over
         with slot 0.  Deliberately simple: no eviction policy. */
      init_unredir_tt_tc();
      i = 0;
   }

   vg_assert(unredir_tc_used >= 0);
   vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
   vg_assert(code_szQ > 0);
   vg_assert(code_szQ + unredir_tc_used <= N_UNREDIR_TCQ);
   vg_assert(i >= 0 && i < N_UNREDIR_TT);
   vg_assert(unredir_tt[i].inUse == False);

   if (i > unredir_tt_highwater)
      unredir_tt_highwater = i;

   /* Byte-copy the translation to the next free spot in the tc. */
   dstP = (HChar*)&unredir_tc[unredir_tc_used];
   srcP = (HChar*)code;
   for (j = 0; j < code_len; j++)
      dstP[j] = srcP[j];

   VG_(invalidate_icache)( dstP, code_len );

   unredir_tt[i].inUse = True;
   unredir_tt[i].vge   = *vge;
   unredir_tt[i].hcode = (Addr)dstP;

   unredir_tc_used += code_szQ;
   vg_assert(unredir_tc_used >= 0);
   vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);

   vg_assert(&dstP[code_len] <= (HChar*)&unredir_tc[unredir_tc_used]);
}
   2070 
   2071 Bool VG_(search_unredir_transtab) ( /*OUT*/AddrH* result,
   2072                                     Addr64        guest_addr )
   2073 {
   2074    Int i;
   2075    for (i = 0; i < N_UNREDIR_TT; i++) {
   2076       if (!unredir_tt[i].inUse)
   2077          continue;
   2078       if (unredir_tt[i].vge.base[0] == guest_addr) {
   2079          *result = (AddrH)unredir_tt[i].hcode;
   2080          return True;
   2081       }
   2082    }
   2083    return False;
   2084 }
   2085 
   2086 static void unredir_discard_translations( Addr64 guest_start, ULong range )
   2087 {
   2088    Int i;
   2089 
   2090    vg_assert(sanity_check_redir_tt_tc());
   2091 
   2092    for (i = 0; i <= unredir_tt_highwater; i++) {
   2093       if (unredir_tt[i].inUse
   2094           && overlaps( guest_start, range, &unredir_tt[i].vge))
   2095          unredir_tt[i].inUse = False;
   2096    }
   2097 }
   2098 
   2099 
   2100 /*------------------------------------------------------------*/
   2101 /*--- Initialisation.                                      ---*/
   2102 /*------------------------------------------------------------*/
   2103 
/* One-time startup of the translation table/cache machinery: size the
   tc areas, reset all sectors to unallocated, and initialise the
   fast cache and the unredirected tt/tc.  Must be called exactly
   once (asserts !init_done). */
void VG_(init_tt_tc) ( void )
{
   Int i, j, avg_codeszQ;

   vg_assert(!init_done);
   init_done = True;

   /* Otherwise lots of things go wrong... */
   vg_assert(sizeof(ULong) == 8);
   vg_assert(sizeof(Addr64) == 8);
   /* check fast cache entries really are 2 words long */
   vg_assert(sizeof(Addr) == sizeof(void*));
   vg_assert(sizeof(FastCacheEntry) == 2 * sizeof(Addr));
   /* check fast cache entries are packed back-to-back with no spaces */
   vg_assert(sizeof( VG_(tt_fast) ) == VG_TT_FAST_SIZE * sizeof(FastCacheEntry));
   /* check fast cache is aligned as we requested.  Not fatal if it
      isn't, but we might as well make sure. */
   vg_assert(VG_IS_16_ALIGNED( ((Addr) & VG_(tt_fast)[0]) ));

   if (VG_(clo_verbosity) > 2)
      VG_(message)(Vg_DebugMsg,
                   "TT/TC: VG_(init_tt_tc) "
                   "(startup of code management)\n");

   /* Figure out how big each tc area should be.  */
   avg_codeszQ   = (VG_(details).avg_translation_sizeB + 7) / 8;
   tc_sector_szQ = N_TTES_PER_SECTOR_USABLE * (1 + avg_codeszQ);

   /* Ensure the calculated value is not way crazy. */
   vg_assert(tc_sector_szQ >= 2 * N_TTES_PER_SECTOR_USABLE);
   vg_assert(tc_sector_szQ <= 100 * N_TTES_PER_SECTOR_USABLE);

   /* Initialise the sectors.  tc == NULL marks a sector as not yet
      allocated; allocation happens lazily on first use. */
   youngest_sector = 0;
   for (i = 0; i < N_SECTORS; i++) {
      sectors[i].tc = NULL;
      sectors[i].tt = NULL;
      sectors[i].tc_next = NULL;
      sectors[i].tt_n_inuse = 0;
      for (j = 0; j < ECLASS_N; j++) {
         sectors[i].ec2tte_size[j] = 0;
         sectors[i].ec2tte_used[j] = 0;
         sectors[i].ec2tte[j] = NULL;
      }
      sectors[i].host_extents = NULL;
   }

   /* Initialise the sector_search_order hint table.  -1 means "no
      sector here". */
   for (i = 0; i < N_SECTORS; i++)
      sector_search_order[i] = -1;

   /* Initialise the fast cache. */
   invalidateFastCache();

   /* and the unredir tt/tc */
   init_unredir_tt_tc();

   if (VG_(clo_verbosity) > 2) {
      VG_(message)(Vg_DebugMsg,
         "TT/TC: cache: %d sectors of %d bytes each = %d total\n",
          N_SECTORS, 8 * tc_sector_szQ,
          N_SECTORS * 8 * tc_sector_szQ );
      VG_(message)(Vg_DebugMsg,
         "TT/TC: table: %d total entries, max occupancy %d (%d%%)\n",
         N_SECTORS * N_TTES_PER_SECTOR,
         N_SECTORS * N_TTES_PER_SECTOR_USABLE,
         SECTOR_TT_LIMIT_PERCENT );
   }

   VG_(debugLog)(2, "transtab",
      "cache: %d sectors of %d bytes each = %d total\n",
       N_SECTORS, 8 * tc_sector_szQ,
       N_SECTORS * 8 * tc_sector_szQ );
   VG_(debugLog)(2, "transtab",
      "table: %d total entries, max occupancy %d (%d%%)\n",
      N_SECTORS * N_TTES_PER_SECTOR,
      N_SECTORS * N_TTES_PER_SECTOR_USABLE,
      SECTOR_TT_LIMIT_PERCENT );
}
   2183 
   2184 
   2185 /*------------------------------------------------------------*/
   2186 /*--- Printing out statistics.                             ---*/
   2187 /*------------------------------------------------------------*/
   2188 
   2189 static ULong safe_idiv( ULong a, ULong b )
   2190 {
   2191    return (b == 0 ? 0 : a / b);
   2192 }
   2193 
/* Number of translations made so far (the "new" counter reported by
   VG_(print_tt_tc_stats)). */
UInt VG_(get_bbs_translated) ( void )
{
   return n_in_count;
}
   2198 
/* Dump cumulative tt/tc statistics via VG_(message). */
void VG_(print_tt_tc_stats) ( void )
{
   VG_(message)(Vg_DebugMsg,
      "    tt/tc: %'llu tt lookups requiring %'llu probes\n",
      n_full_lookups, n_lookup_probes );
   VG_(message)(Vg_DebugMsg,
      "    tt/tc: %'llu fast-cache updates, %'llu flushes\n",
      n_fast_updates, n_fast_flushes );

   VG_(message)(Vg_DebugMsg,
                " transtab: new        %'lld "
                "(%'llu -> %'llu; ratio %'llu:10) [%'llu scs]\n",
                n_in_count, n_in_osize, n_in_tsize,
                safe_idiv(10*n_in_tsize, n_in_osize),
                n_in_sc_count);
   /* The "?" "?" splices below presumably keep "??)" from forming a
      trigraph in the string literal. */
   VG_(message)(Vg_DebugMsg,
                " transtab: dumped     %'llu (%'llu -> ?" "?)\n",
                n_dump_count, n_dump_osize );
   VG_(message)(Vg_DebugMsg,
                " transtab: discarded  %'llu (%'llu -> ?" "?)\n",
                n_disc_count, n_disc_osize );

   /* Disabled debug dump of sector 0's eclass occupancy. */
   if (0) {
      Int i;
      VG_(printf)("\n");
      for (i = 0; i < ECLASS_N; i++) {
         VG_(printf)(" %4d", sectors[0].ec2tte_used[i]);
         if (i % 16 == 15)
            VG_(printf)("\n");
      }
      VG_(printf)("\n\n");
   }
}
   2232 
   2233 /*------------------------------------------------------------*/
   2234 /*--- Printing out of profiling results.                   ---*/
   2235 /*------------------------------------------------------------*/
   2236 
   2237 static ULong score ( TTEntry* tte )
   2238 {
   2239    return ((ULong)tte->weight) * ((ULong)tte->count);
   2240 }
   2241 
/* Fill tops[0 .. n_tops-1] with the highest-scoring in-use
   translations, in descending score order (tops[0] is the hottest),
   and return the total score over all in-use translations.  Entries
   with addr == 0 are empty placeholders. */
ULong VG_(get_BB_profile) ( BBProfEntry tops[], UInt n_tops )
{
   Int   sno, i, r, s;
   ULong score_total;

   /* First, compute the total weighted count, and find the top N
      ttes.  tops contains pointers to the most-used n_tops blocks, in
      descending order (viz, tops[0] is the highest scorer). */
   for (i = 0; i < n_tops; i++) {
      tops[i].addr  = 0;
      tops[i].score = 0;
   }

   score_total = 0;

   for (sno = 0; sno < N_SECTORS; sno++) {
      if (sectors[sno].tc == NULL)
         continue;
      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         if (sectors[sno].tt[i].status != InUse)
            continue;
         score_total += score(&sectors[sno].tt[i]);
         /* Find the rank for sectors[sno].tt[i]: walk upward from the
            bottom past empty slots and lower-scoring entries. */
         r = n_tops-1;
         while (True) {
            if (r == -1)
               break;
             if (tops[r].addr == 0) {
               r--;
               continue;
             }
             if ( score(&sectors[sno].tt[i]) > tops[r].score ) {
                r--;
                continue;
             }
             break;
         }
         r++;
         vg_assert(r >= 0 && r <= n_tops);
         /* This bb should be placed at r, and bbs above it shifted
            upwards one slot. */
         if (r < n_tops) {
            for (s = n_tops-1; s > r; s--)
               tops[s] = tops[s-1];
            tops[r].addr  = sectors[sno].tt[i].entry;
            tops[r].score = score( &sectors[sno].tt[i] );
         }
      }
   }

   return score_total;
}
   2294 
   2295 /*--------------------------------------------------------------------*/
   2296 /*--- end                                                          ---*/
   2297 /*--------------------------------------------------------------------*/
   2298