
/*--------------------------------------------------------------------*/
/*--- Ptrcheck: a pointer-use checker.                             ---*/
/*--- This file checks stack and global array accesses.            ---*/
/*---                                                    sg_main.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Ptrcheck, a Valgrind tool for checking pointer
   use in programs.

   Copyright (C) 2008-2015 OpenWorks Ltd
      info (at) open-works.co.uk

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "pub_tool_basics.h"
#include "pub_tool_libcbase.h"
#include "pub_tool_libcassert.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_tooliface.h"
#include "pub_tool_wordfm.h"
#include "pub_tool_xarray.h"
#include "pub_tool_threadstate.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_machine.h"
#include "pub_tool_debuginfo.h"
#include "pub_tool_options.h"

#include "pc_common.h"

#include "sg_main.h"      // self


static
void preen_global_Invars ( Addr a, SizeT len ); /*fwds*/


//////////////////////////////////////////////////////////////
//                                                          //
// Basic Stuff                                              //
//                                                          //
//////////////////////////////////////////////////////////////

static inline Bool is_sane_TId ( ThreadId tid )
{
   return tid >= 0 && tid < VG_N_THREADS
          && tid != VG_INVALID_THREADID;
}

static void* sg_malloc ( const HChar* cc, SizeT n ) {
   void* p;
   tl_assert(n > 0);
   p = VG_(malloc)( cc, n );
   return p;
}

static void sg_free ( void* p ) {
   tl_assert(p);
   VG_(free)(p);
}


/* Compare the intervals [a1,a1+n1) and [a2,a2+n2).  Return -1 if the
   first interval is lower, 1 if the first interval is higher, and 0
   if there is any overlap.  The seemingly redundant casts are
   deliberate paranoia, following what looked distinctly like a bug in
   gcc-4.1.2, in which some of the comparisons were done signedly
   rather than unsignedly. */
inline
static Word cmp_nonempty_intervals ( Addr a1, SizeT n1,
                                     Addr a2, SizeT n2 ) {
   UWord a1w = (UWord)a1;
   UWord n1w = (UWord)n1;
   UWord a2w = (UWord)a2;
   UWord n2w = (UWord)n2;
   tl_assert(n1w > 0 && n2w > 0);
   if (a1w + n1w <= a2w) return -1L;
   if (a2w + n2w <= a1w) return 1L;
   return 0;
}
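
/* Illustrative example (not part of the tool): with a1=0x1000, n1=16
   and a2=0x1010, n2=16 the intervals abut but do not overlap, so the
   result is -1; with a2=0x100F instead, byte 0x100F lies in both
   intervals and the result is 0.  This "0 means any overlap" scheme
   is what lets a WordFM keyed on this function act as an interval
   tree: a lookup with a 1-byte key interval finds any node whose
   range contains that byte. */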

/* Return true iff [aSmall,aSmall+nSmall) is entirely contained
   within [aBig,aBig+nBig). */
inline
static Bool is_subinterval_of ( Addr aBig, SizeT nBig,
                                Addr aSmall, SizeT nSmall ) {
   tl_assert(nBig > 0 && nSmall > 0);
   return aBig <= aSmall && aSmall + nSmall <= aBig + nBig;
}

inline
static Addr Addr__min ( Addr a1, Addr a2 ) {
   return a1 < a2 ? a1 : a2;
}

inline
static Addr Addr__max ( Addr a1, Addr a2 ) {
   return a1 < a2 ? a2 : a1;
}


//////////////////////////////////////////////////////////////
//                                                          //
// StackBlocks Persistent Cache                             //
//                                                          //
//////////////////////////////////////////////////////////////

/* We maintain a set of XArray* of StackBlocks.  These are never
   freed.  When a new StackBlock vector is acquired from
   VG_(di_get_stack_blocks_at_ip), we compare it to the existing set.
   If not present, it is added.  If present, the just-acquired one is
   freed and the copy used.

   This simplifies storage management elsewhere.  It allows us to
   assume that a pointer to an XArray* of StackBlock is valid forever.
   It also means there are no duplicates anywhere, which could be
   important from a space point of view for programs that generate a
   lot of translations, or where translations are frequently discarded
   and re-made.

   Note that we normalise the arrays by sorting the elements according
   to an arbitrary total order, so as to avoid the situation where two
   vectors describe the same set of variables but are not structurally
   identical. */
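
/* Illustrative example (not part of the tool): two translations of
   the same code might hand us { "buf", "i" } and { "i", "buf" }.
   After sorting with StackBlock__cmp both vectors become structurally
   identical, so the second lookup in frameBlocks_set hits the first
   copy and the freshly acquired XArray is deleted rather than
   stored. */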

static inline Bool StackBlock__sane ( const StackBlock* fb )
{
   if (fb->name[ sizeof(fb->name)-1 ] != 0)
      return False;
   if (fb->spRel != False && fb->spRel != True)
      return False;
   if (fb->isVec != False && fb->isVec != True)
      return False;
   return True;
}

/* Generate an arbitrary total ordering on StackBlocks. */
static Word StackBlock__cmp ( const StackBlock* fb1, const StackBlock* fb2 )
{
   Word r;
   tl_assert(StackBlock__sane(fb1));
   tl_assert(StackBlock__sane(fb2));
   /* Hopefully the .base test hits most of the time.  For the blocks
      associated with any particular instruction, if the .base values
      are the same then probably it doesn't make sense for the other
      fields to be different.  But this is supposed to be a completely
      general structural total order, so we have to compare everything
      anyway. */
   if (fb1->base < fb2->base) return -1;
   if (fb1->base > fb2->base) return 1;
   /* compare sizes */
   if (fb1->szB < fb2->szB) return -1;
   if (fb1->szB > fb2->szB) return 1;
   /* compare sp/fp flag */
   if (fb1->spRel < fb2->spRel) return -1;
   if (fb1->spRel > fb2->spRel) return 1;
   /* compare is/is-not array-typed flag */
   if (fb1->isVec < fb2->isVec) return -1;
   if (fb1->isVec > fb2->isVec) return 1;
   /* compare the name */
   r = (Word)VG_(strcmp)(fb1->name, fb2->name);
   return r;
}

/* Returns True if all fields except .szB are the same.  szBs may or
   may not be the same; they are simply not consulted. */
static Bool StackBlock__all_fields_except_szB_are_equal (
               StackBlock* fb1,
               StackBlock* fb2
            )
{
   tl_assert(StackBlock__sane(fb1));
   tl_assert(StackBlock__sane(fb2));
   return fb1->base == fb2->base
          && fb1->spRel == fb2->spRel
          && fb1->isVec == fb2->isVec
          && 0 == VG_(strcmp)(fb1->name, fb2->name);
}


/* Generate an arbitrary total ordering on vectors of StackBlocks. */
static Word StackBlocks__cmp ( XArray* fb1s, XArray* fb2s )
{
   Word i, r, n1, n2;
   n1 = VG_(sizeXA)( fb1s );
   n2 = VG_(sizeXA)( fb2s );
   if (n1 < n2) return -1;
   if (n1 > n2) return 1;
   for (i = 0; i < n1; i++) {
      StackBlock *fb1, *fb2;
      fb1 = VG_(indexXA)( fb1s, i );
      fb2 = VG_(indexXA)( fb2s, i );
      r = StackBlock__cmp( fb1, fb2 );
      if (r != 0) return r;
   }
   tl_assert(i == n1 && i == n2);
   return 0;
}

static void pp_StackBlocks ( XArray* sbs )
{
   Word i, n = VG_(sizeXA)( sbs );
   VG_(message)(Vg_DebugMsg, "<<< STACKBLOCKS\n" );
   for (i = 0; i < n; i++) {
      StackBlock* sb = (StackBlock*)VG_(indexXA)( sbs, i );
      VG_(message)(Vg_DebugMsg,
         "   StackBlock{ off %ld szB %lu spRel:%c isVec:%c \"%s\" }\n",
         sb->base, sb->szB, sb->spRel ? 'Y' : 'N',
         sb->isVec ? 'Y' : 'N', &sb->name[0]
      );
   }
   VG_(message)(Vg_DebugMsg, ">>> STACKBLOCKS\n" );
}


/* ---------- The StackBlock vector cache ---------- */

static WordFM* /* XArray* of StackBlock -> nothing */
       frameBlocks_set = NULL;

static void init_StackBlocks_set ( void )
{
   tl_assert(!frameBlocks_set);
   frameBlocks_set
      = VG_(newFM)( sg_malloc, "di.sg_main.iSBs.1", sg_free,
                    (Word(*)(UWord,UWord))StackBlocks__cmp );
   tl_assert(frameBlocks_set);
}

/* Find the given StackBlock-vector in our collection thereof.  If
   found, deallocate the supplied one, and return the address of the
   copy.  If not found, add the supplied one to our collection and
   return its address. */
static XArray* /* of StackBlock */
       StackBlocks__find_and_dealloc__or_add
          ( XArray* /* of StackBlock */ orig )
{
   UWord key, val;

   /* First, normalise, as per comments above. */
   VG_(setCmpFnXA)( orig, (XACmpFn_t)StackBlock__cmp );
   VG_(sortXA)( orig );

   /* Now get rid of any exact duplicates. */
  nuke_dups:
   { Word r, w, nEQ, n = VG_(sizeXA)( orig );
     if (n >= 2) {
        w = 0;
        nEQ = 0;
        for (r = 0; r < n; r++) {
           if (r+1 < n) {
              StackBlock* pR0 = VG_(indexXA)( orig, r+0 );
              StackBlock* pR1 = VG_(indexXA)( orig, r+1 );
              Word c = StackBlock__cmp(pR0,pR1);
              tl_assert(c == -1 || c == 0);
              if (c == 0) { nEQ++; continue; }
           }
           if (w != r) {
              StackBlock* pW = VG_(indexXA)( orig, w );
              StackBlock* pR = VG_(indexXA)( orig, r );
              *pW = *pR;
           }
           w++;
        }
        tl_assert(r == n);
        tl_assert(w + nEQ == n);
        if (w < n) {
           VG_(dropTailXA)( orig, n-w );
        }
        if (0) VG_(printf)("delta %ld\n", n-w);
     }
   }

   /* Deal with the following strangeness, where two otherwise
      identical blocks are claimed to have different sizes.  In which
      case we use the larger size. */
   /* StackBlock{ off 16 szB 66 spRel:Y isVec:Y "sz" }
      StackBlock{ off 16 szB 130 spRel:Y isVec:Y "sz" }
      StackBlock{ off 208 szB 16 spRel:Y isVec:Y "ar" }
   */
   { Word i, n = VG_(sizeXA)( orig );
     if (n >= 2) {
        for (i = 0; i < n-1; i++) {
           StackBlock* sb0 = VG_(indexXA)( orig, i+0 );
           StackBlock* sb1 = VG_(indexXA)( orig, i+1 );
           if (StackBlock__all_fields_except_szB_are_equal(sb0, sb1)) {
              /* They can't be identical because the previous tidying
                 pass would have removed the duplicates.  And they
                 can't be > because the earlier sorting pass would
                 have ordered otherwise-identical descriptors
                 according to < on .szB fields.  Hence: */
              tl_assert(sb0->szB < sb1->szB);
              sb0->szB = sb1->szB;
              /* This makes the blocks identical, at the size of the
                 larger one.  Rather than go to all the hassle of
                 sliding the rest down, simply go back to the
                 remove-duplicates stage.  The assertion guarantees
                 that we eventually make progress, since the rm-dups
                 stage will get rid of one of the blocks.  This is
                 expected to happen only exceedingly rarely. */
              tl_assert(StackBlock__cmp(sb0,sb1) == 0);
              goto nuke_dups;
           }
        }
     }
   }

   /* If there are any blocks which overlap and have the same
      spRel-ness, junk the whole descriptor; it's obviously bogus.
      Icc11 certainly generates bogus info from time to time.

      This check is pretty weak; really we ought to have a stronger
      sanity check. */
   { Word i, n = VG_(sizeXA)( orig );
     static Int moans = 3;
     for (i = 0; i < n-1; i++) {
       StackBlock* sb1 = (StackBlock*)VG_(indexXA)( orig, i );
       StackBlock* sb2 = (StackBlock*)VG_(indexXA)( orig, i+1 );
       if (sb1->spRel == sb2->spRel
           && (sb1->base >= sb2->base
               || sb1->base + sb1->szB > sb2->base)) {
          if (moans > 0 && !VG_(clo_xml)) {
             moans--;
             VG_(message)(Vg_UserMsg, "Warning: bogus DWARF3 info: "
                                      "overlapping stack blocks\n");
             if (VG_(clo_verbosity) >= 2)
                pp_StackBlocks(orig);
             if (moans == 0)
                VG_(message)(Vg_UserMsg, "Further instances of this "
                                         "message will not be shown\n" );
          }
          VG_(dropTailXA)( orig, VG_(sizeXA)( orig ));
          break;
       }
     }
   }

   /* Now, do we have it already? */
   if (VG_(lookupFM)( frameBlocks_set, &key, &val, (UWord)orig )) {
      /* yes */
      XArray* res;
      tl_assert(val == 0);
      tl_assert(key != (UWord)orig);
      VG_(deleteXA)(orig);
      res = (XArray*)key;
      return res;
   } else {
      /* no */
      VG_(addToFM)( frameBlocks_set, (UWord)orig, 0 );
      return orig;
   }
}

/* Top level function for getting the StackBlock vector for a given
   instruction.  It is guaranteed that the returned pointer will be
   valid for the entire rest of the run, and also that the addresses
   of the individual elements of the array will not change. */

static XArray* /* of StackBlock */ get_StackBlocks_for_IP ( Addr ip )
{
   XArray* blocks = VG_(di_get_stack_blocks_at_ip)( ip, True/*arrays only*/ );
   tl_assert(blocks);
   return StackBlocks__find_and_dealloc__or_add( blocks );
}
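
/* Illustrative usage sketch (not part of the tool): a caller that
   needs the stack blocks for an instruction does not own the result
   and must not free it, e.g.

      XArray* blocks = get_StackBlocks_for_IP( ip );
      // 'blocks' is interned: valid until process exit, never freed

   which is what lets IInstance.blocks (below) store these pointers
   without any reference counting. */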


//////////////////////////////////////////////////////////////
//                                                          //
// GlobalBlocks Persistent Cache                            //
//                                                          //
//////////////////////////////////////////////////////////////

/* Generate an arbitrary total ordering on GlobalBlocks. */
static Word GlobalBlock__cmp ( GlobalBlock* gb1, GlobalBlock* gb2 )
{
   Word r;
   /* compare addrs */
   if (gb1->addr < gb2->addr) return -1;
   if (gb1->addr > gb2->addr) return 1;
   /* compare sizes */
   if (gb1->szB < gb2->szB) return -1;
   if (gb1->szB > gb2->szB) return 1;
   /* compare is/is-not array-typed flag */
   if (gb1->isVec < gb2->isVec) return -1;
   if (gb1->isVec > gb2->isVec) return 1;
   /* compare the name */
   r = (Word)VG_(strcmp)(gb1->name, gb2->name);
   if (r != 0) return r;
   /* compare the soname */
   r = (Word)VG_(strcmp)(gb1->soname, gb2->soname);
   return r;
}

static WordFM* /* GlobalBlock* -> nothing */
       globalBlock_set = NULL;

static void init_GlobalBlock_set ( void )
{
   tl_assert(!globalBlock_set);
   globalBlock_set
      = VG_(newFM)( sg_malloc, "di.sg_main.iGBs.1", sg_free,
                    (Word(*)(UWord,UWord))GlobalBlock__cmp );
   tl_assert(globalBlock_set);
}


/* Top level function for making GlobalBlocks persistent.  Call here
   with a non-persistent version, and the returned one is guaranteed
   to be valid for the entire rest of the run.  The supplied one is
   copied, not stored, so can be freed after the call. */

static GlobalBlock* get_persistent_GlobalBlock ( GlobalBlock* orig )
{
   UWord key, val;
   /* Now, do we have it already? */
   if (VG_(lookupFM)( globalBlock_set, &key, &val, (UWord)orig )) {
      /* yes, return the copy */
      GlobalBlock* res;
      tl_assert(val == 0);
      res = (GlobalBlock*)key;
      tl_assert(res != orig);
      return res;
   } else {
      /* no.  clone it, store the clone and return the clone's
         address. */
      GlobalBlock* clone = sg_malloc( "di.sg_main.gpGB.1",
                                      sizeof(GlobalBlock) );
      tl_assert(clone);
      *clone = *orig;
      VG_(addToFM)( globalBlock_set, (UWord)clone, 0 );
      return clone;
   }
}
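
/* Illustrative usage sketch (not part of the tool): unlike the
   StackBlock-vector cache above, the caller here keeps ownership of
   what it passes in, e.g.

      GlobalBlock tmp = ...;                          // caller-owned
      GlobalBlock* gbp = get_persistent_GlobalBlock( &tmp );
      // 'tmp' may now be freed or go out of scope;
      // 'gbp' stays valid for the rest of the run

   This is the pattern acquire_globals() (below) relies on when it
   deletes the XArray it got from m_debuginfo. */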


//////////////////////////////////////////////////////////////
//                                                          //
// Interval tree of StackTreeBlock                          //
//                                                          //
//////////////////////////////////////////////////////////////

/* A node in a stack interval tree.  Zero length intervals (.szB == 0)
   are not allowed.

   A stack interval tree is a (WordFM StackTreeNode* void).  There is
   one stack interval tree for each thread.
*/
typedef
   struct {
      Addr        addr;
      SizeT       szB;   /* copied from .descr->szB */
      StackBlock* descr; /* it's an instance of this block */
      UWord       depth; /* depth of stack at time block was pushed */
   }
   StackTreeNode;

static void pp_StackTree ( WordFM* sitree, const HChar* who )
{
   UWord keyW, valW;
   VG_(printf)("<<< BEGIN pp_StackTree %s\n", who );
   VG_(initIterFM)( sitree );
   while (VG_(nextIterFM)( sitree, &keyW, &valW )) {
      StackTreeNode* nd = (StackTreeNode*)keyW;
      VG_(printf)("  [%#lx,+%lu) descr=%p %s %lu\n", nd->addr, nd->szB,
                  nd->descr, nd->descr->name, nd->descr->szB);
   }
   VG_(printf)(">>> END   pp_StackTree %s\n", who );
}

/* Interval comparison function for StackTreeNode */
static Word cmp_intervals_StackTreeNode ( StackTreeNode* sn1,
                                          StackTreeNode* sn2 )
{
   return cmp_nonempty_intervals(sn1->addr, sn1->szB,
                                 sn2->addr, sn2->szB);
}

/* Find the node holding 'a', if any. */
static StackTreeNode* find_StackTreeNode ( WordFM* sitree, Addr a )
{
   UWord keyW, valW;
   StackTreeNode key;
   tl_assert(sitree);
   key.addr = a;
   key.szB  = 1;
   if (VG_(lookupFM)( sitree, &keyW, &valW, (UWord)&key )) {
      StackTreeNode* res = (StackTreeNode*)keyW;
      tl_assert(valW == 0);
      tl_assert(res != &key);
      return res;
   } else {
      return NULL;
   }
}

/* Note that the supplied XArray of StackBlock must have been
   made persistent already. */
__attribute__((noinline))
static void add_blocks_to_StackTree (
               /*MOD*/WordFM* sitree,
               XArray* /* StackBlock */ descrs,
               XArray* /* Addr */ bases,
               UWord depth
            )
{
   Bool debug = (Bool)0;
   Word i, nDescrs, nBases;

   nDescrs = VG_(sizeXA)( descrs );
   nBases  = VG_(sizeXA)( bases );
   tl_assert(nDescrs == nBases);

   if (nDescrs == 0) return;

   tl_assert(sitree);
   if (debug) {
      VG_(printf)("\ndepth = %lu\n", depth);
      pp_StackTree( sitree, "add_blocks_to_StackTree-pre" );
      pp_StackBlocks(descrs);
   }

   for (i = 0; i < nDescrs; i++) {
      Bool already_present;
      StackTreeNode* nyu;
      Addr        addr  = *(Addr*)VG_(indexXA)( bases, i );
      StackBlock* descr = (StackBlock*)VG_(indexXA)( descrs, i );
      tl_assert(descr->szB > 0);
      nyu = sg_malloc( "di.sg_main.abtST.1", sizeof(StackTreeNode) );
      nyu->addr  = addr;
      nyu->szB   = descr->szB;
      nyu->descr = descr;
      nyu->depth = depth;
      if (debug) VG_(printf)("ADD %#lx %lu\n", addr, descr->szB);
      already_present = VG_(addToFM)( sitree, (UWord)nyu, 0 );
      /* The interval can't already be there; else we have
         overlapping stack blocks. */
      tl_assert(!already_present);
      if (debug) {
         pp_StackTree( sitree, "add_blocks_to_StackTree-step" );
      }
   }
   if (debug) {
      pp_StackTree( sitree, "add_blocks_to_StackTree-post" );
      VG_(printf)("\n");
   }
}

static void del_blocks_from_StackTree ( /*MOD*/WordFM* sitree,
                                        XArray* /* Addr */ bases )
{
   UWord oldK, oldV;
   Word i, nBases = VG_(sizeXA)( bases );
   for (i = 0; i < nBases; i++) {
      Bool b;
      Addr addr = *(Addr*)VG_(indexXA)( bases, i );
      StackTreeNode* nd = find_StackTreeNode(sitree, addr);
      /* The interval must be there; we added it earlier when
         the associated frame was created. */
      tl_assert(nd);
      b = VG_(delFromFM)( sitree, &oldK, &oldV, (UWord)nd );
      /* we just found the block! */
      tl_assert(b);
      tl_assert(oldV == 0);
      tl_assert(nd == (StackTreeNode*)oldK);
      sg_free(nd);
   }
}


static void delete_StackTree__kFin ( UWord keyW ) {
   StackTreeNode* nd = (StackTreeNode*)keyW;
   tl_assert(nd);
   sg_free(nd);
}
static void delete_StackTree__vFin ( UWord valW ) {
   tl_assert(valW == 0);
}
static void delete_StackTree ( WordFM* sitree )
{
   VG_(deleteFM)( sitree,
                 delete_StackTree__kFin, delete_StackTree__vFin );
}

static WordFM* new_StackTree ( void ) {
   return VG_(newFM)( sg_malloc, "di.sg_main.nST.1", sg_free,
                      (Word(*)(UWord,UWord))cmp_intervals_StackTreeNode );
}
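
/* Illustrative lifecycle sketch (not part of the tool): each thread's
   tree follows the pattern

      WordFM* t = new_StackTree();
      add_blocks_to_StackTree( t, descrs, bases, depth ); // on call
      del_blocks_from_StackTree( t, bases );              // on return
      delete_StackTree( t );                              // thread exit

   where 'descrs' came from get_StackBlocks_for_IP and so is
   persistent, while the StackTreeNodes themselves are owned (and
   freed) by these routines. */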


//////////////////////////////////////////////////////////////
//                                                          //
// Interval tree of GlobalTreeBlock                         //
//                                                          //
//////////////////////////////////////////////////////////////

/* A node in a global interval tree.  Zero length intervals
   (.szB == 0) are not allowed.

   A global interval tree is a (WordFM GlobalTreeNode* void).  There
   is one global interval tree for the entire process.
*/
typedef
   struct {
      Addr         addr; /* copied from .descr->addr */
      SizeT        szB;  /* copied from .descr->szB */
      GlobalBlock* descr; /* it's this block */
   }
   GlobalTreeNode;

static void GlobalTreeNode__pp ( GlobalTreeNode* nd ) {
   tl_assert(nd->descr);
   VG_(printf)("GTNode [%#lx,+%lu) %s",
               nd->addr, nd->szB, nd->descr->name);
}

static void GlobalTree__pp ( WordFM* /* of (GlobalTreeNode,void) */ gitree,
                             const HChar* who )
{
   UWord keyW, valW;
   GlobalTreeNode* nd;
   VG_(printf)("<<< GlobalBlockTree (%s)\n", who);
   VG_(initIterFM)( gitree );
   while (VG_(nextIterFM)( gitree, &keyW, &valW )) {
      tl_assert(valW == 0);
      nd = (GlobalTreeNode*)keyW;
      VG_(printf)("  ");
      GlobalTreeNode__pp(nd);
      VG_(printf)("\n");
   }
   VG_(doneIterFM)( gitree );
   VG_(printf)(">>>\n");
}

/* Interval comparison function for GlobalTreeNode */
static Word cmp_intervals_GlobalTreeNode ( GlobalTreeNode* gn1,
                                           GlobalTreeNode* gn2 )
{
   return cmp_nonempty_intervals( gn1->addr, gn1->szB,
                                  gn2->addr, gn2->szB );
}

/* Find the node holding 'a', if any. */
static GlobalTreeNode* find_GlobalTreeNode ( WordFM* gitree, Addr a )
{
   UWord keyW, valW;
   GlobalTreeNode key;
   key.addr = a;
   key.szB  = 1;
   if (VG_(lookupFM)( gitree, &keyW, &valW, (UWord)&key )) {
      GlobalTreeNode* res = (GlobalTreeNode*)keyW;
      tl_assert(valW == 0);
      tl_assert(res != &key);
      return res;
   } else {
      return NULL;
   }
}

/* Note that the supplied GlobalBlock must have been made persistent
   already. */
static void add_block_to_GlobalTree (
               /*MOD*/WordFM* gitree,
               GlobalBlock* descr
            )
{
   Bool already_present;
   GlobalTreeNode *nyu, *nd;
   UWord keyW, valW;
   static Int moans = 3;

   tl_assert(descr->szB > 0);
   nyu = sg_malloc( "di.sg_main.abtG.1", sizeof(GlobalTreeNode) );
   nyu->addr  = descr->addr;
   nyu->szB   = descr->szB;
   nyu->descr = descr;

   /* Basically it's an error to add a global block to the tree that
      is already in the tree.  However, detect and ignore attempts to
      insert exact duplicates; they do appear for some reason
      (possibly a bug in m_debuginfo?). */
   already_present = VG_(lookupFM)( gitree, &keyW, &valW, (UWord)nyu );
   if (already_present) {
      tl_assert(valW == 0);
      nd = (GlobalTreeNode*)keyW;
      tl_assert(nd);
      tl_assert(nd != nyu);
      tl_assert(nd->descr);
      tl_assert(nyu->descr);
      if (nd->addr == nyu->addr && nd->szB == nyu->szB
          /* && 0 == VG_(strcmp)(nd->descr->name, nyu->descr->name) */
          /* Although it seems reasonable to demand that duplicate
             blocks have identical names, that is too strict.  For
             example, reading debuginfo from glibc produces two
             otherwise identical blocks with names "tzname" and
             "__tzname".  A constraint of the form "must be identical,
             or one must be a substring of the other" would fix that.
             However, such trickery is scuppered by the fact that we
             truncate all variable names to 15 characters to make
             storage management simpler, hence giving pairs like
             "__EI___pthread_" (truncated) vs "__pthread_keys".  So
             it's simplest just to skip the name comparison
             completely. */
          && 0 == VG_(strcmp)(nd->descr->soname, nyu->descr->soname)) {
         /* exact duplicate; ignore it */
         sg_free(nyu);
         return;
      }
      /* else fall through; the assertion below will catch it */
   }

   already_present = VG_(addToFM)( gitree, (UWord)nyu, 0 );
   /* The interval can't already be there; else we have
      overlapping global blocks. */
   /* Unfortunately (25 Jan 09) at least icc11 has been seen to
      generate overlapping block descriptions in the Dwarf3; clearly
      bogus. */
   if (already_present && moans > 0 && !VG_(clo_xml)) {
      moans--;
      VG_(message)(Vg_UserMsg, "Warning: bogus DWARF3 info: "
                               "overlapping global blocks\n");
      if (VG_(clo_verbosity) >= 2) {
         GlobalTree__pp( gitree,
                         "add_block_to_GlobalTree: non-exact duplicate" );
         VG_(printf)("Overlapping block: ");
         GlobalTreeNode__pp(nyu);
         VG_(printf)("\n");
      }
      if (moans == 0)
         VG_(message)(Vg_UserMsg, "Further instances of this "
                                  "message will not be shown\n" );
   }
   /* tl_assert(!already_present); */
}

static Bool del_GlobalTree_range ( /*MOD*/WordFM* gitree,
                                   Addr a, SizeT szB )
{
   /* One easy way to do this: look up [a,a+szB) in the tree.  That
      will either succeed, producing a block which intersects that
      range, in which case we delete it and repeat; or it will fail,
      in which case there are no blocks intersecting the range, and we
      can bring the process to a halt. */
   UWord keyW, valW, oldK, oldV;
   GlobalTreeNode key, *nd;
   Bool b, anyFound;

   tl_assert(szB > 0);

   anyFound = False;

   key.addr = a;
   key.szB  = szB;

   while (VG_(lookupFM)( gitree, &keyW, &valW, (UWord)&key )) {
      anyFound = True;
      nd = (GlobalTreeNode*)keyW;
      tl_assert(valW == 0);
      tl_assert(nd != &key);
      tl_assert(cmp_nonempty_intervals(a, szB, nd->addr, nd->szB) == 0);

      b = VG_(delFromFM)( gitree, &oldK, &oldV, (UWord)&key );
      tl_assert(b);
      tl_assert(oldV == 0);
      tl_assert(oldK == keyW); /* check we deleted the node we just found */
   }

   return anyFound;
}
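
/* Worked example (illustrative only): if the tree holds blocks at
   [0x5000,+0x100) and [0x5200,+0x40), then
   del_GlobalTree_range(giTree, 0x5000, 0x300) finds and deletes both
   in successive loop iterations (each lookup with the whole-range key
   returns some overlapping node) and returns True; a further call on
   the same range finds nothing and returns False. */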


//////////////////////////////////////////////////////////////
//                                                          //
// Invar                                                    //
//                                                          //
//////////////////////////////////////////////////////////////

/* An invariant, as resulting from watching the destination of a
   memory referencing instruction.  It is initially Inv_Unset, until
   the instruction makes a first access. */

typedef
   enum {
      Inv_Unset=1,  /* not established yet */
      Inv_Unknown,  /* unknown location */
      Inv_Stack0,   /* array-typed stack block in innermost frame */
      Inv_StackN,   /* array-typed stack block in non-innermost frame */
      Inv_Global,   /* array-typed global block */
   }
   InvarTag;

typedef
   struct {
      InvarTag tag;
      union {
         struct {
         } Unset;
         struct {
         } Unknown;
         struct {
            Addr  addr;
            SizeT szB;
            StackBlock* descr;
         } Stack0; /* innermost stack frame */
         struct {
            /* Pointer to a node in the interval tree for
               this thread. */
            StackTreeNode* nd;
         } StackN; /* non-innermost stack frame */
         struct {
            /* Pointer to a GlobalBlock in the interval tree of
               global blocks. */
            GlobalTreeNode* nd;
         } Global;
      }
      Inv;
   }
   Invar;

/* Partial debugging printing for an Invar. */
static void pp_Invar ( Invar* i )
{
   switch (i->tag) {
      case Inv_Unset:
         VG_(printf)("Unset");
         break;
      case Inv_Unknown:
         VG_(printf)("Unknown");
         break;
      case Inv_Stack0:
         VG_(printf)("Stack0 [%#lx,+%lu)",
                     i->Inv.Stack0.addr, i->Inv.Stack0.szB);
         break;
      case Inv_StackN:
         VG_(printf)("StackN [%#lx,+%lu)",
                     i->Inv.StackN.nd->addr, i->Inv.StackN.nd->szB);
         break;
      case Inv_Global:
         VG_(printf)("Global [%#lx,+%lu)",
                     i->Inv.Global.nd->addr, i->Inv.Global.nd->szB);
         break;
      default:
         tl_assert(0);
   }
}

/* Compare two Invars for equality. */
static Bool eq_Invar ( Invar* i1, Invar* i2 )
{
   if (i1->tag != i2->tag)
      return False;
   switch (i1->tag) {
      case Inv_Unset:
         return True;
      case Inv_Unknown:
         return True;
      case Inv_Stack0:
         return i1->Inv.Stack0.addr == i2->Inv.Stack0.addr
                && i1->Inv.Stack0.szB == i2->Inv.Stack0.szB;
      case Inv_StackN:
         return i1->Inv.StackN.nd == i2->Inv.StackN.nd;
      case Inv_Global:
         return i1->Inv.Global.nd == i2->Inv.Global.nd;
      default:
         tl_assert(0);
   }
   /*NOTREACHED*/
   tl_assert(0);
}

/* Generate a piece of text showing where 'ea' is relative to 'invar',
   if known.  If unknown, generate an empty string.  'buf' must be at
   least 32 bytes in size.  Also return the absolute value of the
   delta, if known, or zero if not known.
*/
static void gen_delta_str ( /*OUT*/HChar* buf,
                            /*OUT*/UWord* absDelta,
                            Invar* inv, Addr ea )
{
   Addr  block = 0;
   SizeT szB   = 0;

   buf[0] = 0;
   *absDelta = 0;

   switch (inv->tag) {
      case Inv_Unknown:
      case Inv_Unset:
         return; /* unknown */
      case Inv_Stack0:
         block = inv->Inv.Stack0.addr;
         szB   = inv->Inv.Stack0.szB;
         break;
      case Inv_StackN:
         block = inv->Inv.StackN.nd->addr;
         szB   = inv->Inv.StackN.nd->szB;
         break;
      case Inv_Global:
         block = inv->Inv.Global.nd->addr;
         szB   = inv->Inv.Global.nd->szB;
         break;
      default:
         tl_assert(0);
   }
   tl_assert(szB > 0);
   if (ea < block) {
      *absDelta = block - ea;
      VG_(sprintf)(buf, "%'lu before", *absDelta);
   }
   else if (ea >= block + szB) {
      *absDelta = ea - (block + szB);
      VG_(sprintf)(buf, "%'lu after", *absDelta);
   }
   else {
      // Leave *absDelta at zero.
      VG_(sprintf)(buf, "%'lu inside", ea - block);
   }
}
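
/* Example outputs (illustrative only): for a block at [0x1000,+0x40),
   ea 0x0FF8 gives "8 before" (absDelta 8), ea 0x1050 gives "16 after"
   (absDelta 16), and ea 0x1004 gives "4 inside" with absDelta left at
   zero.  Hence the requirement above that 'buf' be at least 32 bytes:
   it must hold the digits of a full-width UWord plus the suffix. */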


/* Print selected parts of an Invar, suitable for use in error
   messages. */
static void show_Invar( HChar* buf, Word nBuf, Invar* inv, Word depth )
{
   const HChar* str;
   tl_assert(nBuf >= 128);
   buf[0] = 0;
   switch (inv->tag) {
      case Inv_Unknown:
         VG_(sprintf)(buf, "%s", "unknown");
         break;
      case Inv_Stack0:
         str = "array";
         VG_(sprintf)(buf, "stack %s \"%s\" of size %'lu in this frame",
                      str, inv->Inv.Stack0.descr->name,
                      inv->Inv.Stack0.szB );
         break;
      case Inv_StackN:
         str = "array";
         VG_(sprintf)(buf, "stack %s \"%s\" of size %'lu in frame %lu back from here",
                      str, inv->Inv.StackN.nd->descr->name,
                           inv->Inv.StackN.nd->descr->szB,
                           depth - inv->Inv.StackN.nd->depth );
         break;
      case Inv_Global:
         str = "array";
         VG_(sprintf)(buf, "global %s \"%s\" of size %'lu in object with soname \"%s\"",
                      str, inv->Inv.Global.nd->descr->name,
                           inv->Inv.Global.nd->descr->szB,
                           inv->Inv.Global.nd->descr->soname );
         break;
      case Inv_Unset:
         VG_(sprintf)(buf, "%s", "Unset!");
         break;
      default:
         tl_assert(0);
   }
}


//////////////////////////////////////////////////////////////
//                                                          //
// our globals                                              //
//                                                          //
//////////////////////////////////////////////////////////////

//////////////////////////////////////////////////////////////
///

#define N_QCACHE 16

/* Powers of two only, else the result will be chaos */
#define QCACHE_ADVANCE_EVERY 16

/* Per-thread query cache.  Note that the invar can only be Inv_StackN
   (but not Inv_Stack0), Inv_Global or Inv_Unknown. */
typedef
   struct {
      Addr  addr;
      SizeT szB;
      Invar inv;
   }
   QCElem;

typedef
   struct {
      Word   nInUse;
      QCElem elems[N_QCACHE];
   }
   QCache;

static void QCache__invalidate ( QCache* qc ) {
   tl_assert(qc->nInUse >= 0);
   qc->nInUse = 0;
}

static void QCache__pp ( QCache* qc, const HChar* who )
{
   Word i;
   VG_(printf)("<<< QCache with %ld elements (%s)\n", qc->nInUse, who);
   for (i = 0; i < qc->nInUse; i++) {
      VG_(printf)("  [%#lx,+%#lx) ", qc->elems[i].addr, qc->elems[i].szB);
      pp_Invar(&qc->elems[i].inv);
      VG_(printf)("\n");
   }
   VG_(printf)(">>>\n");
}

static ULong stats__qcache_queries = 0;
static ULong stats__qcache_misses  = 0;
static ULong stats__qcache_probes  = 0;

///
//////////////////////////////////////////////////////////////

/* Each thread has:
   * a shadow stack of StackFrames, which is a doubly-linked list
   * a stack block interval tree
*/
static  struct _StackFrame**         shadowStacks;

static  WordFM** /* StackTreeNode */ siTrees;

static  QCache*                      qcaches;


/* Additionally, there is one global variable interval tree
   for the entire process.
*/
static WordFM* /* GlobalTreeNode */ giTree;


static void invalidate_all_QCaches ( void )
{
   Word i;
   for (i = 0; i < VG_N_THREADS; i++) {
      QCache__invalidate( &qcaches[i] );
   }
}

static void ourGlobals_init ( void )
{
   Word i;

   shadowStacks = sg_malloc( "di.sg_main.oGi.2",
                             VG_N_THREADS * sizeof shadowStacks[0] );
   siTrees = sg_malloc( "di.sg_main.oGi.3", VG_N_THREADS * sizeof siTrees[0] );
   qcaches = sg_malloc( "di.sg_main.oGi.4", VG_N_THREADS * sizeof qcaches[0] );

   for (i = 0; i < VG_N_THREADS; i++) {
      shadowStacks[i] = NULL;
      siTrees[i] = NULL;
      qcaches[i] = (QCache){};
   }
   invalidate_all_QCaches();
   giTree = VG_(newFM)( sg_malloc, "di.sg_main.oGi.1", sg_free,
                        (Word(*)(UWord,UWord))cmp_intervals_GlobalTreeNode );
}


//////////////////////////////////////////////////////////////
//                                                          //
// Handle global variable load/unload events                //
//                                                          //
//////////////////////////////////////////////////////////////

static void acquire_globals ( ULong di_handle )
{
   Word n, i;
   XArray* /* of GlobalBlock */ gbs;
   if (0) VG_(printf)("ACQUIRE GLOBALS %llu\n", di_handle );
   gbs = VG_(di_get_global_blocks_from_dihandle)
            (di_handle, True/*arrays only*/);
   if (0) VG_(printf)("   GOT %ld globals\n", VG_(sizeXA)( gbs ));

   n = VG_(sizeXA)( gbs );
   for (i = 0; i < n; i++) {
      GlobalBlock* gbp;
      GlobalBlock* gb = VG_(indexXA)( gbs, i );
      if (0) VG_(printf)("   new Global size %2lu at %#lx:  %s %s\n",
                         gb->szB, gb->addr, gb->soname, gb->name );
      tl_assert(gb->szB > 0);
      /* Make a persistent copy of each GlobalBlock, and add it
         to the tree. */
      gbp = get_persistent_GlobalBlock( gb );
      add_block_to_GlobalTree( giTree, gbp );
   }

   VG_(deleteXA)( gbs );
}


/* We only intercept these two because we need to see any di_handles
   that might arise from the mappings/allocations. */
void sg_new_mem_mmap( Addr a, SizeT len,
                      Bool rr, Bool ww, Bool xx, ULong di_handle )
{
   if (di_handle > 0)
      acquire_globals(di_handle);
}
void sg_new_mem_startup( Addr a, SizeT len,
                         Bool rr, Bool ww, Bool xx, ULong di_handle )
{
   if (di_handle > 0)
      acquire_globals(di_handle);
}
void sg_die_mem_munmap ( Addr a, SizeT len )
{
   Bool debug = (Bool)0;
   Bool overlap = False;

   if (debug) VG_(printf)("MUNMAP %#lx %lu\n", a, len );

   if (len == 0)
      return;

   overlap = del_GlobalTree_range(giTree, a, len);

   { /* redundant sanity check */
     UWord keyW, valW;
     VG_(initIterFM)( giTree );
     while (VG_(nextIterFM)( giTree, &keyW, &valW )) {
        GlobalTreeNode* nd = (GlobalTreeNode*)keyW;
        tl_assert(valW == 0);
        tl_assert(nd->szB > 0);
        tl_assert(nd->addr + nd->szB <= a
                  || a + len <= nd->addr);
     }
     VG_(doneIterFM)( giTree );
   }

   if (!overlap)
      return;

   /* Ok, the range contained some blocks.  Therefore we'll need to
      visit all the Invars in all the thread shadow stacks, and
      convert all Inv_Global entries that intersect [a,a+len) to
      Inv_Unknown. */
   tl_assert(len > 0);
   preen_global_Invars( a, len );
   invalidate_all_QCaches();
}


//////////////////////////////////////////////////////////////
//                                                          //
// StackFrame                                               //
//                                                          //
//////////////////////////////////////////////////////////////

static ULong stats__total_accesses   = 0;
static ULong stats__classify_Stack0  = 0;
static ULong stats__classify_StackN  = 0;
static ULong stats__classify_Global  = 0;
static ULong stats__classify_Unknown = 0;
static ULong stats__Invars_preened   = 0;
static ULong stats__Invars_changed   = 0;
static ULong stats__t_i_b_empty      = 0;
static ULong stats__htab_fast        = 0;
static ULong stats__htab_searches    = 0;
static ULong stats__htab_probes      = 0;
static ULong stats__htab_resizes     = 0;


/* A dynamic instance of an instruction */
typedef
   struct {
      /* IMMUTABLE */
      Addr    insn_addr; /* NB! zero means 'not in use' */
      XArray* blocks; /* XArray* of StackBlock, or NULL if none */
      /* MUTABLE */
      Invar invar;
   }
   IInstance;


#define N_HTAB_FIXED 64

typedef
   struct _StackFrame {
      /* The sp when the frame was created, so we know when to get rid
         of it. */
      Addr creation_sp;
      /* The stack frames for a thread are arranged as a doubly-linked
         list.  Obviously the outermost frame in the stack has .outer
         as NULL and the innermost in theory has .inner as NULL.
         However, when a function returns, we don't delete the
         just-vacated StackFrame.  Instead, it is retained in the list
         and will be re-used when the next call happens.  This is so
         as to avoid constantly having to dynamically allocate and
         deallocate frames. */
      struct _StackFrame* inner;
      struct _StackFrame* outer;
      Word depth; /* 0 for outermost; increases inwards */
      /* Information for each memory referencing instruction, for this
         instantiation of the function.  The iinstances array is
         operated as a simple linear-probe hash table, which is
         dynamically expanded as necessary.  One critical thing is
         that an IInstance with a .insn_addr of zero is interpreted to
         mean that hash table slot is unused.  This means we can't
         store an IInstance for address zero. */
      /* Note that htab initially points to htab_fixed.  If htab_fixed
         turns out not to be big enough then htab is made to point to
         dynamically allocated memory.  But it's often the case that
         htab_fixed is big enough, so this optimisation saves a huge
         number of sg_malloc/sg_free call pairs. */
      IInstance* htab;
      UWord      htab_size; /* size of hash table, MAY ONLY BE A POWER OF 2 */
      UWord      htab_used; /* number of hash table slots currently in use */
      /* If this frame is currently making a call, then the following
         are relevant. */
      Addr sp_at_call;
      Addr fp_at_call;
      XArray* /* of Addr */ blocks_added_by_call;
      /* See comment just above */
      IInstance htab_fixed[N_HTAB_FIXED];
   }
   StackFrame;




/* Move this somewhere else? */
/* Visit all Invars in the entire system.  Change all Inv_Global
   Invars that intersect [a,a+len) to Inv_Unknown. */

__attribute__((noinline))
static void preen_global_Invar ( Invar* inv, Addr a, SizeT len )
{
   stats__Invars_preened++;
   tl_assert(len > 0);
   tl_assert(inv);
   switch (inv->tag) {
      case Inv_Global:
         tl_assert(inv->Inv.Global.nd);
         tl_assert(inv->Inv.Global.nd->szB > 0);
         if (0) VG_(printf)("preen_Invar Global %#lx %lu\n",
                            inv->Inv.Global.nd->addr,
                            inv->Inv.Global.nd->szB);
         if (0 == cmp_nonempty_intervals(a, len, inv->Inv.Global.nd->addr,
                                                 inv->Inv.Global.nd->szB)) {
            inv->tag = Inv_Unknown;
            stats__Invars_changed++;
         }
         break;
      case Inv_Stack0:
      case Inv_StackN:
      case Inv_Unknown:
         break;
      case Inv_Unset: /* this should never happen */
         /* fallthrough */
      default:
         tl_assert(0);
   }
}

__attribute__((noinline))
static void preen_global_Invars ( Addr a, SizeT len )
{
   Int         i;
   UWord       u;
   StackFrame* frame;
   tl_assert(len > 0);
   for (i = 0; i < VG_N_THREADS; i++) {
      frame = shadowStacks[i];
      if (!frame)
         continue; /* no frames for this thread */
      /* start from the innermost frame */
      while (frame->inner)
         frame = frame->inner;
      tl_assert(frame->outer);
      /* work through the frames from innermost to outermost.  The
         order isn't important; we just need to ensure we visit each
         frame once (including those which are not actually active,
         more 'inner' than the 'innermost active frame', viz, just
         hanging around waiting to be used, when the current innermost
         active frame makes more calls).  See comments on definition of
         struct _StackFrame. */
      for (; frame; frame = frame->outer) {
         UWord xx = 0; /* sanity check only; count of used htab entries */
         if (!frame->htab)
            continue; /* frame not in use.  See shadowStack_unwind(). */
         for (u = 0; u < frame->htab_size; u++) {
            IInstance* ii = &frame->htab[u];
            if (ii->insn_addr == 0)
               continue; /* not in use */
            if (0) { pp_Invar(&ii->invar); VG_(printf)(" x\n"); }
            preen_global_Invar( &ii->invar, a, len );
            xx++;
         }
         tl_assert(xx == frame->htab_used);
      }
   }
}


/* XXX this should be >> 2 on ppc32/64 since the bottom two bits
   of the ip are guaranteed to be zero */
inline static UWord compute_II_hash ( Addr ip, UWord htab_size ) {
   return (ip >> 0) & (htab_size - 1);
}
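
/* Worked example (illustrative only): with htab_size = 64 the mask is
   0x3F, so ip 0x400A13 hashes to 0x13 and ip 0x400A53 collides with
   it; the collision is resolved by the linear probing in the lookup
   loops below.  Since htab_size is always a power of 2, the
   AND-with-mask is equivalent to 'ip % htab_size'. */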
   1326 
   1327 __attribute__((noinline))
   1328 static void initialise_II_hash_table ( StackFrame* sf )
   1329 {
   1330    UWord i;
   1331    sf->htab_size = N_HTAB_FIXED; /* initial hash table size */
   1332    sf->htab = &sf->htab_fixed[0];
   1333    tl_assert(sf->htab);
   1334    sf->htab_used = 0;
   1335    for (i = 0; i < sf->htab_size; i++)
   1336       sf->htab[i].insn_addr = 0; /* NOT IN USE */
   1337 }
   1338 
   1339 
   1340 __attribute__((noinline))
   1341 static void resize_II_hash_table ( StackFrame* sf )
   1342 {
   1343    UWord     i, j, ix, old_size, new_size;
   1344    IInstance *old_htab, *new_htab, *old;
   1345 
   1346    tl_assert(sf && sf->htab);
   1347    old_size = sf->htab_size;
   1348    new_size = 2 * old_size;
   1349    old_htab = sf->htab;
   1350    new_htab = sg_malloc( "di.sg_main.rIht.1",
   1351                          new_size * sizeof(IInstance) );
   1352    for (i = 0; i < new_size; i++) {
   1353       new_htab[i].insn_addr = 0; /* NOT IN USE */
   1354    }
   1355    for (i = 0; i < old_size; i++) {
   1356       old = &old_htab[i];
   1357       if (old->insn_addr == 0 /* NOT IN USE */)
   1358          continue;
   1359       ix = compute_II_hash(old->insn_addr, new_size);
   1360       /* find out where to put this, in the new table */
   1361       j = new_size;
   1362       while (1) {
   1363          if (new_htab[ix].insn_addr == 0)
   1364             break;
   1365          /* This can't ever happen, because it would mean the new
   1366             table is full; that isn't allowed -- even the old table is
   1367             only allowed to become half full. */
   1368          tl_assert(j > 0);
   1369          j--;
   1370          ix++; if (ix == new_size) ix = 0;
   1371       }
   1372       /* copy the old entry to this location */
   1373       tl_assert(ix < new_size);
   1374       tl_assert(new_htab[ix].insn_addr == 0);
   1375       new_htab[ix] = *old;
   1376       tl_assert(new_htab[ix].insn_addr != 0);
   1377    }
   1378    /* all entries copied; free old table. */
   1379    if (old_htab != &sf->htab_fixed[0])
   1380       sg_free(old_htab);
   1381    sf->htab = new_htab;
   1382    sf->htab_size = new_size;
   1383    /* check sf->htab_used is correct.  Optional and a bit expensive
   1384       but anyway: */
   1385    j = 0;
   1386    for (i = 0; i < new_size; i++) {
   1387       if (new_htab[i].insn_addr != 0) {
   1388          j++;
   1389       }
   1390    }
   1391    tl_assert(j == sf->htab_used);
   1392    if (0) VG_(printf)("resized tab for SF %p to %lu\n", sf, new_size);
   1393 }
   1394 
   1395 
   1396 __attribute__((noinline))
   1397 static IInstance* find_or_create_IInstance_SLOW (
   1398                      StackFrame* sf,
   1399                      Addr ip,
   1400                      XArray* /* StackBlock */ ip_frameblocks
   1401                   )
   1402 {
   1403    UWord i, ix;
   1404 
   1405    stats__htab_searches++;
   1406 
   1407    tl_assert(sf);
   1408    tl_assert(sf->htab);
   1409 
   /* Make sure the table's load factor doesn't get too high: resize
      (doubling the size) once the table would become half full. */
   1411    if (UNLIKELY(2 * sf->htab_used >= 1 * sf->htab_size)) {
   1412       stats__htab_resizes++;
   1413       resize_II_hash_table(sf);
   1414    }
   1415    tl_assert(2 * sf->htab_used <= sf->htab_size);
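   /* Worked example (hypothetical numbers): with htab_size == 64, the
      resize above fires once htab_used reaches 32; doubling to 128
      entries restores a load factor of at most 25%. */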
   1416 
   1417    ix = compute_II_hash(ip, sf->htab_size);
   1418    i = sf->htab_size;
   1419    while (1) {
   1420       stats__htab_probes++;
   1421       /* Note that because of the way the fast-case handler works,
   1422          these two tests are actually redundant in the first iteration
   1423          of this loop.  (Except they aren't redundant if the code just
   1424          above resized the table first. :-) */
   1425       if (sf->htab[ix].insn_addr == ip)
   1426          return &sf->htab[ix];
   1427       if (sf->htab[ix].insn_addr == 0)
   1428          break;
      /* If i ever gets to zero and we have found neither what we're
         looking for nor an empty slot, the table must be full.  That
         isn't possible -- we monitor the load factor to ensure it
         doesn't get above 50%, and resize the table before that can
         happen. */
   1434       tl_assert(i > 0);
   1435       i--;
   1436       ix++;
   1437       if (ix == sf->htab_size) ix = 0;
   1438    }
   1439 
   1440    /* So now we've found a free slot at ix, and we can use that. */
   1441    tl_assert(sf->htab[ix].insn_addr == 0);
   1442 
   1443    /* Add a new record in this slot. */
   1444    tl_assert(ip != 0); /* CAN'T REPRESENT THIS */
   1445    sf->htab[ix].insn_addr = ip;
   1446    sf->htab[ix].blocks    = ip_frameblocks;
   1447    sf->htab[ix].invar.tag = Inv_Unset;
   1448    sf->htab_used++;
   1449    return &sf->htab[ix];
   1450 }
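
/* Probe-sequence sketch (hypothetical values): with htab_size == 8 and
   compute_II_hash(ip, 8) == 5, the loop above inspects slots 5, 6, 7,
   0, 1, ... in that order, stopping at the first slot that either
   already holds ip (a hit) or is empty (a miss; the slot is then
   claimed for ip). */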
   1451 
   1452 
   1453 inline
   1454 static IInstance* find_or_create_IInstance (
   1455                      StackFrame* sf,
   1456                      Addr ip,
   1457                      XArray* /* StackBlock */ ip_frameblocks
   1458                   )
   1459 {
   1460    UWord ix = compute_II_hash(ip, sf->htab_size);
   1461    /* Is it in the first slot we come to? */
   1462    if (LIKELY(sf->htab[ix].insn_addr == ip)) {
   1463       stats__htab_fast++;
   1464       return &sf->htab[ix];
   1465    }
   1466    /* If the first slot we come to is empty, bag it. */
   1467    if (LIKELY(sf->htab[ix].insn_addr == 0)) {
   1468       stats__htab_fast++;
   1469       tl_assert(ip != 0);
   1470       sf->htab[ix].insn_addr = ip;
   1471       sf->htab[ix].blocks    = ip_frameblocks;
   1472       sf->htab[ix].invar.tag = Inv_Unset;
   1473       sf->htab_used++;
   1474       return &sf->htab[ix];
   1475    }
   /* Otherwise we hand off to the slow case, which searches other
      slots, resizing the table first if necessary. */
   1478    return find_or_create_IInstance_SLOW( sf, ip, ip_frameblocks );
   1479 }
   1480 
   1481 
   1482 __attribute__((noinline))
   1483 static Addr calculate_StackBlock_EA ( StackBlock* descr,
   1484                                       Addr sp, Addr fp ) {
   1485    UWord w1 = (UWord)descr->base;
   1486    UWord w2 = (UWord)(descr->spRel ? sp : fp);
   1487    UWord ea = w1 + w2;
   1488    return ea;
   1489 }
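
/* For instance (hypothetical values): a descriptor with base == -16
   and spRel == True, evaluated with sp == 0x7fff1000, gives
   ea == 0x7fff0ff0; with spRel == False, fp is used instead of sp. */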
   1490 
   1491 /* Given an array of StackBlocks, return an array of Addrs, holding
   1492    their effective addresses.  Caller deallocates result array. */
   1493 __attribute__((noinline))
   1494 static XArray* /* Addr */ calculate_StackBlock_EAs (
   1495                              XArray* /* StackBlock */ blocks,
   1496                              Addr sp, Addr fp
   1497                           )
   1498 {
   1499    XArray* res;
   1500    Word i, n = VG_(sizeXA)( blocks );
   1501    tl_assert(n > 0);
   1502    res = VG_(newXA)( sg_malloc, "di.sg_main.cSBE.1", sg_free, sizeof(Addr) );
   1503    for (i = 0; i < n; i++) {
   1504       StackBlock* blk = VG_(indexXA)( blocks, i );
   1505       Addr ea = calculate_StackBlock_EA( blk, sp, fp );
   1506       VG_(addToXA)( res, &ea );
   1507    }
   1508    return res;
   1509 }
   1510 
   1511 
   1512 /* Try to classify the block into which a memory access falls, and
   1513    write the result in 'inv'.  This writes all relevant fields of
   1514    'inv'. */
   1515 __attribute__((noinline))
   1516 static void classify_address ( /*OUT*/Invar* inv,
   1517                                ThreadId tid,
   1518                                Addr ea, Addr sp, Addr fp,
   1519                                UWord szB,
   1520                                XArray* /* of StackBlock */ thisInstrBlocks )
   1521 {
   1522    tl_assert(szB > 0);
   1523    /* First, look in the stack blocks accessible in this instruction's
   1524       frame. */
   1525    {
   1526      Word i, nBlocks = VG_(sizeXA)( thisInstrBlocks );
   1527      if (nBlocks == 0) stats__t_i_b_empty++;
   1528      for (i = 0; i < nBlocks; i++) {
   1529         StackBlock* descr = VG_(indexXA)( thisInstrBlocks, i );
   1530         Addr bea = calculate_StackBlock_EA( descr, sp, fp );
   1531         if (bea <= ea && ea + szB <= bea + descr->szB) {
   1532            /* found it */
   1533            inv->tag = Inv_Stack0;
   1534            inv->Inv.Stack0.addr  = bea;
   1535            inv->Inv.Stack0.szB   = descr->szB;
   1536            inv->Inv.Stack0.descr = descr;
   1537            stats__classify_Stack0++;
   1538            return;
   1539         }
   1540      }
   1541    }
   1542    /* Look in this thread's query cache */
   1543    { Word i;
   1544      QCache* cache = &qcaches[tid];
   1545      static UWord ctr = 0;
   1546      stats__qcache_queries++;
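     /* Linear scan of the cache.  On a hit at position i > 0, every
        QCACHE_ADVANCE_EVERY-th hit swaps the matching entry one slot
        towards the front -- a cheap approximation to move-to-front
        ordering. */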
   1547      for (i = 0; i < cache->nInUse; i++) {
   1548         if (0) /* expensive in a loop like this */
   1549                tl_assert(cache->elems[i].addr + cache->elems[i].szB != 0);
   1550         stats__qcache_probes++;
   1551         if (is_subinterval_of(cache->elems[i].addr,
   1552                               cache->elems[i].szB, ea, szB)) {
   1553            if (i > 0
   1554                && (ctr++ & (QCACHE_ADVANCE_EVERY-1)) == 0) {
   1555               QCElem tmp;
   1556               tmp = cache->elems[i-1];
   1557               cache->elems[i-1] = cache->elems[i];
   1558               cache->elems[i] = tmp;
   1559               i--;
   1560            }
   1561            *inv = cache->elems[i].inv;
   1562            return;
   1563         }
   1564      }
   1565      stats__qcache_misses++;
   1566    }
   1567    /* Ok, so it's not a block in the top frame.  Perhaps it's a block
   1568       in some calling frame?  Consult this thread's stack-block
   1569       interval tree to find out. */
   1570    { StackTreeNode* nd = find_StackTreeNode( siTrees[tid], ea );
   1571      /* We know that [ea,ea+1) is in the block, but we need to
   1572         restrict to the case where the whole access falls within
   1573         it. */
   1574      if (nd && !is_subinterval_of(nd->addr, nd->szB, ea, szB)) {
   1575         nd = NULL;
   1576      }
   1577      if (nd) {
   1578         /* found it */
   1579         inv->tag = Inv_StackN;
   1580         inv->Inv.StackN.nd = nd;
   1581         stats__classify_StackN++;
   1582         goto out;
   1583      }
   1584    }
   1585    /* Not in a stack block.  Try the global pool. */
   1586    { GlobalTreeNode* nd = find_GlobalTreeNode(giTree, ea);
   1587      /* We know that [ea,ea+1) is in the block, but we need to
   1588         restrict to the case where the whole access falls within
   1589         it. */
   1590      if (nd && !is_subinterval_of(nd->addr, nd->szB, ea, szB)) {
   1591         nd = NULL;
   1592      }
   1593      if (nd) {
   1594         /* found it */
   1595         inv->tag = Inv_Global;
   1596         inv->Inv.Global.nd = nd;
   1597         stats__classify_Global++;
   1598         goto out;
   1599      }
   1600    }
   1601    /* No idea - give up. */
   1602    inv->tag = Inv_Unknown;
   1603    stats__classify_Unknown++;
   1604 
   1605    /* Update the cache */
   1606   out:
   1607    { Addr    toadd_addr = 0;
   1608      SizeT   toadd_szB  = 0;
   1609      QCache* cache      = &qcaches[tid];
   1610 
   1611      static UWord ctr = 0;
   1612      Bool show = False;
   1613      if (0 && 0 == (ctr++ & 0x1FFFFF)) show = True;
   1614 
   1615      if (show) QCache__pp(cache, "before upd");
   1616 
   1617      switch (inv->tag) {
   1618         case Inv_Global:
   1619            toadd_addr = inv->Inv.Global.nd->addr;
   1620            toadd_szB  = inv->Inv.Global.nd->szB;
   1621            break;
   1622         case Inv_StackN:
   1623            toadd_addr = inv->Inv.StackN.nd->addr;
   1624            toadd_szB  = inv->Inv.StackN.nd->szB;
   1625            break;
   1626         case Inv_Unknown: {
   1627            /* This is more complex.  We need to figure out the
   1628               intersection of the "holes" in the global and stack
   1629               interval trees into which [ea,ea+szB) falls.  This is
   1630               further complicated by the fact that [ea,ea+szB) might
   1631               not fall cleanly into a hole; it may instead fall across
   1632               the boundary of a stack or global block.  In that case
   1633               we just ignore it and don't update the cache, since we
   1634               have no way to represent this situation precisely. */
   1635            StackTreeNode  sNegInf, sPosInf, sKey, *sLB, *sUB;
   1636            GlobalTreeNode gNegInf, gPosInf, gKey, *gLB, *gUB;
   1637            Addr gMin, gMax, sMin, sMax, uMin, uMax;
   1638            Bool sOK, gOK;
   1639            sNegInf.addr = 0;
   1640            sNegInf.szB  = 1;
   1641            sPosInf.addr = ~(UWord)0;
   1642            sPosInf.szB  = 1;
   1643            gNegInf.addr = 0;
   1644            gNegInf.szB  = 1;
   1645            gPosInf.addr = ~(UWord)0;
   1646            gPosInf.szB  = 1;
   1647            sKey.addr = ea;
   1648            sKey.szB  = szB;
   1649            gKey.addr = ea;
   1650            gKey.szB  = szB;
   1651            if (0) VG_(printf)("Tree sizes %lu %lu\n",
   1652                               VG_(sizeFM)(siTrees[tid]), VG_(sizeFM)(giTree));
   1653            sOK = VG_(findBoundsFM)( siTrees[tid],
   1654                                     (UWord*)&sLB,    NULL/*unused*/,
   1655                                     (UWord*)&sUB,    NULL/*unused*/,
   1656                                     (UWord)&sNegInf, 0/*unused*/,
   1657                                     (UWord)&sPosInf, 0/*unused*/,
   1658                                     (UWord)&sKey );
   1659            gOK = VG_(findBoundsFM)( giTree,
   1660                                     (UWord*)&gLB,    NULL/*unused*/,
   1661                                     (UWord*)&gUB,    NULL/*unused*/,
   1662                                     (UWord)&gNegInf, 0/*unused*/,
   1663                                     (UWord)&gPosInf, 0/*unused*/,
   1664                                     (UWord)&gKey );
   1665            if (!(sOK && gOK)) {
              /* If this happens, then [ea,ea+szB) partially overlaps
                 a global or stack block.  We can't represent that, so
                 just forget it (should be very rare).  However, do
                 maximal sanity checks first.  In such a partial
                 overlap case, [ea] and [ea+szB-1] can't both fall
                 within the same block: if they did, the access would
                 lie entirely inside that block and we wouldn't be in
                 this conditional at all. */
   1676               if (!sOK) {
   1677                  StackTreeNode *ndFirst, *ndLast;
   1678                  ndFirst = find_StackTreeNode( siTrees[tid], ea );
   1679                  ndLast  = find_StackTreeNode( siTrees[tid], ea+szB-1 );
   1680                  /* if both ends of the range fall inside a block,
   1681                     they can't be in the same block. */
   1682                  if (ndFirst && ndLast)
   1683                     tl_assert(ndFirst != ndLast);
   1684                  /* for each end of the range, if it is in a block,
   1685                     the range as a whole can't be entirely within the
   1686                     block. */
   1687                  if (ndFirst)
   1688                     tl_assert(!is_subinterval_of(ndFirst->addr,
   1689                                                  ndFirst->szB, ea, szB));
   1690                  if (ndLast)
   1691                     tl_assert(!is_subinterval_of(ndLast->addr,
   1692                                                  ndLast->szB, ea, szB));
   1693               }
   1694               if (!gOK) {
   1695                  GlobalTreeNode *ndFirst, *ndLast;
   1696                  ndFirst = find_GlobalTreeNode( giTree, ea );
   1697                  ndLast  = find_GlobalTreeNode( giTree, ea+szB-1 );
   1698                  /* if both ends of the range fall inside a block,
   1699                     they can't be in the same block. */
   1700                  if (ndFirst && ndLast)
   1701                     tl_assert(ndFirst != ndLast);
   1702                  /* for each end of the range, if it is in a block,
   1703                     the range as a whole can't be entirely within the
   1704                     block. */
   1705                  if (ndFirst)
   1706                     tl_assert(!is_subinterval_of(ndFirst->addr,
   1707                                                  ndFirst->szB, ea, szB));
   1708                  if (ndLast)
   1709                     tl_assert(!is_subinterval_of(ndLast->addr,
   1710                                                  ndLast->szB, ea, szB));
   1711               }
   1712               if (0) VG_(printf)("overlapping blocks in cache\n");
   1713               return;
   1714            }
   1715            sMin = sLB == &sNegInf  ? 0         : (sLB->addr + sLB->szB);
   1716            sMax = sUB == &sPosInf  ? ~(UWord)0 : (sUB->addr - 1);
   1717            gMin = gLB == &gNegInf  ? 0         : (gLB->addr + gLB->szB);
   1718            gMax = gUB == &gPosInf  ? ~(UWord)0 : (gUB->addr - 1);
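           /* Worked example (hypothetical addresses): if the nearest
              stack block below [ea,ea+szB) ends at 0x1000 and the
              nearest stack block above starts at 0x2000, then
              sMin == 0x1000 and sMax == 0x1fff; gMin/gMax are derived
              the same way from the global tree. */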
   1719            if (0) VG_(printf)("sMin %lx sMax %lx gMin %lx gMax %lx\n",
   1720                               sMin, sMax, gMin, gMax);
           /* [sMin,sMax] and [gMin,gMax] must both contain
              [ea,ea+szB), so they must overlap over at least
              [ea,ea+szB). */
   1724            tl_assert(sMin <= ea && ea+szB-1 <= sMax);
   1725            tl_assert(gMin <= ea && ea+szB-1 <= gMax);
   1726            /* So now compute their intersection. */
   1727            uMin = Addr__max( sMin, gMin );
   1728            uMax = Addr__min( sMax, gMax );
   1729            if (0) VG_(printf)("uMin %lx uMax %lx\n", uMin, uMax);
   1730            tl_assert(uMin <= uMax);
   1731            tl_assert(uMin <= ea && ea+szB-1 <= uMax);
           /* Finally, we can park [uMin,uMax] in the cache.  However,
              if uMax is ~0, then addr + szB for the cache entry would
              wrap around to exactly 0, which we can't represent;
              hence fudge uMax down by one. */
   1735            if (uMin < uMax && uMax == ~(UWord)0)
   1736               uMax--;
   1737            toadd_addr = uMin;
   1738            toadd_szB  = uMax - uMin + 1;
   1739            break;
   1740         }
   1741         default:
   1742            /* We should only be caching info for the above 3 cases */
           tl_assert(0);
   1744      } /* switch (inv->tag) */
   1745 
   1746      { /* and actually add this to the cache, finally */
   1747        Word i;
   1748        Word ip = cache->nInUse / 2; /* doesn't seem critical */
   1749 
   1750        if (cache->nInUse < N_QCACHE)
   1751           cache->nInUse++;
   1752        for (i = cache->nInUse-1; i > ip; i--) {
   1753           cache->elems[i] = cache->elems[i-1];
   1754        }
   1755 
   1756        tl_assert(toadd_szB > 0);
   1757        cache->elems[ip].addr = toadd_addr;
   1758        cache->elems[ip].szB  = toadd_szB;
   1759        cache->elems[ip].inv  = *inv;
   1760      }
   1761 
   1762      if (show) QCache__pp(cache, "after upd");
   1763 
   1764    }
   1765 }
   1766 
   1767 
   1768 /* CALLED FROM GENERATED CODE */
   1769 static
   1770 VG_REGPARM(3)
   1771 void helperc__mem_access ( /* Known only at run time: */
   1772                            Addr ea, Addr sp, Addr fp,
   1773                            /* Known at translation time: */
   1774                            Word sszB, Addr ip, XArray* ip_frameBlocks )
   1775 {
   1776    UWord szB;
   1777    IInstance* iinstance;
   1778    Invar* inv;
   1779    Invar new_inv;
   1780    ThreadId tid = VG_(get_running_tid)();
   1781    StackFrame* frame;
   1782    HChar bufE[160], bufA[160], bufD[32];
   1783 
   1784    stats__total_accesses++;
   1785 
   1786    tl_assert(is_sane_TId(tid));
   1787    frame = shadowStacks[tid];
   1788    tl_assert(frame);
   1789 
   1790    /* Find the instance info for this instruction. */
   1791    tl_assert(ip_frameBlocks);
   1792    iinstance = find_or_create_IInstance( frame, ip, ip_frameBlocks );
   1793    tl_assert(iinstance);
   1794    tl_assert(iinstance->blocks == ip_frameBlocks);
   1795 
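   /* sszB encodes both the size and the direction of the access: it
      is negative for stores and positive for loads (see
      instrument_mem_access). */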
   1796    szB = (sszB < 0) ? (-sszB) : sszB;
   1797    tl_assert(szB > 0);
   1798 
   1799    inv = &iinstance->invar;
   1800 
   1801    /* Deal with first uses of instruction instances. */
   1802    if (inv->tag == Inv_Unset) {
   1803       /* This is the first use of this instance of the instruction, so
   1804          we can't make any check; we merely record what we saw, so we
   1805          can compare it against what happens for 2nd and subsequent
   1806          accesses. */
   1807       classify_address( inv,
   1808                         tid, ea, sp, fp, szB, iinstance->blocks );
   1809       tl_assert(inv->tag != Inv_Unset);
   1810       return;
   1811    }
   1812 
   1813    /* So generate an Invar and see if it's different from what
   1814       we had before. */
   1815    classify_address( &new_inv,
   1816                      tid, ea, sp, fp, szB, iinstance->blocks );
   1817    tl_assert(new_inv.tag != Inv_Unset);
   1818 
   1819    /* Did we see something different from before?  If no, then there's
   1820       no error. */
   1821    tl_assert(inv->tag != Inv_Unset);
   1822 
   1823    if (LIKELY(eq_Invar(&new_inv, inv)))
   1824       return;
   1825 
   1826    VG_(memset)(bufE, 0, sizeof(bufE));
   1827    show_Invar( bufE, sizeof(bufE)-1, inv, frame->depth );
   1828 
   1829    VG_(memset)(bufA, 0, sizeof(bufA));
   1830    show_Invar( bufA, sizeof(bufA)-1, &new_inv, frame->depth );
   1831 
   1832    VG_(memset)(bufD, 0, sizeof(bufD));
   1833    UWord absDelta;
   1834    gen_delta_str( bufD, &absDelta, inv, ea );
   1835 
   1836    if (absDelta < 1024)
   1837       sg_record_error_SorG( tid, ea, sszB, bufE, bufA, bufD );
   1838 
   1839    /* And now install the new observation as "standard", so as to
   1840       make future error messages make more sense. */
   1841    *inv = new_inv;
   1842 }
   1843 
   1844 
   1845 ////////////////////////////////////////
   1846 /* Primary push-a-new-frame routine.  Called indirectly from
   1847    generated code. */
   1848 
   1849 static UWord stats__max_sitree_size = 0;
   1850 static UWord stats__max_gitree_size = 0;
   1851 
   1852 static
   1853 void shadowStack_new_frame ( ThreadId tid,
   1854                              Addr     sp_at_call_insn,
   1855                              Addr     sp_post_call_insn,
   1856                              Addr     fp_at_call_insn,
   1857                              Addr     ip_post_call_insn,
   1858                              XArray*  descrs_at_call_insn )
   1859 {
   1860    StackFrame *callee, *caller;
   1861    tl_assert(is_sane_TId(tid));
   1862 
   1863    caller = shadowStacks[tid];
   1864    tl_assert(caller);
   1865 
   1866    if (caller->outer) { /* "this is not the outermost frame" */
   1867       tl_assert(caller->outer->inner == caller);
   1868       tl_assert(caller->outer->depth >= 0);
   1869       tl_assert(1 + caller->outer->depth == caller->depth);
   1870    } else {
   1871       tl_assert(caller->depth == 0);
   1872    }
   1873 
   1874    caller->sp_at_call = sp_at_call_insn;
   1875    caller->fp_at_call = fp_at_call_insn;
   1876 
   1877    if (descrs_at_call_insn) {
   1878       tl_assert( VG_(sizeXA)(descrs_at_call_insn) > 0 );
   1879       caller->blocks_added_by_call
   1880          = calculate_StackBlock_EAs( descrs_at_call_insn,
   1881                                      sp_at_call_insn, fp_at_call_insn );
   1882       if (caller->blocks_added_by_call)
   1883          add_blocks_to_StackTree( siTrees[tid],
   1884                                   descrs_at_call_insn,
   1885                                   caller->blocks_added_by_call,
   1886                                   caller->depth /* stack depth at which
   1887                                                    these blocks are
   1888                                                    considered to exist*/ );
   1889       if (1) {
   1890          UWord s  = VG_(sizeFM)( siTrees[tid] );
   1891          UWord g  = VG_(sizeFM)( giTree );
   1892          Bool  sb = s > stats__max_sitree_size;
   1893          Bool  gb = g > stats__max_gitree_size;
   1894          if (sb) stats__max_sitree_size = s;
   1895          if (gb) stats__max_gitree_size = g;
   1896          if (0 && (sb || gb))
   1897             VG_(message)(Vg_DebugMsg,
   1898                          "exp-sgcheck: new max tree sizes: "
   1899                          "StackTree %lu, GlobalTree %lu\n",
   1900                          stats__max_sitree_size, stats__max_gitree_size );
   1901       }
   1902    } else {
   1903       caller->blocks_added_by_call = NULL;
   1904    }
   1905 
   1906    /* caller->blocks_added_by_call is used again (and then freed) when
   1907       this frame is removed from the stack. */
   1908 
   1909    if (caller->inner) {
   1910       callee = caller->inner;
   1911    } else {
   1912       callee = sg_malloc("di.sg_main.sSnf.1", sizeof(StackFrame));
   1913       VG_(memset)(callee, 0, sizeof(StackFrame));
   1914       callee->outer = caller;
   1915       caller->inner = callee;
   1916       callee->depth = 1 + caller->depth;
   1917       tl_assert(callee->inner == NULL);
   1918    }
   1919 
   1920    /* This sets up .htab, .htab_size and .htab_used */
   1921    initialise_II_hash_table( callee );
   1922 
   1923    callee->creation_sp    = sp_post_call_insn;
   1924    callee->sp_at_call     = 0; // not actually required ..
   1925    callee->fp_at_call     = 0; // .. these 3 initialisations are ..
   1926    callee->blocks_added_by_call = NULL; // .. just for cleanness
   1927 
   1928    /* record the new running stack frame */
   1929    shadowStacks[tid] = callee;
   1930 
   1931    /* and this thread's query cache is now invalid */
   1932    QCache__invalidate( &qcaches[tid] );
   1933 
   1934    if (0)
   1935    { Word d = callee->depth;
   1936      const HChar *fnname;
   1937      Bool ok;
   1938      Addr ip = ip_post_call_insn;
   1939      ok = VG_(get_fnname_w_offset)( ip, &fnname );
   1940      while (d > 0) {
   1941         VG_(printf)(" ");
   1942         d--;
   1943      }
   1944      VG_(printf)("> %s %#lx\n", ok ? fnname : "???", ip);
   1945    }
   1946 }
   1947 
   1948 /* CALLED FROM GENERATED CODE */
   1949 static
   1950 VG_REGPARM(3)
   1951 void helperc__new_frame ( Addr sp_post_call_insn,
   1952                           Addr fp_at_call_insn,
   1953                           Addr ip_post_call_insn,
   1954                           XArray* blocks_at_call_insn,
   1955                           Word sp_adjust )
   1956 {
   1957    ThreadId tid = VG_(get_running_tid)();
   1958    Addr     sp_at_call_insn = sp_post_call_insn + sp_adjust;
   1959    shadowStack_new_frame( tid,
   1960                           sp_at_call_insn,
   1961                           sp_post_call_insn,
   1962                           fp_at_call_insn,
   1963                           ip_post_call_insn,
   1964                           blocks_at_call_insn );
   1965 }
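
/* For example (hypothetical values): on a 64-bit guest,
   sg_instrument_final_jump passes sp_adjust == 8 -- the size of the
   return address the call pushed -- so the SP at the call instruction
   is recovered as sp_post_call_insn + 8. */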
   1966 
   1967 
   1968 ////////////////////////////////////////
   1969 /* Primary remove-frame(s) routine.  Called indirectly from
   1970    generated code. */
   1971 
   1972 __attribute__((noinline))
   1973 static void shadowStack_unwind ( ThreadId tid, Addr sp_now )
   1974 {
   1975    StackFrame *innermost, *innermostOrig;
   1976    tl_assert(is_sane_TId(tid));
   1977    innermost = shadowStacks[tid];
   1978    tl_assert(innermost);
   1979    innermostOrig = innermost;
   1980    //VG_(printf)("UNWIND sp_new = %p\n", sp_now);
   1981    while (1) {
   1982       if (!innermost->outer)
   1983          break;
   1984       if (innermost->inner)
   1985          tl_assert(innermost->inner->outer == innermost);
   1986       tl_assert(innermost->outer->inner == innermost);
   1987       tl_assert(innermost->blocks_added_by_call == NULL);
   1988       if (sp_now <= innermost->creation_sp) break;
   1989       //VG_(printf)("UNWIND     dump %p\n", innermost->creation_sp);
   1990       tl_assert(innermost->htab);
   1991       if (innermost->htab != &innermost->htab_fixed[0])
   1992          sg_free(innermost->htab);
   1993       /* be on the safe side */
   1994       innermost->creation_sp = 0;
   1995       innermost->htab = NULL;
   1996       innermost->htab_size = 0;
   1997       innermost->htab_used = 0;
   1998       innermost->sp_at_call = 0;
   1999       innermost->fp_at_call = 0;
   2000       innermost->blocks_added_by_call = NULL;
   2001       innermost = innermost->outer;
   2002 
   2003       /* So now we're "back" in the calling frame.  Remove from this
   2004          thread's stack-interval-tree, the blocks added at the time of
   2005          the call. */
   2006 
   2007       if (innermost->outer) { /* not at the outermost frame */
         if (innermost->blocks_added_by_call != NULL) {
            del_blocks_from_StackTree( siTrees[tid],
                                       innermost->blocks_added_by_call );
            VG_(deleteXA)( innermost->blocks_added_by_call );
            innermost->blocks_added_by_call = NULL;
         }
   2015       }
   2016       /* That completes the required tidying of the interval tree
   2017          associated with the frame we just removed. */
   2018 
   2019       if (0) {
   2020          Word d = innermost->depth;
   2021          while (d > 0) {
   2022             VG_(printf)(" ");
   2023             d--;
   2024          }
   2025          VG_(printf)("X\n");
   2026       }
   2027 
   2028    }
   2029 
   2030    tl_assert(innermost);
   2031 
   2032    if (innermost != innermostOrig) {
   2033       shadowStacks[tid] = innermost;
   2034       /* this thread's query cache is now invalid */
   2035       QCache__invalidate( &qcaches[tid] );
   2036    }
   2037 }
   2038 
   2039 
   2040 
   2041 //////////////////////////////////////////////////////////////
   2042 //                                                          //
   2043 // Instrumentation                                          //
   2044 //                                                          //
   2045 //////////////////////////////////////////////////////////////
   2046 
   2047 /* What does instrumentation need to do?
   2048 
   2049    - at each Call transfer, generate a call to shadowStack_new_frame
   2050      do this by manually inspecting the IR
   2051 
   2052    - at each sp change, if the sp change is negative,
   2053      call shadowStack_unwind
   2054      do this by asking for SP-change analysis
   2055 
   2056    - for each memory referencing instruction,
   2057      call helperc__mem_access
   2058 */
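
/* As an illustrative sketch only (the exact IR depends on the guest
   and word size), a 4-byte load

      t1 = LDle:I32(t0)

   is preceded by instrumentation along the lines of

      t_sp = GET:I32(<SP offset>)
      t_fp = GET:I32(<FP offset>)
      DIRTY helperc__mem_access(t0, t_sp, t_fp, 4, <insn addr>,
                                <frame-blocks ptr>)

   with the size argument negated (-4) for a store; see
   instrument_mem_access and gen_Get_SP/gen_Get_FP below. */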
   2059 
   2060 /* A complication: sg_ instrumentation and h_ instrumentation need to
   2061    be interleaved.  Since the latter is a lot more complex than the
   2062    former, we split the sg_ instrumentation here into four functions
   2063    and let the h_ instrumenter call the four functions as it goes.
   2064    Hence the h_ instrumenter drives the sg_ instrumenter.
   2065 
   2066    To make this viable, the sg_ instrumenter carries what running
   2067    state it needs in 'struct _SGEnv'.  This is exported only
   2068    abstractly from this file.
   2069 */
   2070 
   2071 struct _SGEnv {
   2072    /* the current insn's IP */
   2073    Addr curr_IP;
   2074    /* whether the above is actually known */
   2075    Bool curr_IP_known;
   /* if we find a mem ref, is it the first for this insn?  Used for
      detecting insns which make more than one memory ref, a situation
      we can't handle properly; so we ignore all but the first ref. */
   2080    Bool firstRef;
   2081    /* READONLY */
   2082    IRTemp (*newIRTemp_cb)(IRType,void*);
   2083    void* newIRTemp_opaque;
   2084 };
   2085 
   2086 
   2087 /* --- Helper fns for instrumentation --- */
   2088 
   2089 static IRTemp gen_Get_SP ( struct _SGEnv*  sge,
   2090                            IRSB*           bbOut,
   2091                            const VexGuestLayout* layout,
   2092                            Int             hWordTy_szB )
   2093 {
   2094    IRExpr* sp_expr;
   2095    IRTemp  sp_temp;
   2096    IRType  sp_type;
   2097    /* This in effect forces the host and guest word sizes to be the
   2098       same. */
   2099    tl_assert(hWordTy_szB == layout->sizeof_SP);
   2100    sp_type = layout->sizeof_SP == 8 ? Ity_I64 : Ity_I32;
   2101    sp_expr = IRExpr_Get( layout->offset_SP, sp_type );
   2102    sp_temp = sge->newIRTemp_cb( sp_type, sge->newIRTemp_opaque );
   2103    addStmtToIRSB( bbOut, IRStmt_WrTmp( sp_temp, sp_expr ) );
   2104    return sp_temp;
   2105 }
   2106 
   2107 static IRTemp gen_Get_FP ( struct _SGEnv*  sge,
   2108                            IRSB*           bbOut,
   2109                            const VexGuestLayout* layout,
   2110                            Int             hWordTy_szB )
   2111 {
   2112    IRExpr* fp_expr;
   2113    IRTemp  fp_temp;
   2114    IRType  fp_type;
   2115    /* This in effect forces the host and guest word sizes to be the
   2116       same. */
   2117    tl_assert(hWordTy_szB == layout->sizeof_SP);
   2118    fp_type = layout->sizeof_FP == 8 ? Ity_I64 : Ity_I32;
   2119    fp_expr = IRExpr_Get( layout->offset_FP, fp_type );
   2120    fp_temp = sge->newIRTemp_cb( fp_type, sge->newIRTemp_opaque );
   2121    addStmtToIRSB( bbOut, IRStmt_WrTmp( fp_temp, fp_expr ) );
   2122    return fp_temp;
   2123 }
   2124 
   2125 static void instrument_mem_access ( struct _SGEnv* sge,
   2126                                     IRSB*   bbOut,
   2127                                     IRExpr* addr,
   2128                                     Int     szB,
   2129                                     Bool    isStore,
   2130                                     Int     hWordTy_szB,
   2131                                     Addr    curr_IP,
   2132                                     const VexGuestLayout* layout )
   2133 {
   2134    IRType  tyAddr      = Ity_INVALID;
   2135    XArray* frameBlocks = NULL;
   2136 
   2137    tl_assert(isIRAtom(addr));
   2138    tl_assert(hWordTy_szB == 4 || hWordTy_szB == 8);
   2139 
   2140    tyAddr = typeOfIRExpr( bbOut->tyenv, addr );
   2141    tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
   2142 
   2143 #if defined(VGA_x86)
   2144    { UChar* p = (UChar*)curr_IP;
   2145      // pop %ebp; RET
   2146      if (p[0] == 0xc3 && p[-1] == 0x5d) return;
   2147      // pop %ebp; RET $imm16
   2148      if (p[0] == 0xc2 && p[-1] == 0x5d) return;
   2149      // PUSH %EBP; mov %esp,%ebp
   2150      if (p[0] == 0x55 && p[1] == 0x89 && p[2] == 0xe5) return;
   2151    }
   2152 #endif
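
   /* (Presumably the early returns above avoid instrumenting the
      memory accesses made by these prologue/epilogue sequences, where
      %ebp is being saved or restored and FP-relative classification
      would be misleading.  This rationale is an inference; the
      original gives none.) */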
   2153 
   2154    /* First off, find or create the StackBlocks for this instruction. */
   2155    frameBlocks = get_StackBlocks_for_IP( curr_IP );
   2156    tl_assert(frameBlocks);
   2157    //if (VG_(sizeXA)( frameBlocks ) == 0)
   2158    //   frameBlocks = NULL;
   2159 
   2160    /* Generate a call to "helperc__mem_access", passing:
   2161          addr current_SP current_FP szB curr_IP frameBlocks
   2162    */
   2163    { IRTemp t_SP = gen_Get_SP( sge, bbOut, layout, hWordTy_szB );
   2164      IRTemp t_FP = gen_Get_FP( sge, bbOut, layout, hWordTy_szB );
   2165      IRExpr** args
   2166         = mkIRExprVec_6( addr,
   2167                          IRExpr_RdTmp(t_SP),
   2168                          IRExpr_RdTmp(t_FP),
   2169                          mkIRExpr_HWord( isStore ? (-szB) : szB ),
   2170                          mkIRExpr_HWord( curr_IP ),
   2171                          mkIRExpr_HWord( (HWord)frameBlocks ) );
   2172      IRDirty* di
   2173         = unsafeIRDirty_0_N( 3/*regparms*/,
   2174                              "helperc__mem_access",
                             VG_(fnptr_to_fnentry)( &helperc__mem_access ),
   2176                              args );
   2177 
   2178      addStmtToIRSB( bbOut, IRStmt_Dirty(di) );
   2179    }
   2180 }
   2181 
   2182 
   2183 /* --- Instrumentation main (4 fns) --- */
   2184 
   2185 struct _SGEnv * sg_instrument_init ( IRTemp (*newIRTemp_cb)(IRType,void*),
   2186                                      void* newIRTemp_opaque )
   2187 {
   2188    struct _SGEnv * env = sg_malloc("di.sg_main.sii.1",
   2189                                    sizeof(struct _SGEnv));
   2190    tl_assert(env);
   2191    env->curr_IP          = 0;
   2192    env->curr_IP_known    = False;
   2193    env->firstRef         = True;
   2194    env->newIRTemp_cb     = newIRTemp_cb;
   2195    env->newIRTemp_opaque = newIRTemp_opaque;
   2196    return env;
   2197 }
   2198 
   2199 void sg_instrument_fini ( struct _SGEnv * env )
   2200 {
   2201    sg_free(env);
   2202 }
   2203 
   2204 /* Add instrumentation for 'st' to 'sbOut', and possibly modify 'env'
   2205    as required.  This must be called before 'st' itself is added to
   2206    'sbOut'. */
   2207 void sg_instrument_IRStmt ( /*MOD*/struct _SGEnv * env,
   2208                             /*MOD*/IRSB* sbOut,
   2209                             IRStmt* st,
   2210                             const VexGuestLayout* layout,
   2211                             IRType gWordTy, IRType hWordTy )
   2212 {
   2213    if (!sg_clo_enable_sg_checks)
   2214       return;
   2215 
   2216    tl_assert(st);
   2217    tl_assert(isFlatIRStmt(st));
   2218    switch (st->tag) {
   2219       case Ist_NoOp:
   2220       case Ist_AbiHint:
   2221       case Ist_Put:
   2222       case Ist_PutI:
   2223       case Ist_MBE:
   2224          /* None of these can contain any memory references. */
   2225          break;
   2226 
   2227       case Ist_Exit:
   2228          tl_assert(st->Ist.Exit.jk != Ijk_Call);
   2229          /* else we must deal with a conditional call */
   2230          break;
   2231 
   2232       case Ist_IMark:
   2233          env->curr_IP_known = True;
   2234          env->curr_IP       = st->Ist.IMark.addr;
   2235          env->firstRef      = True;
   2236          break;
   2237 
   2238       case Ist_Store:
   2239          tl_assert(env->curr_IP_known);
   2240          if (env->firstRef) {
   2241             instrument_mem_access(
   2242                env, sbOut,
   2243                st->Ist.Store.addr,
   2244                sizeofIRType(typeOfIRExpr(sbOut->tyenv, st->Ist.Store.data)),
   2245                True/*isStore*/,
   2246                sizeofIRType(hWordTy),
   2247                env->curr_IP, layout
   2248             );
   2249             env->firstRef = False;
   2250          }
   2251          break;
   2252 
   2253       case Ist_WrTmp: {
   2254          IRExpr* data = st->Ist.WrTmp.data;
   2255          if (data->tag == Iex_Load) {
   2256             tl_assert(env->curr_IP_known);
   2257             if (env->firstRef) {
   2258                instrument_mem_access(
   2259                   env, sbOut,
   2260                   data->Iex.Load.addr,
   2261                   sizeofIRType(data->Iex.Load.ty),
   2262                   False/*!isStore*/,
   2263                   sizeofIRType(hWordTy),
   2264                   env->curr_IP, layout
   2265                );
   2266                env->firstRef = False;
   2267             }
   2268          }
   2269          break;
   2270       }
   2271 
   2272       case Ist_Dirty: {
   2273          Int      dataSize;
   2274          IRDirty* d = st->Ist.Dirty.details;
   2275          if (d->mFx != Ifx_None) {
   2276             /* This dirty helper accesses memory.  Collect the
   2277                details. */
   2278             tl_assert(env->curr_IP_known);
   2279             if (env->firstRef) {
   2280                tl_assert(d->mAddr != NULL);
   2281                tl_assert(d->mSize != 0);
   2282                dataSize = d->mSize;
   2283                if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
   2284                   instrument_mem_access(
   2285                      env, sbOut, d->mAddr, dataSize, False/*!isStore*/,
   2286                      sizeofIRType(hWordTy), env->curr_IP, layout
   2287                   );
   2288                }
   2289                if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
   2290                   instrument_mem_access(
   2291                      env, sbOut, d->mAddr, dataSize, True/*isStore*/,
   2292                      sizeofIRType(hWordTy), env->curr_IP, layout
   2293                   );
   2294                }
   2295                env->firstRef = False;
   2296             }
   2297          } else {
   2298             tl_assert(d->mAddr == NULL);
   2299             tl_assert(d->mSize == 0);
   2300          }
   2301          break;
   2302       }
   2303 
   2304       case Ist_CAS: {
   2305          /* We treat it as a read and a write of the location.  I
   2306             think that is the same behaviour as it was before IRCAS
   2307             was introduced, since prior to that point, the Vex front
   2308             ends would translate a lock-prefixed instruction into a
   2309             (normal) read followed by a (normal) write. */
   2310          if (env->firstRef) {
   2311             Int    dataSize;
   2312             IRCAS* cas = st->Ist.CAS.details;
   2313             tl_assert(cas->addr != NULL);
   2314             tl_assert(cas->dataLo != NULL);
   2315             dataSize = sizeofIRType(typeOfIRExpr(sbOut->tyenv, cas->dataLo));
   2316             if (cas->dataHi != NULL)
   2317                dataSize *= 2; /* since it's a doubleword-CAS */
   2318             instrument_mem_access(
   2319                env, sbOut, cas->addr, dataSize, False/*!isStore*/,
   2320                sizeofIRType(hWordTy), env->curr_IP, layout
   2321             );
   2322             instrument_mem_access(
   2323                env, sbOut, cas->addr, dataSize, True/*isStore*/,
   2324                sizeofIRType(hWordTy), env->curr_IP, layout
   2325             );
   2326             env->firstRef = False;
   2327          }
   2328          break;
   2329       }
   2330 
   2331       default:
   2332          tl_assert(0);
   2333 
   2334    } /* switch (st->tag) */
   2335 }
   2336 
   2337 
   2338 /* Add instrumentation for the final jump of an IRSB 'sbOut', and
   2339    possibly modify 'env' as required.  This must be the last
   2340    instrumentation statement in the block. */
   2341 void sg_instrument_final_jump ( /*MOD*/struct _SGEnv * env,
   2342                                 /*MOD*/IRSB* sbOut,
   2343                                 IRExpr* next,
   2344                                 IRJumpKind jumpkind,
   2345                                 const VexGuestLayout* layout,
   2346                                 IRType gWordTy, IRType hWordTy )
   2347 {
   2348    if (!sg_clo_enable_sg_checks)
   2349       return;
   2350 
   2351    if (jumpkind == Ijk_Call) {
   2352       // Assumes x86 or amd64
   2353       IRTemp   sp_post_call_insn, fp_post_call_insn;
   2354       XArray*  frameBlocks;
   2355       IRExpr** args;
   2356       IRDirty* di;
   2357       sp_post_call_insn
   2358          = gen_Get_SP( env, sbOut, layout, sizeofIRType(hWordTy) );
   2359       fp_post_call_insn
   2360          = gen_Get_FP( env, sbOut, layout, sizeofIRType(hWordTy) );
   2361       tl_assert(env->curr_IP_known);
   2362       frameBlocks = get_StackBlocks_for_IP( env->curr_IP );
   2363       tl_assert(frameBlocks);
   2364       if (VG_(sizeXA)(frameBlocks) == 0)
   2365          frameBlocks = NULL;
   2366       args
   2367          = mkIRExprVec_5(
   2368               IRExpr_RdTmp(sp_post_call_insn),
              IRExpr_RdTmp(fp_post_call_insn), /* assume the call doesn't change FP */
   2371               next,
   2372               mkIRExpr_HWord( (HWord)frameBlocks ),
   2373               mkIRExpr_HWord( sizeofIRType(gWordTy) )
   2374            );
   2375       di = unsafeIRDirty_0_N(
   2376               3/*regparms*/,
   2377               "helperc__new_frame",
   2378               VG_(fnptr_to_fnentry)( &helperc__new_frame ),
   2379               args );
   2380       addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
   2381    }
   2382 }
   2383 
   2384 
   2385 //////////////////////////////////////////////////////////////
   2386 //                                                          //
   2387 // end Instrumentation                                      //
   2388 //                                                          //
   2389 //////////////////////////////////////////////////////////////
   2390 
   2391 
   2392 //////////////////////////////////////////////////////////////
   2393 //                                                          //
   2394 // misc                                                     //
   2395 //                                                          //
   2396 //////////////////////////////////////////////////////////////
   2397 
   2398 /* Make a new empty stack frame that is suitable for being the
   2399    outermost frame in a stack.  It has a creation_sp of effectively
   2400    infinity, so it can never be removed. */
   2401 static StackFrame* new_root_StackFrame ( void )
   2402 {
   2403    StackFrame* sframe = sg_malloc("di.sg_main.nrS.1", sizeof(StackFrame));
   2404    VG_(memset)( sframe, 0, sizeof(*sframe) );
   2405    sframe->creation_sp = ~0UL;
   2406 
   2407    /* This sets up .htab, .htab_size and .htab_used */
   2408    initialise_II_hash_table( sframe );
   2409 
   2410    /* ->depth, ->outer, ->inner are 0, NULL, NULL */
   2411 
   2412    return sframe;
   2413 }
   2414 
   2415 /* Primary routine for setting up the shadow stack for a new thread.
   2416    Note that this is used to create not only child thread stacks, but
   2417    the root thread's stack too.  We create a new stack with
   2418    .creation_sp set to infinity, so that the outermost frame can never
   2419    be removed (by shadowStack_unwind).  The core calls this function
   2420    as soon as a thread is created.  We cannot yet get its SP value,
   2421    since that may not yet be set. */
   2422 static void shadowStack_thread_create ( ThreadId parent, ThreadId child )
   2423 {
   2424    tl_assert(is_sane_TId(child));
   2425    if (parent == VG_INVALID_THREADID) {
   2426       /* creating the main thread's stack */
   2427    } else {
   2428       tl_assert(is_sane_TId(parent));
   2429       tl_assert(parent != child);
   2430       tl_assert(shadowStacks[parent] != NULL);
   2431       tl_assert(siTrees[parent] != NULL);
   2432    }
   2433 
   2434    /* Create the child's stack.  Bear in mind we may be re-using
   2435       it. */
   2436    if (shadowStacks[child] == NULL) {
      /* First use of this stack: nothing to reclaim.  The initial
         frame is allocated below. */
   2438       tl_assert(siTrees[child] == NULL);
   2439    } else {
   2440       StackFrame *frame, *frame2;
   2441       /* re-using a stack. */
   2442       /* get rid of the interval tree */
   2443       tl_assert(siTrees[child] != NULL);
   2444       delete_StackTree( siTrees[child] );
   2445       siTrees[child] = NULL;
   2446       /* Throw away all existing frames. */
   2447       frame = shadowStacks[child];
   2448       while (frame->outer)
   2449          frame = frame->outer;
   2450       tl_assert(frame->depth == 0);
   2451       while (frame) {
   2452          frame2 = frame->inner;
   2453          if (frame2) tl_assert(1 + frame->depth == frame2->depth);
   2454          sg_free(frame);
   2455          frame = frame2;
   2456       }
   2457       shadowStacks[child] = NULL;
   2458    }
   2459 
   2460    tl_assert(shadowStacks[child] == NULL);
   2461    tl_assert(siTrees[child] == NULL);
   2462 
   2463    /* Set up the initial stack frame. */
   2464    shadowStacks[child] = new_root_StackFrame();
   2465 
   2466    /* and set up the child's stack block interval tree. */
   2467    siTrees[child] = new_StackTree();
   2468 }
   2469 
   2470 /* Once a thread is ready to go, the core calls here.  We take the
   2471    opportunity to push a second frame on its stack, with the
   2472    presumably valid SP value that is going to be used for the thread's
   2473    startup.  Hence we should always wind up with a valid outermost
   2474    frame for the thread. */
   2475 static void shadowStack_set_initial_SP ( ThreadId tid )
   2476 {
   2477    StackFrame* sf;
   2478    tl_assert(is_sane_TId(tid));
   2479    sf = shadowStacks[tid];
   2480    tl_assert(sf != NULL);
   2481    tl_assert(sf->outer == NULL);
   2482    tl_assert(sf->inner == NULL);
   2483    tl_assert(sf->creation_sp == ~0UL);
   2484    shadowStack_new_frame( tid, 0, VG_(get_SP)(tid),
   2485                                0, VG_(get_IP)(tid), NULL );
   2486 }
   2487 
   2488 
   2489 //////////////////////////////////////////////////////////////
   2490 //                                                          //
   2491 // main-ish                                                 //
   2492 //                                                          //
   2493 //////////////////////////////////////////////////////////////
   2494 
   2495 /* CALLED indirectly FROM GENERATED CODE.  Calls here are created by
   sp-change analysis, as requested in pc_pre_clo_init(). */
   2497 void sg_die_mem_stack ( Addr old_SP, SizeT len ) {
   2498    ThreadId  tid = VG_(get_running_tid)();
   2499    shadowStack_unwind( tid, old_SP+len );
   2500 }
   2501 
   2502 void sg_pre_clo_init ( void ) {
   2503    ourGlobals_init();
   2504    init_StackBlocks_set();
   2505    init_GlobalBlock_set();
   2506 }
   2507 
   2508 void sg_post_clo_init ( void ) {
   2509 }
   2510 
   2511 void sg_pre_thread_ll_create ( ThreadId parent, ThreadId child ) {
   2512    shadowStack_thread_create(parent, child);
   2513 }
   2514 
   2515 void sg_pre_thread_first_insn ( ThreadId tid ) {
   2516    shadowStack_set_initial_SP(tid);
   2517 }
   2518 
   2519 void sg_fini(Int exitcode)
   2520 {
   2521    if (VG_(clo_stats)) {
   2522       VG_(message)(Vg_DebugMsg,
   2523          " sg_:  %'llu total accesses, of which:\n", stats__total_accesses);
   2524       VG_(message)(Vg_DebugMsg,
   2525          " sg_:     stack0: %'12llu classify\n",
   2526          stats__classify_Stack0);
   2527       VG_(message)(Vg_DebugMsg,
   2528          " sg_:     stackN: %'12llu classify\n",
   2529          stats__classify_StackN);
   2530       VG_(message)(Vg_DebugMsg,
   2531          " sg_:     global: %'12llu classify\n",
   2532          stats__classify_Global);
   2533       VG_(message)(Vg_DebugMsg,
   2534          " sg_:    unknown: %'12llu classify\n",
   2535          stats__classify_Unknown);
   2536       VG_(message)(Vg_DebugMsg,
   2537          " sg_:  %'llu Invars preened, of which %'llu changed\n",
   2538          stats__Invars_preened, stats__Invars_changed);
   2539       VG_(message)(Vg_DebugMsg,
   2540          " sg_:   t_i_b_MT: %'12llu\n", stats__t_i_b_empty);
   2541       VG_(message)(Vg_DebugMsg,
   2542          " sg_:     qcache: %'llu searches, %'llu probes, %'llu misses\n",
   2543          stats__qcache_queries, stats__qcache_probes, stats__qcache_misses);
   2544       VG_(message)(Vg_DebugMsg,
   2545          " sg_:  htab-fast: %'llu hits\n",
   2546          stats__htab_fast);
   2547       VG_(message)(Vg_DebugMsg,
   2548          " sg_:  htab-slow: %'llu searches, %'llu probes, %'llu resizes\n",
   2549          stats__htab_searches, stats__htab_probes, stats__htab_resizes);
   2550    }
   2551 }
   2552 
   2553 
   2554 /*--------------------------------------------------------------------*/
   2555 /*--- end                                                sg_main.c ---*/
   2556 /*--------------------------------------------------------------------*/
   2557