Home | History | Annotate | Download | only in exp-ptrcheck
      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- Ptrcheck: a pointer-use checker.                             ---*/
      4 /*--- This file checks stack and global array accesses.            ---*/
      5 /*---                                                    sg_main.c ---*/
      6 /*--------------------------------------------------------------------*/
      7 
      8 /*
      9    This file is part of Ptrcheck, a Valgrind tool for checking pointer
     10    use in programs.
     11 
     12    Copyright (C) 2008-2010 OpenWorks Ltd
     13       info (at) open-works.co.uk
     14 
     15    This program is free software; you can redistribute it and/or
     16    modify it under the terms of the GNU General Public License as
     17    published by the Free Software Foundation; either version 2 of the
     18    License, or (at your option) any later version.
     19 
     20    This program is distributed in the hope that it will be useful, but
     21    WITHOUT ANY WARRANTY; without even the implied warranty of
     22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     23    General Public License for more details.
     24 
     25    You should have received a copy of the GNU General Public License
     26    along with this program; if not, write to the Free Software
     27    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     28    02111-1307, USA.
     29 
     30    The GNU General Public License is contained in the file COPYING.
     31 
     32    Neither the names of the U.S. Department of Energy nor the
     33    University of California nor the names of its contributors may be
     34    used to endorse or promote products derived from this software
     35    without prior written permission.
     36 */
     37 
     38 #include "pub_tool_basics.h"
     39 #include "pub_tool_libcbase.h"
     40 #include "pub_tool_libcassert.h"
     41 #include "pub_tool_libcprint.h"
     42 #include "pub_tool_tooliface.h"
     43 #include "pub_tool_wordfm.h"
     44 #include "pub_tool_xarray.h"
     45 #include "pub_tool_threadstate.h"
     46 #include "pub_tool_mallocfree.h"
     47 #include "pub_tool_machine.h"
     48 #include "pub_tool_debuginfo.h"
     49 #include "pub_tool_options.h"
     50 
     51 #include "pc_common.h"
     52 
     53 #include "sg_main.h"      // self
     54 
     55 
     56 static
     57 void preen_global_Invars ( Addr a, SizeT len ); /*fwds*/
     58 
     59 
     60 //////////////////////////////////////////////////////////////
     61 //                                                          //
     62 // Basic Stuff                                              //
     63 //                                                          //
     64 //////////////////////////////////////////////////////////////
     65 
     66 static inline Bool is_sane_TId ( ThreadId tid )
     67 {
     68    return tid >= 0 && tid < VG_N_THREADS
     69           && tid != VG_INVALID_THREADID;
     70 }
     71 
     72 static void* sg_malloc ( HChar* cc, SizeT n ) {
     73    void* p;
     74    tl_assert(n > 0);
     75    p = VG_(malloc)( cc, n );
     76    tl_assert(p);
     77    return p;
     78 }
     79 
     80 static void sg_free ( void* p ) {
     81    tl_assert(p);
     82    VG_(free)(p);
     83 }
     84 
     85 
     86 /* Compare the intervals [a1,a1+n1) and [a2,a2+n2).  Return -1 if the
     87    first interval is lower, 1 if the first interval is higher, and 0
     88    if there is any overlap.  Redundant paranoia with casting is there
     89    following what looked distinctly like a bug in gcc-4.1.2, in which
     90    some of the comparisons were done signedly instead of
     91    unsignedly. */
     92 inline
     93 static Word cmp_nonempty_intervals ( Addr a1, SizeT n1,
     94                                      Addr a2, SizeT n2 ) {
     95    UWord a1w = (UWord)a1;
     96    UWord n1w = (UWord)n1;
     97    UWord a2w = (UWord)a2;
     98    UWord n2w = (UWord)n2;
     99    tl_assert(n1w > 0 && n2w > 0);
    100    if (a1w + n1w <= a2w) return -1L;
    101    if (a2w + n2w <= a1w) return 1L;
    102    return 0;
    103 }
    104 
    105 /* Return true iff [aSmall,aSmall+nSmall) is entirely contained
    106    within [aBig,aBig+nBig). */
    107 inline
    108 static Bool is_subinterval_of ( Addr aBig, SizeT nBig,
    109                                 Addr aSmall, SizeT nSmall ) {
    110    tl_assert(nBig > 0 && nSmall > 0);
    111    return aBig <= aSmall && aSmall + nSmall <= aBig + nBig;
    112 }
    113 
    114 inline
    115 static Addr Addr__min ( Addr a1, Addr a2 ) {
    116    return a1 < a2 ? a1 : a2;
    117 }
    118 
    119 inline
    120 static Addr Addr__max ( Addr a1, Addr a2 ) {
    121    return a1 < a2 ? a2 : a1;
    122 }
    123 
    124 
    125 //////////////////////////////////////////////////////////////
    126 //                                                          //
    127 // StackBlocks Persistent Cache                             //
    128 //                                                          //
    129 //////////////////////////////////////////////////////////////
    130 
    131 /* We maintain a set of XArray* of StackBlocks.  These are never
    132    freed.  When a new StackBlock vector is acquired from
    133    VG_(di_get_local_blocks_at_ip), we compare it to the existing set.
    134    If not present, it is added.  If present, the just-acquired one is
    135    freed and the copy used.
    136 
    137    This simplifies storage management elsewhere.  It allows us to
    138    assume that a pointer to an XArray* of StackBlock is valid forever.
    139    It also means there are no duplicates anywhere, which could be
    140    important from a space point of view for programs that generate a
    141    lot of translations, or where translations are frequently discarded
    142    and re-made.
    143 
    144    Note that we normalise the arrays by sorting the elements according
    145    to an arbitrary total order, so as to avoid the situation that two
    146    vectors describe the same set of variables but are not structurally
    147    identical. */
    148 
    149 static inline Bool StackBlock__sane ( StackBlock* fb )
    150 {
    151    if (fb->name[ sizeof(fb->name)-1 ] != 0)
    152       return False;
    153    if (fb->spRel != False && fb->spRel != True)
    154       return False;
    155    if (fb->isVec != False && fb->isVec != True)
    156       return False;
    157    return True;
    158 }
    159 
    160 /* Generate an arbitrary total ordering on StackBlocks. */
    161 static Word StackBlock__cmp ( StackBlock* fb1, StackBlock* fb2 )
    162 {
    163    Word r;
    164    tl_assert(StackBlock__sane(fb1));
    165    tl_assert(StackBlock__sane(fb2));
    166    /* Hopefully the .base test hits most of the time.  For the blocks
    167       associated with any particular instruction, if the .base values
    168       are the same then probably it doesn't make sense for the other
    169       fields to be different.  But this is supposed to be a completely
    170       general structural total order, so we have to compare everything
    171       anyway. */
    172    if (fb1->base < fb2->base) return -1;
    173    if (fb1->base > fb2->base) return 1;
    174    /* compare sizes */
    175    if (fb1->szB < fb2->szB) return -1;
    176    if (fb1->szB > fb2->szB) return 1;
    177    /* compare sp/fp flag */
    178    if (fb1->spRel < fb2->spRel) return -1;
    179    if (fb1->spRel > fb2->spRel) return 1;
    180    /* compare is/is-not array-typed flag */
    181    if (fb1->isVec < fb2->isVec) return -1;
    182    if (fb1->isVec > fb2->isVec) return 1;
    183    /* compare the name */
    184    r = (Word)VG_(strcmp)(fb1->name, fb2->name);
    185    return r;
    186 }
    187 
    188 /* Returns True if all fields except .szB are the same.  szBs may or
    189    may not be the same; they are simply not consulted. */
    190 static Bool StackBlock__all_fields_except_szB_are_equal (
    191                StackBlock* fb1,
    192                StackBlock* fb2
    193             )
    194 {
    195    tl_assert(StackBlock__sane(fb1));
    196    tl_assert(StackBlock__sane(fb2));
    197    return fb1->base == fb2->base
    198           && fb1->spRel == fb2->spRel
    199           && fb1->isVec == fb2->isVec
    200           && 0 == VG_(strcmp)(fb1->name, fb2->name);
    201 }
    202 
    203 
    204 /* Generate an arbitrary total ordering on vectors of StackBlocks. */
    205 static Word StackBlocks__cmp ( XArray* fb1s, XArray* fb2s )
    206 {
    207    Word i, r, n1, n2;
    208    n1 = VG_(sizeXA)( fb1s );
    209    n2 = VG_(sizeXA)( fb2s );
    210    if (n1 < n2) return -1;
    211    if (n1 > n2) return 1;
    212    for (i = 0; i < n1; i++) {
    213       StackBlock *fb1, *fb2;
    214       fb1 = VG_(indexXA)( fb1s, i );
    215       fb2 = VG_(indexXA)( fb2s, i );
    216       r = StackBlock__cmp( fb1, fb2 );
    217       if (r != 0) return r;
    218    }
    219    tl_assert(i == n1 && i == n2);
    220    return 0;
    221 }
    222 
    223 static void pp_StackBlocks ( XArray* sbs )
    224 {
    225    Word i, n = VG_(sizeXA)( sbs );
    226    VG_(message)(Vg_DebugMsg, "<<< STACKBLOCKS\n" );
    227    for (i = 0; i < n; i++) {
    228       StackBlock* sb = (StackBlock*)VG_(indexXA)( sbs, i );
    229       VG_(message)(Vg_DebugMsg,
    230          "   StackBlock{ off %ld szB %lu spRel:%c isVec:%c \"%s\" }\n",
    231          sb->base, sb->szB, sb->spRel ? 'Y' : 'N',
    232          sb->isVec ? 'Y' : 'N', &sb->name[0]
    233       );
    234    }
    235    VG_(message)(Vg_DebugMsg, ">>> STACKBLOCKS\n" );
    236 }
    237 
    238 
    239 /* ---------- The StackBlock vector cache ---------- */
    240 
    241 static WordFM* /* XArray* of StackBlock -> nothing */
    242        frameBlocks_set = NULL;
    243 
    244 static void init_StackBlocks_set ( void )
    245 {
    246    tl_assert(!frameBlocks_set);
    247    frameBlocks_set
    248       = VG_(newFM)( sg_malloc, "di.sg_main.iSBs.1", sg_free,
    249                     (Word(*)(UWord,UWord))StackBlocks__cmp );
    250    tl_assert(frameBlocks_set);
    251 }
    252 
/* Find the given StackBlock-vector in our collection thereof.  If
   found, deallocate the supplied one, and return the address of the
   copy.  If not found, add the supplied one to our collection and
   return its address.  Either way the returned vector is interned
   and lives for the rest of the run.  The vector is normalised
   (sorted, de-duplicated, sanity-checked) before the lookup. */
static XArray* /* of StackBlock */
       StackBlocks__find_and_dealloc__or_add
          ( XArray* /* of StackBlock */ orig )
{
   UWord key, val;

   /* First, normalise, as per comments above. */
   VG_(setCmpFnXA)( orig, (Int(*)(void*,void*))StackBlock__cmp );
   VG_(sortXA)( orig );

   /* Now get rid of any exact duplicates.  Classic in-place
      compaction: 'r' is the read index, 'w' the write index, and
      'nEQ' counts the duplicates dropped. */
  nuke_dups:
   { Word r, w, nEQ, n = VG_(sizeXA)( orig );
     if (n >= 2) {
        w = 0;
        nEQ = 0;
        for (r = 0; r < n; r++) {
           if (r+1 < n) {
              StackBlock* pR0 = VG_(indexXA)( orig, r+0 );
              StackBlock* pR1 = VG_(indexXA)( orig, r+1 );
              Word c = StackBlock__cmp(pR0,pR1);
              tl_assert(c == -1 || c == 0); /* vector is sorted, so never > */
              if (c == 0) { nEQ++; continue; }
           }
           if (w != r) {
              StackBlock* pW = VG_(indexXA)( orig, w );
              StackBlock* pR = VG_(indexXA)( orig, r );
              *pW = *pR;
           }
           w++;
        }
        tl_assert(r == n);
        tl_assert(w + nEQ == n);
        if (w < n) {
           /* trim off the now-unused tail slots */
           VG_(dropTailXA)( orig, n-w );
        }
        if (0) VG_(printf)("delta %ld\n", n-w);
     }
   }

   /* Deal with the following strangeness, where two otherwise
      identical blocks are claimed to have different sizes.  In which
      case we use the larger size. */
   /* StackBlock{ off 16 szB 66 spRel:Y isVec:Y "sz" }
      StackBlock{ off 16 szB 130 spRel:Y isVec:Y "sz" }
      StackBlock{ off 208 szB 16 spRel:Y isVec:Y "ar" }
   */
   { Word i, n = VG_(sizeXA)( orig );
     if (n >= 2) {
        for (i = 0; i < n-1; i++) {
           StackBlock* sb0 = VG_(indexXA)( orig, i+0 );
           StackBlock* sb1 = VG_(indexXA)( orig, i+1 );
           if (StackBlock__all_fields_except_szB_are_equal(sb0, sb1)) {
              /* They can't be identical because the previous tidying
                 pass would have removed the duplicates.  And they
                 can't be > because the earlier sorting pass would
                 have ordered otherwise-identical descriptors
                 according to < on .szB fields.  Hence: */
              tl_assert(sb0->szB < sb1->szB);
              sb0->szB = sb1->szB;
              /* This makes the blocks identical, at the size of the
                 larger one.  Rather than go to all the hassle of
                 sliding the rest down, simply go back to the
                 remove-duplicates stage.  The assertion guarantees
                 that we eventually make progress, since the rm-dups
                 stage will get rid of one of the blocks.  This is
                 expected to happen only exceedingly rarely. */
              tl_assert(StackBlock__cmp(sb0,sb1) == 0);
              goto nuke_dups;
           }
        }
     }
   }

   /* If there are any blocks which overlap and have the same
      fpRel-ness, junk the whole descriptor; it's obviously bogus.
      Icc11 certainly generates bogus info from time to time.

      This check is pretty weak; really we ought to have a stronger
      sanity check. */
   { Word i, n = VG_(sizeXA)( orig );
     static Int moans = 3;
     for (i = 0; i < n-1; i++) {
       StackBlock* sb1 = (StackBlock*)VG_(indexXA)( orig, i );
       StackBlock* sb2 = (StackBlock*)VG_(indexXA)( orig, i+1 );
       if (sb1->spRel == sb2->spRel
           && (sb1->base >= sb2->base
               || sb1->base + sb1->szB > sb2->base)) {
          if (moans > 0 && !VG_(clo_xml)) {
             moans--;
             VG_(message)(Vg_UserMsg, "Warning: bogus DWARF3 info: "
                                      "overlapping stack blocks\n");
             if (VG_(clo_verbosity) >= 2)
                pp_StackBlocks(orig);
             if (moans == 0)
                VG_(message)(Vg_UserMsg, "Further instances of this "
                                         "message will not be shown\n" );
          }
          /* Drop all elements but keep the (now empty) XArray itself,
             so it can still be interned below. */
          VG_(dropTailXA)( orig, VG_(sizeXA)( orig ));
          break;
       }
     }
   }

   /* Now, do we have it already? */
   if (VG_(lookupFM)( frameBlocks_set, &key, &val, (UWord)orig )) {
      /* yes: a structurally equal vector is already interned; free
         the caller's copy and hand back the interned one. */
      XArray* res;
      tl_assert(val == 0);
      tl_assert(key != (UWord)orig);
      VG_(deleteXA)(orig);
      res = (XArray*)key;
      return res;
   } else {
      /* no: intern the caller's vector; it becomes permanent. */
      VG_(addToFM)( frameBlocks_set, (UWord)orig, 0 );
      return orig;
   }
}
    376 
/* Top level function for getting the StackBlock vector for a given
   instruction.  It is guaranteed that the returned pointer will be
   valid for the entire rest of the run, and also that the addresses
   of the individual elements of the array will not change. */

static XArray* /* of StackBlock */ get_StackBlocks_for_IP ( Addr ip )
{
   XArray* blocks = VG_(di_get_stack_blocks_at_ip)( ip, True/*arrays only*/ );
   tl_assert(blocks);
   /* Intern the freshly-acquired vector: the call either frees
      'blocks' and returns a previously-stored structurally-identical
      vector, or stores 'blocks' itself.  Either way the result is
      permanent. */
   return StackBlocks__find_and_dealloc__or_add( blocks );
}
    388 
    389 
    390 //////////////////////////////////////////////////////////////
    391 //                                                          //
    392 // GlobalBlocks Persistent Cache                            //
    393 //                                                          //
    394 //////////////////////////////////////////////////////////////
    395 
    396 /* Generate an arbitrary total ordering on GlobalBlocks. */
    397 static Word GlobalBlock__cmp ( GlobalBlock* gb1, GlobalBlock* gb2 )
    398 {
    399    Word r;
    400    /* compare addrs */
    401    if (gb1->addr < gb2->addr) return -1;
    402    if (gb1->addr > gb2->addr) return 1;
    403    /* compare sizes */
    404    if (gb1->szB < gb2->szB) return -1;
    405    if (gb1->szB > gb2->szB) return 1;
    406    /* compare is/is-not array-typed flag */
    407    if (gb1->isVec < gb2->isVec) return -1;
    408    if (gb1->isVec > gb2->isVec) return 1;
    409    /* compare the name */
    410    r = (Word)VG_(strcmp)(gb1->name, gb2->name);
    411    if (r != 0) return r;
    412    /* compare the soname */
    413    r = (Word)VG_(strcmp)(gb1->soname, gb2->soname);
    414    return r;
    415 }
    416 
    417 static WordFM* /* GlobalBlock* -> nothing */
    418        globalBlock_set = NULL;
    419 
    420 static void init_GlobalBlock_set ( void )
    421 {
    422    tl_assert(!globalBlock_set);
    423     globalBlock_set
    424        = VG_(newFM)( sg_malloc, "di.sg_main.iGBs.1", sg_free,
    425                      (Word(*)(UWord,UWord))GlobalBlock__cmp );
    426    tl_assert(globalBlock_set);
    427 }
    428 
    429 
    430 /* Top level function for making GlobalBlocks persistent.  Call here
    431    with a non-persistent version, and the returned one is guaranteed
    432    to be valid for the entire rest of the run.  The supplied one is
    433    copied, not stored, so can be freed after the call. */
    434 
    435 static GlobalBlock* get_persistent_GlobalBlock ( GlobalBlock* orig )
    436 {
    437    UWord key, val;
    438    /* Now, do we have it already? */
    439    if (VG_(lookupFM)( globalBlock_set, &key, &val, (UWord)orig )) {
    440       /* yes, return the copy */
    441       GlobalBlock* res;
    442       tl_assert(val == 0);
    443       res = (GlobalBlock*)key;
    444       tl_assert(res != orig);
    445       return res;
    446    } else {
    447       /* no.  clone it, store the clone and return the clone's
    448          address. */
    449       GlobalBlock* clone = sg_malloc( "di.sg_main.gpGB.1",
    450                                       sizeof(GlobalBlock) );
    451       tl_assert(clone);
    452       *clone = *orig;
    453       VG_(addToFM)( globalBlock_set, (UWord)clone, 0 );
    454       return clone;
    455    }
    456 }
    457 
    458 
    459 //////////////////////////////////////////////////////////////
    460 //                                                          //
    461 // Interval tree of StackTreeBlock                          //
    462 //                                                          //
    463 //////////////////////////////////////////////////////////////
    464 
    465 /* A node in a stack interval tree.  Zero length intervals (.szB == 0)
    466    are not allowed.
    467 
    468    A stack interval tree is a (WordFM StackTreeNode* void).  There is
    469    one stack interval tree for each thread.
    470 */
    471 typedef
    472    struct {
    473       Addr        addr;
    474       SizeT       szB;   /* copied from .descr->szB */
    475       StackBlock* descr; /* it's an instance of this block */
    476       UWord       depth; /* depth of stack at time block was pushed */
    477    }
    478    StackTreeNode;
    479 
    480 static void pp_StackTree ( WordFM* sitree, HChar* who )
    481 {
    482    UWord keyW, valW;
    483    VG_(printf)("<<< BEGIN pp_StackTree %s\n", who );
    484    VG_(initIterFM)( sitree );
    485    while (VG_(nextIterFM)( sitree, &keyW, &valW )) {
    486       StackTreeNode* nd = (StackTreeNode*)keyW;
    487       VG_(printf)("  [%#lx,+%lu) descr=%p %s %lu\n", nd->addr, nd->szB,
    488                   nd->descr, nd->descr->name, nd->descr->szB);
    489    }
    490    VG_(printf)(">>> END   pp_StackTree %s\n", who );
    491 }
    492 
/* Interval comparison function for StackTreeNode.  Delegates to the
   generic overlap-aware interval comparator: two nodes compare equal
   iff their [addr, addr+szB) ranges overlap. */
static Word cmp_intervals_StackTreeNode ( StackTreeNode* sn1,
                                          StackTreeNode* sn2 )
{
   return cmp_nonempty_intervals(sn1->addr, sn1->szB,
                                 sn2->addr, sn2->szB);
}
    500 
    501 /* Find the node holding 'a', if any. */
    502 static StackTreeNode* find_StackTreeNode ( WordFM* sitree, Addr a )
    503 {
    504    UWord keyW, valW;
    505    StackTreeNode key;
    506    tl_assert(sitree);
    507    key.addr = a;
    508    key.szB  = 1;
    509    if (VG_(lookupFM)( sitree, &keyW, &valW, (UWord)&key )) {
    510       StackTreeNode* res = (StackTreeNode*)keyW;
    511       tl_assert(valW == 0);
    512       tl_assert(res != &key);
    513       return res;
    514    } else {
    515       return NULL;
    516    }
    517 }
    518 
    519 /* Note that the supplied XArray of FrameBlock must have been
    520    made persistent already. */
    521 __attribute__((noinline))
    522 static void add_blocks_to_StackTree (
    523                /*MOD*/WordFM* sitree,
    524                XArray* /* FrameBlock */ descrs,
    525                XArray* /* Addr */ bases,
    526                UWord depth
    527             )
    528 {
    529    Bool debug = (Bool)0;
    530    Word i, nDescrs, nBases;
    531 
    532    nDescrs = VG_(sizeXA)( descrs ),
    533    nBases = VG_(sizeXA)( bases );
    534    tl_assert(nDescrs == nBases);
    535 
    536    if (nDescrs == 0) return;
    537 
    538    tl_assert(sitree);
    539    if (debug) {
    540       VG_(printf)("\ndepth = %lu\n", depth);
    541       pp_StackTree( sitree, "add_blocks_to_StackTree-pre" );
    542       pp_StackBlocks(descrs);
    543    }
    544 
    545    for (i = 0; i < nDescrs; i++) {
    546       Bool already_present;
    547       StackTreeNode* nyu;
    548       Addr        addr  = *(Addr*)VG_(indexXA)( bases, i );
    549       StackBlock* descr = (StackBlock*)VG_(indexXA)( descrs, i );
    550       tl_assert(descr->szB > 0);
    551       nyu = sg_malloc( "di.sg_main.abtST.1", sizeof(StackTreeNode) );
    552       nyu->addr  = addr;
    553       nyu->szB   = descr->szB;
    554       nyu->descr = descr;
    555       nyu->depth = depth;
    556       if (debug) VG_(printf)("ADD %#lx %lu\n", addr, descr->szB);
    557       already_present = VG_(addToFM)( sitree, (UWord)nyu, 0 );
    558       /* The interval can't already be there; else we have
    559          overlapping stack blocks. */
    560       tl_assert(!already_present);
    561       if (debug) {
    562          pp_StackTree( sitree, "add_blocks_to_StackTree-step" );
    563       }
    564    }
    565    if (debug) {
    566       pp_StackTree( sitree, "add_blocks_to_StackTree-post" );
    567       VG_(printf)("\n");
    568    }
    569 }
    570 
    571 static void del_blocks_from_StackTree ( /*MOD*/WordFM* sitree,
    572                                         XArray* /* Addr */ bases )
    573 {
    574    UWord oldK, oldV;
    575    Word i, nBases = VG_(sizeXA)( bases );
    576    for (i = 0; i < nBases; i++) {
    577       Bool b;
    578       Addr addr = *(Addr*)VG_(indexXA)( bases, i );
    579       StackTreeNode* nd = find_StackTreeNode(sitree, addr);
    580       /* The interval must be there; we added it earlier when
    581          the associated frame was created. */
    582       tl_assert(nd);
    583       b = VG_(delFromFM)( sitree, &oldK, &oldV, (UWord)nd );
    584       /* we just found the block! */
    585       tl_assert(b);
    586       tl_assert(oldV == 0);
    587       tl_assert(nd == (StackTreeNode*)oldK);
    588       sg_free(nd);
    589    }
    590 }
    591 
    592 
    593 static void delete_StackTree__kFin ( UWord keyW ) {
    594    StackTreeNode* nd = (StackTreeNode*)keyW;
    595    tl_assert(nd);
    596    sg_free(nd);
    597 }
/* WordFM value finaliser: values are always 0, so nothing to free. */
static void delete_StackTree__vFin ( UWord valW ) {
   tl_assert(valW == 0);
}
/* Free an entire stack interval tree, including all of its nodes. */
static void delete_StackTree ( WordFM* sitree )
{
   VG_(deleteFM)( sitree,
                 delete_StackTree__kFin, delete_StackTree__vFin );
}
    606 
/* Create a new, empty stack interval tree, ordered by the
   overlap-aware interval comparator. */
static WordFM* new_StackTree ( void ) {
   return VG_(newFM)( sg_malloc, "di.sg_main.nST.1", sg_free,
                      (Word(*)(UWord,UWord))cmp_intervals_StackTreeNode );
}
    611 
    612 
    613 //////////////////////////////////////////////////////////////
    614 //                                                          //
    615 // Interval tree of GlobalTreeBlock                         //
    616 //                                                          //
    617 //////////////////////////////////////////////////////////////
    618 
    619 /* A node in a global interval tree.  Zero length intervals
    620    (.szB == 0) are not allowed.
    621 
    622    A global interval tree is a (WordFM GlobalTreeNode* void).  There
    623    is one global interval tree for the entire process.
    624 */
    625 typedef
    626    struct {
    627       Addr         addr; /* copied from .descr->addr */
    628       SizeT        szB; /* copied from .descr->szB */
    629       GlobalBlock* descr; /* it's this block */
    630    }
    631    GlobalTreeNode;
    632 
    633 static void GlobalTreeNode__pp ( GlobalTreeNode* nd ) {
    634    tl_assert(nd->descr);
    635    VG_(printf)("GTNode [%#lx,+%ld) %s",
    636                nd->addr, nd->szB, nd->descr->name);
    637 }
    638 
    639 static void GlobalTree__pp ( WordFM* /* of (GlobalTreeNode,void) */ gitree,
    640                              HChar* who )
    641 {
    642    UWord keyW, valW;
    643    GlobalTreeNode* nd;
    644    VG_(printf)("<<< GlobalBlockTree (%s)\n", who);
    645    VG_(initIterFM)( gitree );
    646    while (VG_(nextIterFM)( gitree, &keyW, &valW )) {
    647       tl_assert(valW == 0);
    648       nd = (GlobalTreeNode*)keyW;
    649       VG_(printf)("  ");
    650       GlobalTreeNode__pp(nd);
    651       VG_(printf)("\n");
    652    }
    653    VG_(doneIterFM)( gitree );
    654    VG_(printf)(">>>\n");
    655 }
    656 
/* Interval comparison function for GlobalTreeNode.  Delegates to the
   generic overlap-aware interval comparator: two nodes compare equal
   iff their [addr, addr+szB) ranges overlap. */
static Word cmp_intervals_GlobalTreeNode ( GlobalTreeNode* gn1,
                                           GlobalTreeNode* gn2 )
{
   return cmp_nonempty_intervals( gn1->addr, gn1->szB,
                                  gn2->addr, gn2->szB );
}
    664 
    665 /* Find the node holding 'a', if any. */
    666 static GlobalTreeNode* find_GlobalTreeNode ( WordFM* gitree, Addr a )
    667 {
    668    UWord keyW, valW;
    669    GlobalTreeNode key;
    670    key.addr = a;
    671    key.szB  = 1;
    672    if (VG_(lookupFM)( gitree, &keyW, &valW, (UWord)&key )) {
    673       GlobalTreeNode* res = (GlobalTreeNode*)keyW;
    674       tl_assert(valW == 0);
    675       tl_assert(res != &key);
    676       return res;
    677    } else {
    678       return NULL;
    679    }
    680 }
    681 
/* Add an interval node for 'descr' to the global interval tree.
   Note that the supplied GlobalBlock must have been made persistent
   already (see get_persistent_GlobalBlock): the new node keeps a
   pointer to it.  Exact duplicates are detected and silently
   dropped; non-exact overlaps produce a rate-limited warning. */
static void add_block_to_GlobalTree (
               /*MOD*/WordFM* gitree,
               GlobalBlock* descr
            )
{
   Bool already_present;
   GlobalTreeNode *nyu, *nd;
   UWord keyW, valW;
   static Int moans = 3;  /* rate limit for the bogus-DWARF warning */

   tl_assert(descr->szB > 0);  /* zero-length intervals not allowed */
   nyu = sg_malloc( "di.sg_main.abtG.1", sizeof(GlobalTreeNode) );
   nyu->addr  = descr->addr;
   nyu->szB   = descr->szB;
   nyu->descr = descr;

   /* Basically it's an error to add a global block to the tree that
      is already in the tree.  However, detect and ignore attempts to
      insert exact duplicates; they do appear for some reason
      (possible a bug in m_debuginfo?) */
   already_present = VG_(lookupFM)( gitree, &keyW, &valW, (UWord)nyu );
   if (already_present) {
      tl_assert(valW == 0);
      nd = (GlobalTreeNode*)keyW;
      tl_assert(nd);
      tl_assert(nd != nyu);
      tl_assert(nd->descr);
      tl_assert(nyu->descr);
      if (nd->addr == nyu->addr && nd->szB == nyu->szB
          /* && 0 == VG_(strcmp)(nd->descr->name, nyu->descr->name) */
          /* Although it seems reasonable to demand that duplicate
             blocks have identical names, that is too strict.  For
             example, reading debuginfo from glibc produces two
             otherwise identical blocks with names "tzname" and
             "__tzname".  A constraint of the form "must be identical,
             or one must be a substring of the other" would fix that.
             However, such trickery is scuppered by the fact that we
             truncate all variable names to 15 characters to make
             storage management simpler, hence giving pairs like
             "__EI___pthread_" (truncated) vs "__pthread_keys".  So
             it's simplest just to skip the name comparison
             completely. */
          && 0 == VG_(strcmp)(nd->descr->soname, nyu->descr->soname)) {
         /* exact duplicate; ignore it */
         sg_free(nyu);
         return;
      }
      /* else fall through; the assertion below will catch it */
   }

   already_present = VG_(addToFM)( gitree, (UWord)nyu, 0 );
   /* The interval can't already be there; else we have
      overlapping global blocks. */
   /* Unfortunately (25 Jan 09) at least icc11 has been seen to
      generate overlapping block descriptions in the Dwarf3; clearly
      bogus. */
   if (already_present && moans > 0 && !VG_(clo_xml)) {
      moans--;
      VG_(message)(Vg_UserMsg, "Warning: bogus DWARF3 info: "
                               "overlapping global blocks\n");
      if (VG_(clo_verbosity) >= 2) {
         GlobalTree__pp( gitree,
                         "add_block_to_GlobalTree: non-exact duplicate" );
         VG_(printf)("Overlapping block: ");
         GlobalTreeNode__pp(nyu);
         VG_(printf)("\n");
      }
      if (moans == 0)
         VG_(message)(Vg_UserMsg, "Further instances of this "
                                  "message will not be shown\n" );
   }
   /* tl_assert(!already_present); */
}
    757 
/* Delete from 'gitree' every node that intersects [a,a+szB).
   Returns True iff at least one node was removed. */
static Bool del_GlobalTree_range ( /*MOD*/WordFM* gitree,
                                   Addr a, SizeT szB )
{
   /* One easy way to do this: look up [a,a+szB) in the tree.  That
      will either succeed, producing a block which intersects that
      range, in which case we delete it and repeat; or it will fail,
      in which case there are no blocks intersecting the range, and we
      can bring the process to a halt. */
   UWord keyW, valW, oldK, oldV;
   GlobalTreeNode key, *nd;
   Bool b, anyFound;

   tl_assert(szB > 0);

   anyFound = False;

   key.addr = a;
   key.szB  = szB;

   while (VG_(lookupFM)( gitree, &keyW, &valW, (UWord)&key )) {
      anyFound = True;
      nd = (GlobalTreeNode*)keyW;
      tl_assert(valW == 0);
      tl_assert(nd != &key);
      tl_assert(cmp_nonempty_intervals(a, szB, nd->addr, nd->szB) == 0);

      /* NOTE(review): the detached node (*nd) is not sg_free'd here,
         although it was sg_malloc'd in add_block_to_GlobalTree.
         This looks like a leak unless the nodes are owned elsewhere;
         confirm against the rest of the file before changing. */
      b = VG_(delFromFM)( gitree, &oldK, &oldV, (UWord)&key );
      tl_assert(b);
      tl_assert(oldV == 0);
      tl_assert(oldK == keyW); /* check we deleted the node we just found */
   }

   return anyFound;
}
    792 
    793 
    794 //////////////////////////////////////////////////////////////
    795 //                                                          //
    796 // Invar                                                    //
    797 //                                                          //
    798 //////////////////////////////////////////////////////////////
    799 
    800 /* An invariant, as resulting from watching the destination of a
    801    memory referencing instruction.  Initially is Inv_Unset until the
    802    instruction makes a first access. */
    803 
/* Discriminator for Invar.  Note that values start at 1, so an
   all-zero Invar does not carry any valid tag. */
typedef
   enum {
      Inv_Unset=1,  /* not established yet */
      Inv_Unknown,  /* unknown location */
      Inv_Stack0,   /* array-typed stack block in innermost frame */
      Inv_StackN,   /* array-typed stack block in non-innermost frame */
      Inv_Global,   /* array-typed global block */
   }
   InvarTag;
    813 
/* An invariant: a tagged union, with the payload selected by .tag. */
typedef
   struct {
      InvarTag tag;  /* selects which union arm below is valid */
      union {
         struct {
         } Unset;    /* no payload */
         struct {
         } Unknown;  /* no payload */
         struct {
            /* Stack0 caches the block's extent and descriptor
               directly, rather than pointing at a tree node. */
            Addr  addr;
            SizeT szB;
            StackBlock* descr;
         } Stack0; /* innermost stack frame */
         struct {
            /* Pointer to a node in the interval tree for
              this thread. */
            StackTreeNode* nd;
         } StackN; /* non-innermost stack frame */
         struct {
           /* Pointer to a GlobalBlock in the interval tree of
              global blocks. */
           GlobalTreeNode* nd;
         } Global;
      }
      Inv;
   }
   Invar;
    841 
    842 /* Partial debugging printing for an Invar. */
    843 static void pp_Invar ( Invar* i )
    844 {
    845    switch (i->tag) {
    846       case Inv_Unset:
    847          VG_(printf)("Unset");
    848          break;
    849       case Inv_Unknown:
    850          VG_(printf)("Unknown");
    851          break;
    852       case Inv_Stack0:
    853          VG_(printf)("Stack0 [%#lx,+%lu)",
    854                      i->Inv.Stack0.addr, i->Inv.Stack0.szB);
    855          break;
    856       case Inv_StackN:
    857          VG_(printf)("StackN [%#lx,+%lu)",
    858                      i->Inv.StackN.nd->addr, i->Inv.StackN.nd->szB);
    859          break;
    860       case Inv_Global:
    861          VG_(printf)("Global [%#lx,+%lu)",
    862                      i->Inv.Global.nd->addr, i->Inv.Global.nd->szB);
    863          break;
    864       default:
    865          tl_assert(0);
    866    }
    867 }
    868 
    869 /* Compare two Invars for equality. */
    870 static Bool eq_Invar ( Invar* i1, Invar* i2 )
    871 {
    872    tl_assert(i1->tag != Inv_Unset);
    873    tl_assert(i2->tag != Inv_Unset);
    874    if (i1->tag != i2->tag)
    875       return False;
    876    switch (i1->tag) {
    877       case Inv_Unknown:
    878          return True;
    879       case Inv_Stack0:
    880          return i1->Inv.Stack0.addr == i2->Inv.Stack0.addr
    881                 && i1->Inv.Stack0.szB == i2->Inv.Stack0.szB;
    882       case Inv_StackN:
    883          return i1->Inv.StackN.nd == i2->Inv.StackN.nd;
    884       case Inv_Global:
    885          return i1->Inv.Global.nd == i2->Inv.Global.nd;
    886       default:
    887          tl_assert(0);
    888    }
    889    /*NOTREACHED*/
    890    tl_assert(0);
    891 }
    892 
    893 /* Print selected parts of an Invar, suitable for use in error
    894    messages. */
/* Render 'inv' into buf as an English phrase naming the block it
   refers to.  'depth' is the depth of the current (innermost) frame,
   used to express how many frames back an Inv_StackN block lives.
   NOTE(review): VG_(sprintf) is unbounded; safety relies on
   nBuf >= 96 plus variable names elsewhere being truncated to 15
   chars (see comment near top of file) -- sonames are not obviously
   bounded the same way; confirm. */
static void show_Invar( HChar* buf, Word nBuf, Invar* inv, Word depth )
{
   HChar* str;
   tl_assert(nBuf >= 96);
   buf[0] = 0;
   switch (inv->tag) {
      case Inv_Unknown:
         VG_(sprintf)(buf, "%s", "unknown");
         break;
      case Inv_Stack0:
         str = "array";
         VG_(sprintf)(buf, "stack %s \"%s\" in this frame",
                      str, inv->Inv.Stack0.descr->name );
         break;
      case Inv_StackN:
         str = "array";
         /* depth - nd->depth == number of frames between the block's
            frame and the current innermost frame */
         VG_(sprintf)(buf, "stack %s \"%s\" in frame %lu back from here",
                      str, inv->Inv.StackN.nd->descr->name,
                           depth - inv->Inv.StackN.nd->depth );
         break;
      case Inv_Global:
         str = "array";
         VG_(sprintf)(buf, "global %s \"%s\" in object with soname \"%s\"",
                      str, inv->Inv.Global.nd->descr->name,
                           inv->Inv.Global.nd->descr->soname );
         break;
      case Inv_Unset:
         VG_(sprintf)(buf, "%s", "Unset!");
         break;
      default:
         tl_assert(0);
   }
}
    928 
    929 
    930 //////////////////////////////////////////////////////////////
    931 //                                                          //
    932 // our globals                                              //
    933 //                                                          //
    934 //////////////////////////////////////////////////////////////
    935 
    936 //////////////////////////////////////////////////////////////
    937 ///
    938 
    939 #define N_QCACHE 16
    940 
    941 /* Powers of two only, else the result will be chaos */
    942 #define QCACHE_ADVANCE_EVERY 16
    943 
    944 /* Per-thread query cache.  Note that the invar can only be Inv_StackN
    945    (but not Inv_Stack0), Inv_Global or Inv_Unknown. */
typedef
   struct {
      Addr  addr;   /* start of the cached address range */
      SizeT szB;    /* size of the cached range, in bytes */
      Invar inv;    /* invariant that applies to [addr, addr+szB) */
   }
   QCElem;
    953 
typedef
   struct {
      Word   nInUse;            /* number of valid entries in elems[] */
      QCElem elems[N_QCACHE];   /* entries; lookup hits are gradually
                                   moved towards the front (see
                                   classify_address) */
   }
   QCache;
    960 
    961 static void QCache__invalidate ( QCache* qc ) {
    962    tl_assert(qc->nInUse >= 0);
    963    qc->nInUse = 0;
    964 }
    965 
    966 static void QCache__pp ( QCache* qc, HChar* who )
    967 {
    968    Word i;
    969    VG_(printf)("<<< QCache with %ld elements (%s)\n", qc->nInUse, who);
    970    for (i = 0; i < qc->nInUse; i++) {
    971       VG_(printf)("  [%#lx,+%#lx) ", qc->elems[i].addr, qc->elems[i].szB);
    972       pp_Invar(&qc->elems[i].inv);
    973       VG_(printf)("\n");
    974    }
    975    VG_(printf)(">>>\n");
    976 }
    977 
    978 static ULong stats__qcache_queries = 0;
    979 static ULong stats__qcache_misses  = 0;
    980 static ULong stats__qcache_probes  = 0;
    981 
    982 ///
    983 //////////////////////////////////////////////////////////////
    984 
    985 /* Each thread has:
    986    * a shadow stack of StackFrames, which is a double-linked list
    987    * an stack block interval tree
    988 */
    989 static  struct _StackFrame*          shadowStacks[VG_N_THREADS];
    990 
    991 static  WordFM* /* StackTreeNode */  siTrees[VG_N_THREADS];
    992 
    993 static  QCache                       qcaches[VG_N_THREADS];
    994 
    995 
    996 /* Additionally, there is one global variable interval tree
    997    for the entire process.
    998 */
    999 static WordFM* /* GlobalTreeNode */ giTree;
   1000 
   1001 
   1002 static void invalidate_all_QCaches ( void )
   1003 {
   1004    Word i;
   1005    for (i = 0; i < VG_N_THREADS; i++) {
   1006       QCache__invalidate( &qcaches[i] );
   1007    }
   1008 }
   1009 
   1010 static void ourGlobals_init ( void )
   1011 {
   1012    Word i;
   1013    for (i = 0; i < VG_N_THREADS; i++) {
   1014       shadowStacks[i] = NULL;
   1015       siTrees[i] = NULL;
   1016    }
   1017    invalidate_all_QCaches();
   1018    giTree = VG_(newFM)( sg_malloc, "di.sg_main.oGi.1", sg_free,
   1019                         (Word(*)(UWord,UWord))cmp_intervals_GlobalTreeNode );
   1020 }
   1021 
   1022 
   1023 //////////////////////////////////////////////////////////////
   1024 //                                                          //
   1025 // Handle global variable load/unload events                //
   1026 //                                                          //
   1027 //////////////////////////////////////////////////////////////
   1028 
   1029 static void acquire_globals ( ULong di_handle )
   1030 {
   1031    Word n, i;
   1032    XArray* /* of GlobalBlock */ gbs;
   1033    if (0) VG_(printf)("ACQUIRE GLOBALS %llu\n", di_handle );
   1034    gbs = VG_(di_get_global_blocks_from_dihandle)
   1035             (di_handle, True/*arrays only*/);
   1036    if (0) VG_(printf)("   GOT %ld globals\n", VG_(sizeXA)( gbs ));
   1037 
   1038    n = VG_(sizeXA)( gbs );
   1039    for (i = 0; i < n; i++) {
   1040       GlobalBlock* gbp;
   1041       GlobalBlock* gb = VG_(indexXA)( gbs, i );
   1042       if (0) VG_(printf)("   new Global size %2lu at %#lx:  %s %s\n",
   1043                          gb->szB, gb->addr, gb->soname, gb->name );
   1044       tl_assert(gb->szB > 0);
   1045       /* Make a persistent copy of each GlobalBlock, and add it
   1046          to the tree. */
   1047       gbp = get_persistent_GlobalBlock( gb );
   1048       add_block_to_GlobalTree( giTree, gbp );
   1049    }
   1050 
   1051    VG_(deleteXA)( gbs );
   1052 }
   1053 
   1054 
   1055 /* We only intercept these two because we need to see any di_handles
   1056    that might arise from the mappings/allocations. */
   1057 void sg_new_mem_mmap( Addr a, SizeT len,
   1058                       Bool rr, Bool ww, Bool xx, ULong di_handle )
   1059 {
   1060    if (di_handle > 0)
   1061       acquire_globals(di_handle);
   1062 }
   1063 void sg_new_mem_startup( Addr a, SizeT len,
   1064                          Bool rr, Bool ww, Bool xx, ULong di_handle )
   1065 {
   1066    if (di_handle > 0)
   1067       acquire_globals(di_handle);
   1068 }
   1069 void sg_die_mem_munmap ( Addr a, SizeT len )
   1070 {
   1071    Bool debug = (Bool)0;
   1072    Bool overlap = False;
   1073 
   1074    if (debug) VG_(printf)("MUNMAP %#lx %lu\n", a, len );
   1075 
   1076    if (len == 0)
   1077       return;
   1078 
   1079    overlap = del_GlobalTree_range(giTree, a, len);
   1080 
   1081    { /* redundant sanity check */
   1082      UWord keyW, valW;
   1083      VG_(initIterFM)( giTree );
   1084      while (VG_(nextIterFM)( giTree, &keyW, &valW )) {
   1085        GlobalTreeNode* nd = (GlobalTreeNode*)keyW;
   1086         tl_assert(valW == 0);
   1087         tl_assert(nd->szB > 0);
   1088         tl_assert(nd->addr + nd->szB <= a
   1089                   || a + len <= nd->addr);
   1090      }
   1091      VG_(doneIterFM)( giTree );
   1092    }
   1093 
   1094    if (!overlap)
   1095       return;
   1096 
   1097    /* Ok, the range contained some blocks.  Therefore we'll need to
   1098       visit all the Invars in all the thread shadow stacks, and
   1099       convert all Inv_Global entries that intersect [a,a+len) to
   1100       Inv_Unknown. */
   1101    tl_assert(len > 0);
   1102    preen_global_Invars( a, len );
   1103    invalidate_all_QCaches();
   1104 }
   1105 
   1106 
   1107 //////////////////////////////////////////////////////////////
   1108 //                                                          //
   1109 // StackFrame                                               //
   1110 //                                                          //
   1111 //////////////////////////////////////////////////////////////
   1112 
   1113 static ULong stats__total_accesses   = 0;
   1114 static ULong stats__classify_Stack0  = 0;
   1115 static ULong stats__classify_StackN  = 0;
   1116 static ULong stats__classify_Global  = 0;
   1117 static ULong stats__classify_Unknown = 0;
   1118 static ULong stats__Invars_preened   = 0;
   1119 static ULong stats__Invars_changed   = 0;
   1120 static ULong stats__t_i_b_empty      = 0;
   1121 static ULong stats__htab_fast        = 0;
   1122 static ULong stats__htab_searches    = 0;
   1123 static ULong stats__htab_probes      = 0;
   1124 static ULong stats__htab_resizes     = 0;
   1125 
   1126 
   1127 /* A dynamic instance of an instruction */
typedef
   struct {
      /* IMMUTABLE */
      Addr    insn_addr; /* NB! zero means 'not in use' */
      XArray* blocks; /* XArray* of StackBlock, or NULL if none */
      /* MUTABLE */
      Invar invar;    /* current invariant observed for this
                         instruction instance; starts as Inv_Unset */
   }
   IInstance;
   1137 
   1138 
   1139 #define N_HTAB_FIXED 64
   1140 
/* A shadow stack frame for one function activation in one thread. */
typedef
   struct _StackFrame {
      /* The sp when the frame was created, so we know when to get rid
         of it. */
      Addr creation_sp;
      /* The stack frames for a thread are arranged as a doubly linked
         list.  Obviously the outermost frame in the stack has .outer
         as NULL and the innermost in theory has .inner as NULL.
         However, when a function returns, we don't delete the
         just-vacated StackFrame.  Instead, it is retained in the list
         and will be re-used when the next call happens.  This is so
         as to avoid constantly having to dynamically allocate and
         deallocate frames. */
      struct _StackFrame* inner;
      struct _StackFrame* outer;
      Word depth; /* 0 for outermost; increases inwards */
      /* Information for each memory referencing instruction, for this
         instantiation of the function.  The iinstances array is
         operated as a simple linear-probe hash table, which is
         dynamically expanded as necessary.  Once critical thing is
         that an IInstance with a .insn_addr of zero is interpreted to
         mean that hash table slot is unused.  This means we can't
         store an IInstance for address zero. */
      /* Note that htab initially points to htab_fixed.  If htab_fixed
         turns out not to be big enough then htab is made to point to
         dynamically allocated memory.  But it's often the case that
         htab_fixed is big enough, so this optimisation saves a huge
         number of sg_malloc/sg_free call pairs. */
      IInstance* htab;
      UWord      htab_size; /* size of hash table, MAY ONLY BE A POWER OF 2 */
      UWord      htab_used; /* number of hash table slots currently in use */
      /* If this frame is currently making a call, then the following
         are relevant. */
      Addr sp_at_call;
      Addr fp_at_call;
      /* Effective addresses of stack blocks made available by the
         call currently in progress (see comment just above). */
      XArray* /* of Addr */ blocks_added_by_call;
      /* See comment just above */
      IInstance htab_fixed[N_HTAB_FIXED];
   }
   StackFrame;
   1182 
   1183 
   1184 
   1185 
   1186 /* Move this somewhere else? */
   1187 /* Visit all Invars in the entire system.  If 'isHeap' is True, change
   1188    all Inv_Heap Invars that intersect [a,a+len) to Inv_Unknown.  If
   1189    'isHeap' is False, do the same but to the Inv_Global{S,V} Invars
   1190    instead. */
   1191 
   1192 __attribute__((noinline))
   1193 static void preen_global_Invar ( Invar* inv, Addr a, SizeT len )
   1194 {
   1195    stats__Invars_preened++;
   1196    tl_assert(len > 0);
   1197    tl_assert(inv);
   1198    switch (inv->tag) {
   1199       case Inv_Global:
   1200          tl_assert(inv->Inv.Global.nd);
   1201          tl_assert(inv->Inv.Global.nd->szB > 0);
   1202          if (0) VG_(printf)("preen_Invar Global %#lx %lu\n",
   1203                             inv->Inv.Global.nd->addr,
   1204                             inv->Inv.Global.nd->szB);
   1205          if (0 == cmp_nonempty_intervals(a, len, inv->Inv.Global.nd->addr,
   1206                                                  inv->Inv.Global.nd->szB)) {
   1207             inv->tag = Inv_Unknown;
   1208             stats__Invars_changed++;
   1209          }
   1210          break;
   1211       case Inv_Stack0:
   1212       case Inv_StackN:
   1213       case Inv_Unknown:
   1214          break;
   1215       default:
   1216          tl_assert(0);
   1217    }
   1218 }
   1219 
/* Walk every in-use IInstance in every frame of every thread's
   shadow stack, applying preen_global_Invar to each, i.e.
   downgrading to Inv_Unknown any Inv_Global invariant intersecting
   [a, a+len). */
__attribute__((noinline))
static void preen_global_Invars ( Addr a, SizeT len )
{
   Int         i;
   UWord       u;
   StackFrame* frame;
   tl_assert(len > 0);
   for (i = 0; i < VG_N_THREADS; i++) {
      frame = shadowStacks[i];
      if (!frame)
         continue; /* no frames for this thread */
      /* start from the innermost frame */
      while (frame->inner)
         frame = frame->inner;
      /* NOTE(review): this asserts the innermost frame has an outer
         frame, i.e. every live shadow stack holds at least two
         frames -- presumably guaranteed by the stack setup code (not
         visible here); confirm. */
      tl_assert(frame->outer);
      /* work through the frames from innermost to outermost.  The
         order isn't important; we just need to ensure we visit each
         frame once (including those which are not actually active,
         more 'inner' than the 'innermost active frame', viz, just
         hanging around waiting to be used, when the current innermost
         active frame makes more calls.  See comments on definition of
         struct _StackFrame. */
      for (; frame; frame = frame->outer) {
         UWord xx = 0; /* sanity check only; count of used htab entries */
         if (!frame->htab)
            continue; /* frame not in use.  See shadowStack_unwind(). */
         for (u = 0; u < frame->htab_size; u++) {
            IInstance* ii = &frame->htab[u];
            if (ii->insn_addr == 0)
               continue; /* not in use */
            if (0) { pp_Invar(&ii->invar); VG_(printf)(" x\n"); }
            preen_global_Invar( &ii->invar, a, len );
            xx++;
         }
         tl_assert(xx == frame->htab_used);
      }
   }
}
   1258 
   1259 
   1260 /* XXX this should be >> 2 on ppc32/64 since the bottom two bits
   1261    of the ip are guaranteed to be zero */
   1262 inline static UWord compute_II_hash ( Addr ip, UWord htab_size ) {
   1263    return (ip >> 0) & (htab_size - 1);
   1264 }
   1265 
   1266 __attribute__((noinline))
   1267 static void initialise_II_hash_table ( StackFrame* sf )
   1268 {
   1269    UWord i;
   1270    sf->htab_size = N_HTAB_FIXED; /* initial hash table size */
   1271    sf->htab = &sf->htab_fixed[0];
   1272    tl_assert(sf->htab);
   1273    sf->htab_used = 0;
   1274    for (i = 0; i < sf->htab_size; i++)
   1275       sf->htab[i].insn_addr = 0; /* NOT IN USE */
   1276 }
   1277 
   1278 
/* Double the size of sf's IInstance hash table and rehash every
   in-use entry into the new table.  The old table is freed unless it
   was the frame's built-in htab_fixed array. */
__attribute__((noinline))
static void resize_II_hash_table ( StackFrame* sf )
{
   UWord     i, j, ix, old_size, new_size;
   IInstance *old_htab, *new_htab, *old;

   tl_assert(sf && sf->htab);
   old_size = sf->htab_size;
   new_size = 2 * old_size;
   old_htab = sf->htab;
   new_htab = sg_malloc( "di.sg_main.rIht.1",
                         new_size * sizeof(IInstance) );
   /* Mark every new slot empty before rehashing into it. */
   for (i = 0; i < new_size; i++) {
      new_htab[i].insn_addr = 0; /* NOT IN USE */
   }
   /* Re-insert each in-use old entry, linear-probing from its new
      hash position. */
   for (i = 0; i < old_size; i++) {
      old = &old_htab[i];
      if (old->insn_addr == 0 /* NOT IN USE */)
         continue;
      ix = compute_II_hash(old->insn_addr, new_size);
      /* find out where to put this, in the new table */
      j = new_size;
      while (1) {
         if (new_htab[ix].insn_addr == 0)
            break;
         /* This can't ever happen, because it would mean the new
            table is full; that isn't allowed -- even the old table is
            only allowed to become half full. */
         tl_assert(j > 0);
         j--;
         ix++; if (ix == new_size) ix = 0;
      }
      /* copy the old entry to this location */
      tl_assert(ix < new_size);
      tl_assert(new_htab[ix].insn_addr == 0);
      new_htab[ix] = *old;
      tl_assert(new_htab[ix].insn_addr != 0);
   }
   /* all entries copied; free old table. */
   if (old_htab != &sf->htab_fixed[0])
      sg_free(old_htab);
   sf->htab = new_htab;
   sf->htab_size = new_size;
   /* check sf->htab_used is correct.  Optional and a bit expensive
      but anyway: */
   j = 0;
   for (i = 0; i < new_size; i++) {
      if (new_htab[i].insn_addr != 0) {
         j++;
      }
   }
   tl_assert(j == sf->htab_used);
   if (0) VG_(printf)("resized tab for SF %p to %lu\n", sf, new_size);
}
   1333 
   1334 
/* Slow path for find_or_create_IInstance: probe beyond the first
   slot, resizing the table first if it is at least half full.
   Returns the IInstance for 'ip', creating it (tag Inv_Unset) if it
   is not already present. */
__attribute__((noinline))
static IInstance* find_or_create_IInstance_SLOW (
                     StackFrame* sf,
                     Addr ip,
                     XArray* /* StackBlock */ ip_frameblocks
                  )
{
   UWord i, ix;

   stats__htab_searches++;

   tl_assert(sf);
   tl_assert(sf->htab);

   /* Make sure the table loading doesn't get too high. */
   if (UNLIKELY(2 * sf->htab_used >= 1 * sf->htab_size)) {
      stats__htab_resizes++;
      resize_II_hash_table(sf);
   }
   tl_assert(2 * sf->htab_used <= sf->htab_size);

   /* Linear-probe from the hash slot, wrapping at the table end. */
   ix = compute_II_hash(ip, sf->htab_size);
   i = sf->htab_size;
   while (1) {
      stats__htab_probes++;
      /* Note that because of the way the fast-case handler works,
         these two tests are actually redundant in the first iteration
         of this loop.  (Except they aren't redundant if the code just
         above resized the table first. :-) */
      if (sf->htab[ix].insn_addr == ip)
         return &sf->htab[ix];
      if (sf->htab[ix].insn_addr == 0)
         break;
      /* If i ever gets to zero and we have found neither what we're
         looking for nor an empty slot, the table must be full.  Which
         isn't possible -- we monitor the load factor to ensure it
         doesn't get above say 50%; if that ever does happen the table
         is resized. */
      tl_assert(i > 0);
      i--;
      ix++;
      if (ix == sf->htab_size) ix = 0;
   }

   /* So now we've found a free slot at ix, and we can use that. */
   tl_assert(sf->htab[ix].insn_addr == 0);

   /* Add a new record in this slot. */
   tl_assert(ip != 0); /* CAN'T REPRESENT THIS */
   sf->htab[ix].insn_addr = ip;
   sf->htab[ix].blocks    = ip_frameblocks;
   sf->htab[ix].invar.tag = Inv_Unset;
   sf->htab_used++;
   return &sf->htab[ix];
}
   1390 
   1391 
/* Fast path: return the IInstance for 'ip' in frame 'sf', creating
   it if necessary.  Handles only the case where the first hash slot
   either holds 'ip' already or is empty; anything else is deferred
   to the SLOW variant. */
inline
static IInstance* find_or_create_IInstance (
                     StackFrame* sf,
                     Addr ip,
                     XArray* /* StackBlock */ ip_frameblocks
                  )
{
   UWord ix = compute_II_hash(ip, sf->htab_size);
   /* Is it in the first slot we come to? */
   if (LIKELY(sf->htab[ix].insn_addr == ip)) {
      stats__htab_fast++;
      return &sf->htab[ix];
   }
   /* If the first slot we come to is empty, bag it. */
   if (LIKELY(sf->htab[ix].insn_addr == 0)) {
      stats__htab_fast++;
      tl_assert(ip != 0);
      sf->htab[ix].insn_addr = ip;
      sf->htab[ix].blocks    = ip_frameblocks;
      sf->htab[ix].invar.tag = Inv_Unset;
      sf->htab_used++;
      return &sf->htab[ix];
   }
   /* Otherwise we hand off to the slow case, which searches other
      slots, and optionally resizes the table if necessary. */
   return find_or_create_IInstance_SLOW( sf, ip, ip_frameblocks );
}
   1419 
   1420 
   1421 __attribute__((noinline))
   1422 static Addr calculate_StackBlock_EA ( StackBlock* descr,
   1423                                       Addr sp, Addr fp ) {
   1424    UWord w1 = (UWord)descr->base;
   1425    UWord w2 = (UWord)(descr->spRel ? sp : fp);
   1426    UWord ea = w1 + w2;
   1427    return ea;
   1428 }
   1429 
   1430 /* Given an array of StackBlocks, return an array of Addrs, holding
   1431    their effective addresses.  Caller deallocates result array. */
   1432 __attribute__((noinline))
   1433 static XArray* /* Addr */ calculate_StackBlock_EAs (
   1434                              XArray* /* StackBlock */ blocks,
   1435                              Addr sp, Addr fp
   1436                           )
   1437 {
   1438    XArray* res;
   1439    Word i, n = VG_(sizeXA)( blocks );
   1440    tl_assert(n > 0);
   1441    res = VG_(newXA)( sg_malloc, "di.sg_main.cSBE.1", sg_free, sizeof(Addr) );
   1442    for (i = 0; i < n; i++) {
   1443       StackBlock* blk = VG_(indexXA)( blocks, i );
   1444       Addr ea = calculate_StackBlock_EA( blk, sp, fp );
   1445       VG_(addToXA)( res, &ea );
   1446    }
   1447    return res;
   1448 }
   1449 
   1450 
   1451 /* Try to classify the block into which a memory access falls, and
   1452    write the result in 'inv'.  This writes all relevant fields of
   1453    'inv'. */
   1454 __attribute__((noinline))
   1455 static void classify_address ( /*OUT*/Invar* inv,
   1456                                ThreadId tid,
   1457                                Addr ea, Addr sp, Addr fp,
   1458                                UWord szB,
   1459                                XArray* /* of StackBlock */ thisInstrBlocks )
   1460 {
   1461    tl_assert(szB > 0);
   1462    /* First, look in the stack blocks accessible in this instruction's
   1463       frame. */
   1464    {
   1465      Word i, nBlocks = VG_(sizeXA)( thisInstrBlocks );
   1466      if (nBlocks == 0) stats__t_i_b_empty++;
   1467      for (i = 0; i < nBlocks; i++) {
   1468         StackBlock* descr = VG_(indexXA)( thisInstrBlocks, i );
   1469         Addr bea = calculate_StackBlock_EA( descr, sp, fp );
   1470         if (bea <= ea && ea + szB <= bea + descr->szB) {
   1471            /* found it */
   1472            inv->tag = Inv_Stack0;
   1473            inv->Inv.Stack0.addr  = bea;
   1474            inv->Inv.Stack0.szB   = descr->szB;
   1475            inv->Inv.Stack0.descr = descr;
   1476            stats__classify_Stack0++;
   1477            return;
   1478         }
   1479      }
   1480    }
   1481    /* Look in this thread's query cache */
   1482    { Word i;
   1483      QCache* cache = &qcaches[tid];
   1484      static UWord ctr = 0;
   1485      stats__qcache_queries++;
   1486      for (i = 0; i < cache->nInUse; i++) {
   1487         if (0) /* expensive in a loop like this */
   1488                tl_assert(cache->elems[i].addr + cache->elems[i].szB != 0);
   1489         stats__qcache_probes++;
   1490         if (is_subinterval_of(cache->elems[i].addr,
   1491                               cache->elems[i].szB, ea, szB)) {
   1492            if (i > 0
   1493                && (ctr++ & (QCACHE_ADVANCE_EVERY-1)) == 0) {
   1494               QCElem tmp;
   1495               tmp = cache->elems[i-1];
   1496               cache->elems[i-1] = cache->elems[i];
   1497               cache->elems[i] = tmp;
   1498               i--;
   1499            }
   1500            *inv = cache->elems[i].inv;
   1501            return;
   1502         }
   1503      }
   1504      stats__qcache_misses++;
   1505    }
   1506    /* Ok, so it's not a block in the top frame.  Perhaps it's a block
   1507       in some calling frame?  Consult this thread's stack-block
   1508       interval tree to find out. */
   1509    { StackTreeNode* nd = find_StackTreeNode( siTrees[tid], ea );
   1510      /* We know that [ea,ea+1) is in the block, but we need to
   1511         restrict to the case where the whole access falls within
   1512         it. */
   1513      if (nd && !is_subinterval_of(nd->addr, nd->szB, ea, szB)) {
   1514         nd = NULL;
   1515      }
   1516      if (nd) {
   1517         /* found it */
   1518         inv->tag = Inv_StackN;
   1519         inv->Inv.StackN.nd = nd;
   1520         stats__classify_StackN++;
   1521         goto out;
   1522      }
   1523    }
   1524    /* Not in a stack block.  Try the global pool. */
   1525    { GlobalTreeNode* nd = find_GlobalTreeNode(giTree, ea);
   1526      /* We know that [ea,ea+1) is in the block, but we need to
   1527         restrict to the case where the whole access falls within
   1528         it. */
   1529      if (nd && !is_subinterval_of(nd->addr, nd->szB, ea, szB)) {
   1530         nd = NULL;
   1531      }
   1532      if (nd) {
   1533         /* found it */
   1534         inv->tag = Inv_Global;
   1535         inv->Inv.Global.nd = nd;
   1536         stats__classify_Global++;
   1537         goto out;
   1538      }
   1539    }
   1540    /* No idea - give up. */
   1541    inv->tag = Inv_Unknown;
   1542    stats__classify_Unknown++;
   1543 
   1544    /* Update the cache */
   1545   out:
   1546    { Addr    toadd_addr = 0;
   1547      SizeT   toadd_szB  = 0;
   1548      QCache* cache      = &qcaches[tid];
   1549 
   1550      static UWord ctr = 0;
   1551      Bool show = False;
   1552      if (0 && 0 == (ctr++ & 0x1FFFFF)) show = True;
   1553 
   1554      if (show) QCache__pp(cache, "before upd");
   1555 
   1556      switch (inv->tag) {
   1557         case Inv_Global:
   1558            toadd_addr = inv->Inv.Global.nd->addr;
   1559            toadd_szB  = inv->Inv.Global.nd->szB;
   1560            break;
   1561         case Inv_StackN:
   1562            toadd_addr = inv->Inv.StackN.nd->addr;
   1563            toadd_szB  = inv->Inv.StackN.nd->szB;
   1564            break;
   1565         case Inv_Unknown: {
   1566            /* This is more complex.  We need to figure out the
   1567               intersection of the "holes" in the global and stack
   1568               interval trees into which [ea,ea+szB) falls.  This is
   1569               further complicated by the fact that [ea,ea+szB) might
   1570               not fall cleanly into a hole; it may instead fall across
   1571               the boundary of a stack or global block.  In that case
   1572               we just ignore it and don't update the cache, since we
   1573               have no way to represent this situation precisely. */
   1574            StackTreeNode  sNegInf, sPosInf, sKey, *sLB, *sUB;
   1575            GlobalTreeNode gNegInf, gPosInf, gKey, *gLB, *gUB;
   1576            Addr gMin, gMax, sMin, sMax, uMin, uMax;
   1577            Bool sOK, gOK;
   1578            sNegInf.addr = 0;
   1579            sNegInf.szB  = 1;
   1580            sPosInf.addr = ~(UWord)0;
   1581            sPosInf.szB  = 1;
   1582            gNegInf.addr = 0;
   1583            gNegInf.szB  = 1;
   1584            gPosInf.addr = ~(UWord)0;
   1585            gPosInf.szB  = 1;
   1586            sKey.addr = ea;
   1587            sKey.szB  = szB;
   1588            gKey.addr = ea;
   1589            gKey.szB  = szB;
   1590            if (0) VG_(printf)("Tree sizes %ld %ld\n",
   1591                               VG_(sizeFM)(siTrees[tid]), VG_(sizeFM)(giTree));
   1592            sOK = VG_(findBoundsFM)( siTrees[tid],
   1593                                     (UWord*)&sLB,    NULL/*unused*/,
   1594                                     (UWord*)&sUB,    NULL/*unused*/,
   1595                                     (UWord)&sNegInf, 0/*unused*/,
   1596                                     (UWord)&sPosInf, 0/*unused*/,
   1597                                     (UWord)&sKey );
   1598            gOK = VG_(findBoundsFM)( giTree,
   1599                                     (UWord*)&gLB,    NULL/*unused*/,
   1600                                     (UWord*)&gUB,    NULL/*unused*/,
   1601                                     (UWord)&gNegInf, 0/*unused*/,
   1602                                     (UWord)&gPosInf, 0/*unused*/,
   1603                                     (UWord)&gKey );
   1604            if (!(sOK && gOK)) {
   1605               /* If this happens, then [ea,ea+szB) partially overlaps
   1606                  a heap or stack block.  We can't represent that, so
   1607                  just forget it (should be very rare).  However, do
   1608                  maximum sanity checks first.  In such a
   1609                  partial overlap case, it can't be the case that both
   1610                  [ea] and [ea+szB-1] overlap the same block, since if
   1611                  that were indeed the case then it wouldn't be a
   1612                  partial overlap; rather it would simply fall inside
   1613                  that block entirely and we shouldn't be inside this
   1614                  conditional at all. */
   1615               if (!sOK) {
   1616                  StackTreeNode *ndFirst, *ndLast;
   1617                  ndFirst = find_StackTreeNode( siTrees[tid], ea );
   1618                  ndLast  = find_StackTreeNode( siTrees[tid], ea+szB-1 );
   1619                  /* if both ends of the range fall inside a block,
   1620                     they can't be in the same block. */
   1621                  if (ndFirst && ndLast)
   1622                     tl_assert(ndFirst != ndLast);
   1623                  /* for each end of the range, if it is in a block,
   1624                     the range as a whole can't be entirely within the
   1625                     block. */
   1626                  if (ndFirst)
   1627                     tl_assert(!is_subinterval_of(ndFirst->addr,
   1628                                                  ndFirst->szB, ea, szB));
   1629                  if (ndLast)
   1630                     tl_assert(!is_subinterval_of(ndLast->addr,
   1631                                                  ndLast->szB, ea, szB));
   1632               }
   1633               if (!gOK) {
   1634                  GlobalTreeNode *ndFirst, *ndLast;
   1635                  ndFirst = find_GlobalTreeNode( giTree, ea );
   1636                  ndLast  = find_GlobalTreeNode( giTree, ea+szB-1 );
   1637                  /* if both ends of the range fall inside a block,
   1638                     they can't be in the same block. */
   1639                  if (ndFirst && ndLast)
   1640                     tl_assert(ndFirst != ndLast);
   1641                  /* for each end of the range, if it is in a block,
   1642                     the range as a whole can't be entirely within the
   1643                     block. */
   1644                  if (ndFirst)
   1645                     tl_assert(!is_subinterval_of(ndFirst->addr,
   1646                                                  ndFirst->szB, ea, szB));
   1647                  if (ndLast)
   1648                     tl_assert(!is_subinterval_of(ndLast->addr,
   1649                                                  ndLast->szB, ea, szB));
   1650               }
   1651               if (0) VG_(printf)("overlapping blocks in cache\n");
   1652               return;
   1653            }
   1654            sMin = sLB == &sNegInf  ? 0         : (sLB->addr + sLB->szB);
   1655            sMax = sUB == &sPosInf  ? ~(UWord)0 : (sUB->addr - 1);
   1656            gMin = gLB == &gNegInf  ? 0         : (gLB->addr + gLB->szB);
   1657            gMax = gUB == &gPosInf  ? ~(UWord)0 : (gUB->addr - 1);
   1658            if (0) VG_(printf)("sMin %lx sMax %lx gMin %lx gMax %lx\n",
   1659                               sMin, sMax, gMin, gMax);
   1660            /* [sMin,sMax] and [gMin,gMax] must both contain
   1661               [ea,ea+szB) (right?)  That implies they must overlap at
   1662               at least over [ea,ea+szB). */
   1663            tl_assert(sMin <= ea && ea+szB-1 <= sMax);
   1664            tl_assert(gMin <= ea && ea+szB-1 <= gMax);
   1665            /* So now compute their intersection. */
   1666            uMin = Addr__max( sMin, gMin );
   1667            uMax = Addr__min( sMax, gMax );
   1668            if (0) VG_(printf)("uMin %lx uMax %lx\n", uMin, uMax);
   1669            tl_assert(uMin <= uMax);
   1670            tl_assert(uMin <= ea && ea+szB-1 <= uMax);
   1671            /* Finally, we can park [uMin,uMax] in the cache.  However,
   1672               if uMax is ~0, we can't represent the difference; hence
   1673               fudge uMax. */
   1674            if (uMin < uMax && uMax == ~(UWord)0)
   1675               uMax--;
   1676            toadd_addr = uMin;
   1677            toadd_szB  = uMax - uMin + 1;
   1678            break;
   1679         }
   1680         default:
   1681            /* We should only be caching info for the above 3 cases */
   1682           tl_assert(0);
   1683      } /* switch (inv->tag) */
   1684 
   1685      { /* and actually add this to the cache, finally */
   1686        Word i;
   1687        Word ip = cache->nInUse / 2; /* doesn't seem critical */
   1688 
   1689        if (cache->nInUse < N_QCACHE)
   1690           cache->nInUse++;
   1691        for (i = cache->nInUse-1; i > ip; i--) {
   1692           cache->elems[i] = cache->elems[i-1];
   1693        }
   1694 
   1695        tl_assert(toadd_szB > 0);
   1696        cache->elems[ip].addr = toadd_addr;
   1697        cache->elems[ip].szB  = toadd_szB;
   1698        cache->elems[ip].inv  = *inv;
   1699      }
   1700 
   1701      if (show) QCache__pp(cache, "after upd");
   1702 
   1703    }
   1704 }
   1705 
   1706 
/* CALLED FROM GENERATED CODE */
/* Check one memory access against the invariant recorded for the
   accessing instruction instance.  'ea' is the effective address,
   'sp'/'fp' the current stack and frame pointers.  'sszB' is the
   access size with its sign encoding the direction: negative for a
   store, positive for a load (see instrument_mem_access, which
   negates szB for stores).  'ip' is the address of the accessing
   instruction and 'ip_frameBlocks' its translation-time stack-block
   descriptors.  First execution of an instance merely records the
   observed classification; later executions that classify differently
   report an error and install the new observation as the standard. */
static
VG_REGPARM(3)
void helperc__mem_access ( /* Known only at run time: */
                           Addr ea, Addr sp, Addr fp,
                           /* Known at translation time: */
                           Word sszB, Addr ip, XArray* ip_frameBlocks )
{
   UWord szB;
   IInstance* iinstance;
   Invar* inv;
   Invar new_inv;
   ThreadId tid = VG_(get_running_tid)();
   StackFrame* frame;
   HChar bufE[128], bufA[128];

   stats__total_accesses++;

   tl_assert(is_sane_TId(tid));
   frame = shadowStacks[tid];
   tl_assert(frame);

   /* Find the instance info for this instruction. */
   tl_assert(ip_frameBlocks);
   iinstance = find_or_create_IInstance( frame, ip, ip_frameBlocks );
   tl_assert(iinstance);
   tl_assert(iinstance->blocks == ip_frameBlocks);

   /* Recover the unsigned access size from the signed encoding. */
   szB = (sszB < 0) ? (-sszB) : sszB;
   tl_assert(szB > 0);

   inv = &iinstance->invar;

   /* Deal with first uses of instruction instances. */
   if (inv->tag == Inv_Unset) {
      /* This is the first use of this instance of the instruction, so
         we can't make any check; we merely record what we saw, so we
         can compare it against what happens for 2nd and subsequent
         accesses. */
      classify_address( inv,
                        tid, ea, sp, fp, szB, iinstance->blocks );
      tl_assert(inv->tag != Inv_Unset);
      return;
   }

   /* So generate an Invar and see if it's different from what
      we had before. */
   classify_address( &new_inv,
                     tid, ea, sp, fp, szB, iinstance->blocks );
   tl_assert(new_inv.tag != Inv_Unset);

   /* Did we see something different from before?  If no, then there's
      no error. */
   if (eq_Invar(&new_inv, inv))
      return;

   tl_assert(inv->tag != Inv_Unset);

   /* Render the expected (bufE) and actual (bufA) invariants as text
      for the error report; the buffers are pre-zeroed and show_Invar
      is given sizeof(buf)-1, so the result is NUL-terminated. */
   VG_(memset)(bufE, 0, sizeof(bufE));
   show_Invar( bufE, sizeof(bufE)-1, inv, frame->depth );

   VG_(memset)(bufA, 0, sizeof(bufA));
   show_Invar( bufA, sizeof(bufA)-1, &new_inv, frame->depth );

   /* Note: the signed size is passed through, so the report can tell
      loads from stores. */
   sg_record_error_SorG( tid, ea, sszB, bufE, bufA );

   /* And now install the new observation as "standard", so as to
      make future error messages make more sense. */
   *inv = new_inv;
}
   1777 
   1778 
   1779 ////////////////////////////////////////
   1780 /* Primary push-a-new-frame routine.  Called indirectly from
   1781    generated code. */
   1782 
/* High-water marks for the per-thread stack interval trees and the
   global interval tree; updated in shadowStack_new_frame.  (The
   debug message announcing new maxima there is compiled out via
   "if (0 && ...)".) */
static UWord stats__max_sitree_size = 0;
static UWord stats__max_gitree_size = 0;
   1785 
/* Push a new shadow frame for 'tid' in response to a call
   instruction.  Records SP/FP as they were at the call in the current
   (caller) frame, registers the caller's stack blocks in the thread's
   stack interval tree, activates a callee frame (re-using a
   previously allocated one where possible), and invalidates the
   thread's query cache. */
static
void shadowStack_new_frame ( ThreadId tid,
                             Addr     sp_at_call_insn,
                             Addr     sp_post_call_insn,
                             Addr     fp_at_call_insn,
                             Addr     ip_post_call_insn,
                             XArray*  descrs_at_call_insn )
{
   StackFrame *callee, *caller;
   tl_assert(is_sane_TId(tid));

   caller = shadowStacks[tid];
   tl_assert(caller);

   /* Sanity-check the doubly-linked frame chain: depths must increase
      by one going inwards, and the outermost frame has depth zero. */
   if (caller->outer) { /* "this is not the outermost frame" */
      tl_assert(caller->outer->inner == caller);
      tl_assert(caller->outer->depth >= 0);
      tl_assert(1 + caller->outer->depth == caller->depth);
   } else {
      tl_assert(caller->depth == 0);
   }

   caller->sp_at_call = sp_at_call_insn;
   caller->fp_at_call = fp_at_call_insn;

   if (descrs_at_call_insn) {
      tl_assert( VG_(sizeXA)(descrs_at_call_insn) > 0 );
      /* Compute the effective addresses of the caller's stack blocks
         and enter them in this thread's stack interval tree, so that
         accesses made by the callee can be classified against them. */
      caller->blocks_added_by_call
         = calculate_StackBlock_EAs( descrs_at_call_insn,
                                     sp_at_call_insn, fp_at_call_insn );
      if (caller->blocks_added_by_call)
         add_blocks_to_StackTree( siTrees[tid],
                                  descrs_at_call_insn,
                                  caller->blocks_added_by_call,
                                  caller->depth /* stack depth at which
                                                   these blocks are
                                                   considered to exist*/ );
      /* Track high-water marks of the interval tree sizes; the
         message reporting new maxima is compiled out. */
      if (1) {
         UWord s  = VG_(sizeFM)( siTrees[tid] );
         UWord g  = VG_(sizeFM)( giTree );
         Bool  sb = s > stats__max_sitree_size;
         Bool  gb = g > stats__max_gitree_size;
         if (sb) stats__max_sitree_size = s;
         if (gb) stats__max_gitree_size = g;
         if (0 && (sb || gb))
            VG_(message)(Vg_DebugMsg,
                         "exp-sgcheck: new max tree sizes: "
                         "StackTree %ld, GlobalTree %ld\n",
                         stats__max_sitree_size, stats__max_gitree_size );
      }
   } else {
      caller->blocks_added_by_call = NULL;
   }

   /* caller->blocks_added_by_call is used again (and then freed) when
      this frame is removed from the stack. */

   /* Re-use a callee frame left over from a previous call at this
      depth, if there is one; otherwise allocate and link a new one. */
   if (caller->inner) {
      callee = caller->inner;
   } else {
      callee = sg_malloc("di.sg_main.sSnf.1", sizeof(StackFrame));
      VG_(memset)(callee, 0, sizeof(StackFrame));
      callee->outer = caller;
      caller->inner = callee;
      callee->depth = 1 + caller->depth;
      tl_assert(callee->inner == NULL);
   }

   /* This sets up .htab, .htab_size and .htab_used */
   initialise_II_hash_table( callee );

   callee->creation_sp    = sp_post_call_insn;
   callee->sp_at_call     = 0; // not actually required ..
   callee->fp_at_call     = 0; // .. these 3 initialisations are ..
   callee->blocks_added_by_call = NULL; // .. just for cleanness

   /* record the new running stack frame */
   shadowStacks[tid] = callee;

   /* and this thread's query cache is now invalid */
   QCache__invalidate( &qcaches[tid] );

   /* Compiled-out debug trace of the call, indented by depth. */
   if (0)
   { Word d = callee->depth;
     HChar fnname[80];
     Bool ok;
     Addr ip = ip_post_call_insn;
     ok = VG_(get_fnname_w_offset)( ip, fnname, sizeof(fnname) );
     while (d > 0) {
        VG_(printf)(" ");
        d--;
     }
     VG_(printf)("> %s %#lx\n", ok ? fnname : "???", ip);
   }
}
   1881 
   1882 /* CALLED FROM GENERATED CODE */
   1883 static
   1884 VG_REGPARM(3)
   1885 void helperc__new_frame ( Addr sp_post_call_insn,
   1886                           Addr fp_at_call_insn,
   1887                           Addr ip_post_call_insn,
   1888                           XArray* blocks_at_call_insn,
   1889                           Word sp_adjust )
   1890 {
   1891    ThreadId tid = VG_(get_running_tid)();
   1892    Addr     sp_at_call_insn = sp_post_call_insn + sp_adjust;
   1893    shadowStack_new_frame( tid,
   1894                           sp_at_call_insn,
   1895                           sp_post_call_insn,
   1896                           fp_at_call_insn,
   1897                           ip_post_call_insn,
   1898                           blocks_at_call_insn );
   1899 }
   1900 
   1901 
   1902 ////////////////////////////////////////
   1903 /* Primary remove-frame(s) routine.  Called indirectly from
   1904    generated code. */
   1905 
/* Unwind this thread's shadow stack so it is consistent with the real
   stack pointer 'sp_now': pop every shadow frame whose creation_sp is
   strictly below sp_now (assumes a downwards-growing stack).  Each
   popped frame's instruction-instance hash table is freed, the stack
   blocks its call added to the interval tree are removed, and, if
   anything was popped, the thread's query cache is invalidated. */
__attribute__((noinline))
static void shadowStack_unwind ( ThreadId tid, Addr sp_now )
{
   StackFrame *innermost, *innermostOrig;
   tl_assert(is_sane_TId(tid));
   innermost = shadowStacks[tid];
   tl_assert(innermost);
   innermostOrig = innermost;
   //VG_(printf)("UNWIND sp_new = %p\n", sp_now);
   while (1) {
      /* Never pop the outermost frame. */
      if (!innermost->outer)
         break;
      if (innermost->inner)
         tl_assert(innermost->inner->outer == innermost);
      tl_assert(innermost->outer->inner == innermost);
      tl_assert(innermost->blocks_added_by_call == NULL);
      /* Stop once the innermost frame was created at or below the
         current SP -- it is still live. */
      if (sp_now <= innermost->creation_sp) break;
      //VG_(printf)("UNWIND     dump %p\n", innermost->creation_sp);
      tl_assert(innermost->htab);
      /* Free the II hash table, unless it is the fixed-size one
         embedded in the frame itself. */
      if (innermost->htab != &innermost->htab_fixed[0])
         sg_free(innermost->htab);
      /* be on the safe side */
      innermost->creation_sp = 0;
      innermost->htab = NULL;
      innermost->htab_size = 0;
      innermost->htab_used = 0;
      innermost->sp_at_call = 0;
      innermost->fp_at_call = 0;
      innermost->blocks_added_by_call = NULL;
      innermost = innermost->outer;

      /* So now we're "back" in the calling frame.  Remove from this
         thread's stack-interval-tree, the blocks added at the time of
         the call. */

      if (innermost->outer) { /* not at the outermost frame */
         if (innermost->blocks_added_by_call == NULL) {
         } else {
            del_blocks_from_StackTree( siTrees[tid],
                                       innermost->blocks_added_by_call );
            VG_(deleteXA)( innermost->blocks_added_by_call );
            innermost->blocks_added_by_call = NULL;
         }
      }
      /* That completes the required tidying of the interval tree
         associated with the frame we just removed. */

      /* Compiled-out debug trace of the return, indented by depth. */
      if (0) {
         Word d = innermost->depth;
         while (d > 0) {
            VG_(printf)(" ");
            d--;
         }
         VG_(printf)("X\n");
      }

   }

   tl_assert(innermost);

   /* Only touch shadowStacks / the query cache if we actually popped
      at least one frame. */
   if (innermost != innermostOrig) {
      shadowStacks[tid] = innermost;
      /* this thread's query cache is now invalid */
      QCache__invalidate( &qcaches[tid] );
   }
}
   1972 
   1973 
   1974 
   1975 //////////////////////////////////////////////////////////////
   1976 //                                                          //
   1977 // Instrumentation                                          //
   1978 //                                                          //
   1979 //////////////////////////////////////////////////////////////
   1980 
   1981 /* What does instrumentation need to do?
   1982 
   1983    - at each Call transfer, generate a call to shadowStack_new_frame
   1984      do this by manually inspecting the IR
   1985 
   1986    - at each sp change, if the sp change is negative,
   1987      call shadowStack_unwind
   1988      do this by asking for SP-change analysis
   1989 
   1990    - for each memory referencing instruction,
   1991      call helperc__mem_access
   1992 */
   1993 
   1994 /* A complication: sg_ instrumentation and h_ instrumentation need to
   1995    be interleaved.  Since the latter is a lot more complex than the
   1996    former, we split the sg_ instrumentation here into four functions
   1997    and let the h_ instrumenter call the four functions as it goes.
   1998    Hence the h_ instrumenter drives the sg_ instrumenter.
   1999 
   2000    To make this viable, the sg_ instrumenter carries what running
   2001    state it needs in 'struct _SGEnv'.  This is exported only
   2002    abstractly from this file.
   2003 */
   2004 
/* Running state carried by the sg_ instrumenter between calls to the
   sg_instrument_* functions.  Created by sg_instrument_init, freed by
   sg_instrument_fini; exported only abstractly from this file. */
struct _SGEnv {
   /* the current insn's IP, taken from the most recent Ist_IMark */
   Addr64 curr_IP;
   /* whether the above is actually known (False until the first
      IMark of the superblock has been seen) */
   Bool curr_IP_known;
   /* if we find a mem ref, is it the first for this insn?  Used for
      detecting insns which make more than one memory ref, a situation
      we basically can't really handle properly; and so we ignore all
      but the first ref. */
   Bool firstRef;
   /* READONLY: callback (plus its opaque closure argument) used to
      allocate fresh IRTemps in the superblock under construction */
   IRTemp (*newIRTemp_cb)(IRType,void*);
   void* newIRTemp_opaque;
};
   2019 
   2020 
   2021 /* --- Helper fns for instrumentation --- */
   2022 
   2023 static IRTemp gen_Get_SP ( struct _SGEnv*  sge,
   2024                            IRSB*           bbOut,
   2025                            VexGuestLayout* layout,
   2026                            Int             hWordTy_szB )
   2027 {
   2028    IRExpr* sp_expr;
   2029    IRTemp  sp_temp;
   2030    IRType  sp_type;
   2031    /* This in effect forces the host and guest word sizes to be the
   2032       same. */
   2033    tl_assert(hWordTy_szB == layout->sizeof_SP);
   2034    sp_type = layout->sizeof_SP == 8 ? Ity_I64 : Ity_I32;
   2035    sp_expr = IRExpr_Get( layout->offset_SP, sp_type );
   2036    sp_temp = sge->newIRTemp_cb( sp_type, sge->newIRTemp_opaque );
   2037    addStmtToIRSB( bbOut, IRStmt_WrTmp( sp_temp, sp_expr ) );
   2038    return sp_temp;
   2039 }
   2040 
   2041 static IRTemp gen_Get_FP ( struct _SGEnv*  sge,
   2042                            IRSB*           bbOut,
   2043                            VexGuestLayout* layout,
   2044                            Int             hWordTy_szB )
   2045 {
   2046    IRExpr* fp_expr;
   2047    IRTemp  fp_temp;
   2048    IRType  fp_type;
   2049    /* This in effect forces the host and guest word sizes to be the
   2050       same. */
   2051    tl_assert(hWordTy_szB == layout->sizeof_SP);
   2052    fp_type = layout->sizeof_FP == 8 ? Ity_I64 : Ity_I32;
   2053    fp_expr = IRExpr_Get( layout->offset_FP, fp_type );
   2054    fp_temp = sge->newIRTemp_cb( fp_type, sge->newIRTemp_opaque );
   2055    addStmtToIRSB( bbOut, IRStmt_WrTmp( fp_temp, fp_expr ) );
   2056    return fp_temp;
   2057 }
   2058 
/* Emit IR into 'bbOut' which, at run time, calls helperc__mem_access
   for the memory access described by 'addr' (an IR atom), 'szB' and
   'isStore'.  'curr_IP' is the guest address of the accessing
   instruction; 'layout' says where SP and FP live in the guest
   state. */
static void instrument_mem_access ( struct _SGEnv* sge,
                                    IRSB*   bbOut,
                                    IRExpr* addr,
                                    Int     szB,
                                    Bool    isStore,
                                    Int     hWordTy_szB,
                                    Addr    curr_IP,
                                    VexGuestLayout* layout )
{
   IRType  tyAddr      = Ity_INVALID;
   XArray* frameBlocks = NULL;

   tl_assert(isIRAtom(addr));
   tl_assert(hWordTy_szB == 4 || hWordTy_szB == 8);

   tyAddr = typeOfIRExpr( bbOut->tyenv, addr );
   tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);

#if defined(VGA_x86)
   /* x86-only heuristic: peek at the guest code bytes around curr_IP
      and skip instrumentation for accesses at instructions matching
      the prologue/epilogue patterns below (presumably to avoid noise
      from frame setup/teardown -- NOTE(review): rationale inferred,
      confirm against project history). */
   { UChar* p = (UChar*)curr_IP;
     // pop %ebp; RET
     if (p[-1] == 0x5d && p[0] == 0xc3) return;
     // pop %ebp; RET $imm16
     if (p[-1] == 0x5d && p[0] == 0xc2) return;
     // PUSH %EBP; mov %esp,%ebp
     if (p[0] == 0x55 && p[1] == 0x89 && p[2] == 0xe5) return;
   }
#endif

   /* First off, find or create the StackBlocks for this instruction. */
   frameBlocks = get_StackBlocks_for_IP( curr_IP );
   tl_assert(frameBlocks);
   //if (VG_(sizeXA)( frameBlocks ) == 0)
   //   frameBlocks = NULL;

   /* Generate a call to "helperc__mem_access", passing:
         addr current_SP current_FP szB curr_IP frameBlocks
      The size is negated for stores, so its sign carries the access
      direction to the helper. */
   { IRTemp t_SP = gen_Get_SP( sge, bbOut, layout, hWordTy_szB );
     IRTemp t_FP = gen_Get_FP( sge, bbOut, layout, hWordTy_szB );
     IRExpr** args
        = mkIRExprVec_6( addr,
                         IRExpr_RdTmp(t_SP),
                         IRExpr_RdTmp(t_FP),
                         mkIRExpr_HWord( isStore ? (-szB) : szB ),
                         mkIRExpr_HWord( curr_IP ),
                         mkIRExpr_HWord( (HWord)frameBlocks ) );
     IRDirty* di
        = unsafeIRDirty_0_N( 3/*regparms*/,
                             "helperc__mem_access",
                            VG_(fnptr_to_fnentry)( &helperc__mem_access ),
                             args );

     addStmtToIRSB( bbOut, IRStmt_Dirty(di) );
   }
}
   2115 
   2116 
   2117 /* --- Instrumentation main (4 fns) --- */
   2118 
   2119 struct _SGEnv * sg_instrument_init ( IRTemp (*newIRTemp_cb)(IRType,void*),
   2120                                      void* newIRTemp_opaque )
   2121 {
   2122    struct _SGEnv * env = sg_malloc("di.sg_main.sii.1",
   2123                                    sizeof(struct _SGEnv));
   2124    tl_assert(env);
   2125    env->curr_IP          = 0;
   2126    env->curr_IP_known    = False;
   2127    env->firstRef         = True;
   2128    env->newIRTemp_cb     = newIRTemp_cb;
   2129    env->newIRTemp_opaque = newIRTemp_opaque;
   2130    return env;
   2131 }
   2132 
/* Release the instrumentation state allocated by sg_instrument_init. */
void sg_instrument_fini ( struct _SGEnv * env )
{
   sg_free(env);
}
   2137 
   2138 /* Add instrumentation for 'st' to 'sbOut', and possibly modify 'env'
   2139    as required.  This must be called before 'st' itself is added to
   2140    'sbOut'. */
   2141 void sg_instrument_IRStmt ( /*MOD*/struct _SGEnv * env,
   2142                             /*MOD*/IRSB* sbOut,
   2143                             IRStmt* st,
   2144                             VexGuestLayout* layout,
   2145                             IRType gWordTy, IRType hWordTy )
   2146 {
   2147    if (!sg_clo_enable_sg_checks)
   2148       return;
   2149 
   2150    tl_assert(st);
   2151    tl_assert(isFlatIRStmt(st));
   2152    switch (st->tag) {
   2153       case Ist_NoOp:
   2154       case Ist_AbiHint:
   2155       case Ist_Put:
   2156       case Ist_PutI:
   2157       case Ist_MBE:
   2158          /* None of these can contain any memory references. */
   2159          break;
   2160 
   2161       case Ist_Exit:
   2162          tl_assert(st->Ist.Exit.jk != Ijk_Call);
   2163          /* else we must deal with a conditional call */
   2164          break;
   2165 
   2166       case Ist_IMark:
   2167          env->curr_IP_known = True;
   2168          env->curr_IP       = (Addr)st->Ist.IMark.addr;
   2169          env->firstRef      = True;
   2170          break;
   2171 
   2172       case Ist_Store:
   2173          tl_assert(env->curr_IP_known);
   2174          if (env->firstRef) {
   2175             instrument_mem_access(
   2176                env, sbOut,
   2177                st->Ist.Store.addr,
   2178                sizeofIRType(typeOfIRExpr(sbOut->tyenv, st->Ist.Store.data)),
   2179                True/*isStore*/,
   2180                sizeofIRType(hWordTy),
   2181                env->curr_IP, layout
   2182             );
   2183             env->firstRef = False;
   2184          }
   2185          break;
   2186 
   2187       case Ist_WrTmp: {
   2188          IRExpr* data = st->Ist.WrTmp.data;
   2189          if (data->tag == Iex_Load) {
   2190             tl_assert(env->curr_IP_known);
   2191             if (env->firstRef) {
   2192                instrument_mem_access(
   2193                   env, sbOut,
   2194                   data->Iex.Load.addr,
   2195                   sizeofIRType(data->Iex.Load.ty),
   2196                   False/*!isStore*/,
   2197                   sizeofIRType(hWordTy),
   2198                   env->curr_IP, layout
   2199                );
   2200                env->firstRef = False;
   2201             }
   2202          }
   2203          break;
   2204       }
   2205 
   2206       case Ist_Dirty: {
   2207          Int      dataSize;
   2208          IRDirty* d = st->Ist.Dirty.details;
   2209          if (d->mFx != Ifx_None) {
   2210             /* This dirty helper accesses memory.  Collect the
   2211                details. */
   2212             tl_assert(env->curr_IP_known);
   2213             if (env->firstRef) {
   2214                tl_assert(d->mAddr != NULL);
   2215                tl_assert(d->mSize != 0);
   2216                dataSize = d->mSize;
   2217                if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
   2218                   instrument_mem_access(
   2219                      env, sbOut, d->mAddr, dataSize, False/*!isStore*/,
   2220                      sizeofIRType(hWordTy), env->curr_IP, layout
   2221                   );
   2222                }
   2223                if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
   2224                   instrument_mem_access(
   2225                      env, sbOut, d->mAddr, dataSize, True/*isStore*/,
   2226                      sizeofIRType(hWordTy), env->curr_IP, layout
   2227                   );
   2228                }
   2229                env->firstRef = False;
   2230             }
   2231          } else {
   2232             tl_assert(d->mAddr == NULL);
   2233             tl_assert(d->mSize == 0);
   2234          }
   2235          break;
   2236       }
   2237 
   2238       case Ist_CAS: {
   2239          /* We treat it as a read and a write of the location.  I
   2240             think that is the same behaviour as it was before IRCAS
   2241             was introduced, since prior to that point, the Vex front
   2242             ends would translate a lock-prefixed instruction into a
   2243             (normal) read followed by a (normal) write. */
   2244          if (env->firstRef) {
   2245             Int    dataSize;
   2246             IRCAS* cas = st->Ist.CAS.details;
   2247             tl_assert(cas->addr != NULL);
   2248             tl_assert(cas->dataLo != NULL);
   2249             dataSize = sizeofIRType(typeOfIRExpr(sbOut->tyenv, cas->dataLo));
   2250             if (cas->dataHi != NULL)
   2251                dataSize *= 2; /* since it's a doubleword-CAS */
   2252             instrument_mem_access(
   2253                env, sbOut, cas->addr, dataSize, False/*!isStore*/,
   2254                sizeofIRType(hWordTy), env->curr_IP, layout
   2255             );
   2256             instrument_mem_access(
   2257                env, sbOut, cas->addr, dataSize, True/*isStore*/,
   2258                sizeofIRType(hWordTy), env->curr_IP, layout
   2259             );
   2260             env->firstRef = False;
   2261          }
   2262          break;
   2263       }
   2264 
   2265       default:
   2266          tl_assert(0);
   2267 
   2268    } /* switch (st->tag) */
   2269 }
   2270 
   2271 
/* Add instrumentation for the final jump of an IRSB 'sbOut', and
   possibly modify 'env' as required.  This must be the last
   instrumentation statement in the block.  Only Ijk_Call transfers
   get any work: a call to helperc__new_frame is emitted so the
   shadow stack is pushed when the guest call executes. */
void sg_instrument_final_jump ( /*MOD*/struct _SGEnv * env,
                                /*MOD*/IRSB* sbOut,
                                IRExpr* next,
                                IRJumpKind jumpkind,
                                VexGuestLayout* layout,
                                IRType gWordTy, IRType hWordTy )
{
   if (!sg_clo_enable_sg_checks)
      return;

   if (jumpkind == Ijk_Call) {
      // Assumes x86 or amd64
      IRTemp   sp_post_call_insn, fp_post_call_insn;
      XArray*  frameBlocks;
      IRExpr** args;
      IRDirty* di;
      /* Read SP/FP as they will be just after the call has executed. */
      sp_post_call_insn
         = gen_Get_SP( env, sbOut, layout, sizeofIRType(hWordTy) );
      fp_post_call_insn
         = gen_Get_FP( env, sbOut, layout, sizeofIRType(hWordTy) );
      tl_assert(env->curr_IP_known);
      frameBlocks = get_StackBlocks_for_IP( env->curr_IP );
      tl_assert(frameBlocks);
      /* An empty descriptor set is passed as NULL;
         shadowStack_new_frame treats NULL as "no blocks". */
      if (VG_(sizeXA)(frameBlocks) == 0)
         frameBlocks = NULL;
      args
         = mkIRExprVec_5(
              IRExpr_RdTmp(sp_post_call_insn),
              IRExpr_RdTmp(fp_post_call_insn),
                         /* assume the call doesn't change FP */
              next,
              mkIRExpr_HWord( (HWord)frameBlocks ),
              /* sp_adjust: one guest word -- the amount the call
                 instruction pushed (the return address) on x86/amd64;
                 helperc__new_frame adds it back to recover the SP at
                 the call. */
              mkIRExpr_HWord( sizeofIRType(gWordTy) )
           );
      di = unsafeIRDirty_0_N(
              3/*regparms*/,
              "helperc__new_frame",
              VG_(fnptr_to_fnentry)( &helperc__new_frame ),
              args );
      addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
   }
}
   2317 
   2318 
   2319 //////////////////////////////////////////////////////////////
   2320 //                                                          //
   2321 // end Instrumentation                                      //
   2322 //                                                          //
   2323 //////////////////////////////////////////////////////////////
   2324 
   2325 
   2326 //////////////////////////////////////////////////////////////
   2327 //                                                          //
   2328 // misc                                                     //
   2329 //                                                          //
   2330 //////////////////////////////////////////////////////////////
   2331 
   2332 /* Make a new empty stack frame that is suitable for being the
   2333    outermost frame in a stack.  It has a creation_sp of effectively
   2334    infinity, so it can never be removed. */
   2335 static StackFrame* new_root_StackFrame ( void )
   2336 {
   2337    StackFrame* sframe = sg_malloc("di.sg_main.nrS.1", sizeof(StackFrame));
   2338    VG_(memset)( sframe, 0, sizeof(*sframe) );
   2339    sframe->creation_sp = ~0UL;
   2340 
   2341    /* This sets up .htab, .htab_size and .htab_used */
   2342    initialise_II_hash_table( sframe );
   2343 
   2344    /* ->depth, ->outer, ->inner are 0, NULL, NULL */
   2345 
   2346    return sframe;
   2347 }
   2348 
   2349 /* Primary routine for setting up the shadow stack for a new thread.
   2350    Note that this is used to create not only child thread stacks, but
   2351    the root thread's stack too.  We create a new stack with
   2352    .creation_sp set to infinity, so that the outermost frame can never
   2353    be removed (by shadowStack_unwind).  The core calls this function
   2354    as soon as a thread is created.  We cannot yet get its SP value,
   2355    since that may not yet be set. */
   2356 static void shadowStack_thread_create ( ThreadId parent, ThreadId child )
   2357 {
   2358    tl_assert(is_sane_TId(child));
   2359    if (parent == VG_INVALID_THREADID) {
   2360       /* creating the main thread's stack */
   2361    } else {
   2362       tl_assert(is_sane_TId(parent));
   2363       tl_assert(parent != child);
   2364       tl_assert(shadowStacks[parent] != NULL);
   2365       tl_assert(siTrees[parent] != NULL);
   2366    }
   2367 
   2368    /* Create the child's stack.  Bear in mind we may be re-using
   2369       it. */
   2370    if (shadowStacks[child] == NULL) {
   2371       /* First use of this stack.  Just allocate an initial frame. */
   2372       tl_assert(siTrees[child] == NULL);
   2373    } else {
   2374       StackFrame *frame, *frame2;
   2375       /* re-using a stack. */
   2376       /* get rid of the interval tree */
   2377       tl_assert(siTrees[child] != NULL);
   2378       delete_StackTree( siTrees[child] );
   2379       siTrees[child] = NULL;
   2380       /* Throw away all existing frames. */
   2381       frame = shadowStacks[child];
   2382       while (frame->outer)
   2383          frame = frame->outer;
   2384       tl_assert(frame->depth == 0);
   2385       while (frame) {
   2386          frame2 = frame->inner;
   2387          if (frame2) tl_assert(1 + frame->depth == frame2->depth);
   2388          sg_free(frame);
   2389          frame = frame2;
   2390       }
   2391       shadowStacks[child] = NULL;
   2392    }
   2393 
   2394    tl_assert(shadowStacks[child] == NULL);
   2395    tl_assert(siTrees[child] == NULL);
   2396 
   2397    /* Set up the initial stack frame. */
   2398    shadowStacks[child] = new_root_StackFrame();
   2399 
   2400    /* and set up the child's stack block interval tree. */
   2401    siTrees[child] = new_StackTree();
   2402 }
   2403 
   2404 /* Once a thread is ready to go, the core calls here.  We take the
   2405    opportunity to push a second frame on its stack, with the
   2406    presumably valid SP value that is going to be used for the thread's
   2407    startup.  Hence we should always wind up with a valid outermost
   2408    frame for the thread. */
   2409 static void shadowStack_set_initial_SP ( ThreadId tid )
   2410 {
   2411    StackFrame* sf;
   2412    tl_assert(is_sane_TId(tid));
   2413    sf = shadowStacks[tid];
   2414    tl_assert(sf != NULL);
   2415    tl_assert(sf->outer == NULL);
   2416    tl_assert(sf->inner == NULL);
   2417    tl_assert(sf->creation_sp == ~0UL);
   2418    shadowStack_new_frame( tid, 0, VG_(get_SP)(tid),
   2419                                0, VG_(get_IP)(tid), NULL );
   2420 }
   2421 
   2422 
   2423 //////////////////////////////////////////////////////////////
   2424 //                                                          //
   2425 // main-ish                                                 //
   2426 //                                                          //
   2427 //////////////////////////////////////////////////////////////
   2428 
   2429 /* CALLED indirectly FROM GENERATED CODE.  Calls here are created by
   2430    sp-change analysis, as requested in pc_pre_clo_int(). */
   2431 void sg_die_mem_stack ( Addr old_SP, SizeT len ) {
   2432    ThreadId  tid = VG_(get_running_tid)();
   2433    shadowStack_unwind( tid, old_SP+len );
   2434 }
   2435 
/* Tool startup, before command-line option processing: initialise
   this module's globals and the interned stack/global block sets. */
void sg_pre_clo_init ( void ) {
   ourGlobals_init();
   init_StackBlocks_set();
   init_GlobalBlock_set();
}
   2441 
/* Post-option-processing hook.  Nothing to do for this module. */
void sg_post_clo_init ( void ) {
}
   2444 
/* Core callback: a new thread is being created.  Set up (or reset)
   the child's shadow stack and stack-block interval tree. */
void sg_pre_thread_ll_create ( ThreadId parent, ThreadId child ) {
   shadowStack_thread_create(parent, child);
}
   2448 
/* Core callback: the thread is about to run its first instruction,
   so its SP is now valid; push its real startup frame. */
void sg_pre_thread_first_insn ( ThreadId tid ) {
   shadowStack_set_initial_SP(tid);
}
   2452 
/* Tool shutdown: if --stats=yes was given, print this module's
   counters (access classification, invar preening, query cache and
   hash table statistics).  'exitcode' is unused here — presumably
   kept to match the core's fini-callback signature. */
void sg_fini(Int exitcode)
{
   if (VG_(clo_stats)) {
      VG_(message)(Vg_DebugMsg,
         " sg_:  %'llu total accesses, of which:\n", stats__total_accesses);
      VG_(message)(Vg_DebugMsg,
         " sg_:     stack0: %'12llu classify\n",
         stats__classify_Stack0);
      VG_(message)(Vg_DebugMsg,
         " sg_:     stackN: %'12llu classify\n",
         stats__classify_StackN);
      VG_(message)(Vg_DebugMsg,
         " sg_:     global: %'12llu classify\n",
         stats__classify_Global);
      VG_(message)(Vg_DebugMsg,
         " sg_:    unknown: %'12llu classify\n",
         stats__classify_Unknown);
      VG_(message)(Vg_DebugMsg,
         " sg_:  %'llu Invars preened, of which %'llu changed\n",
         stats__Invars_preened, stats__Invars_changed);
      VG_(message)(Vg_DebugMsg,
         " sg_:   t_i_b_MT: %'12llu\n", stats__t_i_b_empty);
      VG_(message)(Vg_DebugMsg,
         " sg_:     qcache: %'llu searches, %'llu probes, %'llu misses\n",
         stats__qcache_queries, stats__qcache_probes, stats__qcache_misses);
      VG_(message)(Vg_DebugMsg,
         " sg_:  htab-fast: %'llu hits\n",
         stats__htab_fast);
      VG_(message)(Vg_DebugMsg,
         " sg_:  htab-slow: %'llu searches, %'llu probes, %'llu resizes\n",
         stats__htab_searches, stats__htab_probes, stats__htab_resizes);
   }
}
   2486 
   2487 
   2488 /*--------------------------------------------------------------------*/
   2489 /*--- end                                                sg_main.c ---*/
   2490 /*--------------------------------------------------------------------*/
   2491