Home | History | Annotate | Download | only in exp-sgcheck
      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- Ptrcheck: a pointer-use checker.                             ---*/
      4 /*--- This file checks stack and global array accesses.            ---*/
      5 /*---                                                    sg_main.c ---*/
      6 /*--------------------------------------------------------------------*/
      7 
      8 /*
      9    This file is part of Ptrcheck, a Valgrind tool for checking pointer
     10    use in programs.
     11 
     12    Copyright (C) 2008-2011 OpenWorks Ltd
     13       info (at) open-works.co.uk
     14 
     15    This program is free software; you can redistribute it and/or
     16    modify it under the terms of the GNU General Public License as
     17    published by the Free Software Foundation; either version 2 of the
     18    License, or (at your option) any later version.
     19 
     20    This program is distributed in the hope that it will be useful, but
     21    WITHOUT ANY WARRANTY; without even the implied warranty of
     22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     23    General Public License for more details.
     24 
     25    You should have received a copy of the GNU General Public License
     26    along with this program; if not, write to the Free Software
     27    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     28    02111-1307, USA.
     29 
     30    The GNU General Public License is contained in the file COPYING.
     31 
     32    Neither the names of the U.S. Department of Energy nor the
     33    University of California nor the names of its contributors may be
     34    used to endorse or promote products derived from this software
     35    without prior written permission.
     36 */
     37 
     38 #include "pub_tool_basics.h"
     39 #include "pub_tool_libcbase.h"
     40 #include "pub_tool_libcassert.h"
     41 #include "pub_tool_libcprint.h"
     42 #include "pub_tool_tooliface.h"
     43 #include "pub_tool_wordfm.h"
     44 #include "pub_tool_xarray.h"
     45 #include "pub_tool_threadstate.h"
     46 #include "pub_tool_mallocfree.h"
     47 #include "pub_tool_machine.h"
     48 #include "pub_tool_debuginfo.h"
     49 #include "pub_tool_options.h"
     50 
     51 #include "pc_common.h"
     52 
     53 #include "sg_main.h"      // self
     54 
     55 
     56 static
     57 void preen_global_Invars ( Addr a, SizeT len ); /*fwds*/
     58 
     59 
     60 //////////////////////////////////////////////////////////////
     61 //                                                          //
     62 // Basic Stuff                                              //
     63 //                                                          //
     64 //////////////////////////////////////////////////////////////
     65 
     66 static inline Bool is_sane_TId ( ThreadId tid )
     67 {
     68    return tid >= 0 && tid < VG_N_THREADS
     69           && tid != VG_INVALID_THREADID;
     70 }
     71 
     72 static void* sg_malloc ( HChar* cc, SizeT n ) {
     73    void* p;
     74    tl_assert(n > 0);
     75    p = VG_(malloc)( cc, n );
     76    tl_assert(p);
     77    return p;
     78 }
     79 
     80 static void sg_free ( void* p ) {
     81    tl_assert(p);
     82    VG_(free)(p);
     83 }
     84 
     85 
     86 /* Compare the intervals [a1,a1+n1) and [a2,a2+n2).  Return -1 if the
     87    first interval is lower, 1 if the first interval is higher, and 0
     88    if there is any overlap.  Redundant paranoia with casting is there
     89    following what looked distinctly like a bug in gcc-4.1.2, in which
     90    some of the comparisons were done signedly instead of
     91    unsignedly. */
     92 inline
     93 static Word cmp_nonempty_intervals ( Addr a1, SizeT n1,
     94                                      Addr a2, SizeT n2 ) {
     95    UWord a1w = (UWord)a1;
     96    UWord n1w = (UWord)n1;
     97    UWord a2w = (UWord)a2;
     98    UWord n2w = (UWord)n2;
     99    tl_assert(n1w > 0 && n2w > 0);
    100    if (a1w + n1w <= a2w) return -1L;
    101    if (a2w + n2w <= a1w) return 1L;
    102    return 0;
    103 }
    104 
    105 /* Return true iff [aSmall,aSmall+nSmall) is entirely contained
    106    within [aBig,aBig+nBig). */
    107 inline
    108 static Bool is_subinterval_of ( Addr aBig, SizeT nBig,
    109                                 Addr aSmall, SizeT nSmall ) {
    110    tl_assert(nBig > 0 && nSmall > 0);
    111    return aBig <= aSmall && aSmall + nSmall <= aBig + nBig;
    112 }
    113 
    114 inline
    115 static Addr Addr__min ( Addr a1, Addr a2 ) {
    116    return a1 < a2 ? a1 : a2;
    117 }
    118 
    119 inline
    120 static Addr Addr__max ( Addr a1, Addr a2 ) {
    121    return a1 < a2 ? a2 : a1;
    122 }
    123 
    124 
    125 //////////////////////////////////////////////////////////////
    126 //                                                          //
    127 // StackBlocks Persistent Cache                             //
    128 //                                                          //
    129 //////////////////////////////////////////////////////////////
    130 
    131 /* We maintain a set of XArray* of StackBlocks.  These are never
    132    freed.  When a new StackBlock vector is acquired from
    133    VG_(di_get_local_blocks_at_ip), we compare it to the existing set.
    134    If not present, it is added.  If present, the just-acquired one is
    135    freed and the copy used.
    136 
    137    This simplifies storage management elsewhere.  It allows us to
    138    assume that a pointer to an XArray* of StackBlock is valid forever.
    139    It also means there are no duplicates anywhere, which could be
    140    important from a space point of view for programs that generate a
    141    lot of translations, or where translations are frequently discarded
    142    and re-made.
    143 
    144    Note that we normalise the arrays by sorting the elements according
    145    to an arbitrary total order, so as to avoid the situation that two
    146    vectors describe the same set of variables but are not structurally
    147    identical. */
    148 
    149 static inline Bool StackBlock__sane ( StackBlock* fb )
    150 {
    151    if (fb->name[ sizeof(fb->name)-1 ] != 0)
    152       return False;
    153    if (fb->spRel != False && fb->spRel != True)
    154       return False;
    155    if (fb->isVec != False && fb->isVec != True)
    156       return False;
    157    return True;
    158 }
    159 
    160 /* Generate an arbitrary total ordering on StackBlocks. */
    161 static Word StackBlock__cmp ( StackBlock* fb1, StackBlock* fb2 )
    162 {
    163    Word r;
    164    tl_assert(StackBlock__sane(fb1));
    165    tl_assert(StackBlock__sane(fb2));
    166    /* Hopefully the .base test hits most of the time.  For the blocks
    167       associated with any particular instruction, if the .base values
    168       are the same then probably it doesn't make sense for the other
    169       fields to be different.  But this is supposed to be a completely
    170       general structural total order, so we have to compare everything
    171       anyway. */
    172    if (fb1->base < fb2->base) return -1;
    173    if (fb1->base > fb2->base) return 1;
    174    /* compare sizes */
    175    if (fb1->szB < fb2->szB) return -1;
    176    if (fb1->szB > fb2->szB) return 1;
    177    /* compare sp/fp flag */
    178    if (fb1->spRel < fb2->spRel) return -1;
    179    if (fb1->spRel > fb2->spRel) return 1;
    180    /* compare is/is-not array-typed flag */
    181    if (fb1->isVec < fb2->isVec) return -1;
    182    if (fb1->isVec > fb2->isVec) return 1;
    183    /* compare the name */
    184    r = (Word)VG_(strcmp)(fb1->name, fb2->name);
    185    return r;
    186 }
    187 
    188 /* Returns True if all fields except .szB are the same.  szBs may or
    189    may not be the same; they are simply not consulted. */
    190 static Bool StackBlock__all_fields_except_szB_are_equal (
    191                StackBlock* fb1,
    192                StackBlock* fb2
    193             )
    194 {
    195    tl_assert(StackBlock__sane(fb1));
    196    tl_assert(StackBlock__sane(fb2));
    197    return fb1->base == fb2->base
    198           && fb1->spRel == fb2->spRel
    199           && fb1->isVec == fb2->isVec
    200           && 0 == VG_(strcmp)(fb1->name, fb2->name);
    201 }
    202 
    203 
    204 /* Generate an arbitrary total ordering on vectors of StackBlocks. */
    205 static Word StackBlocks__cmp ( XArray* fb1s, XArray* fb2s )
    206 {
    207    Word i, r, n1, n2;
    208    n1 = VG_(sizeXA)( fb1s );
    209    n2 = VG_(sizeXA)( fb2s );
    210    if (n1 < n2) return -1;
    211    if (n1 > n2) return 1;
    212    for (i = 0; i < n1; i++) {
    213       StackBlock *fb1, *fb2;
    214       fb1 = VG_(indexXA)( fb1s, i );
    215       fb2 = VG_(indexXA)( fb2s, i );
    216       r = StackBlock__cmp( fb1, fb2 );
    217       if (r != 0) return r;
    218    }
    219    tl_assert(i == n1 && i == n2);
    220    return 0;
    221 }
    222 
    223 static void pp_StackBlocks ( XArray* sbs )
    224 {
    225    Word i, n = VG_(sizeXA)( sbs );
    226    VG_(message)(Vg_DebugMsg, "<<< STACKBLOCKS\n" );
    227    for (i = 0; i < n; i++) {
    228       StackBlock* sb = (StackBlock*)VG_(indexXA)( sbs, i );
    229       VG_(message)(Vg_DebugMsg,
    230          "   StackBlock{ off %ld szB %lu spRel:%c isVec:%c \"%s\" }\n",
    231          sb->base, sb->szB, sb->spRel ? 'Y' : 'N',
    232          sb->isVec ? 'Y' : 'N', &sb->name[0]
    233       );
    234    }
    235    VG_(message)(Vg_DebugMsg, ">>> STACKBLOCKS\n" );
    236 }
    237 
    238 
    239 /* ---------- The StackBlock vector cache ---------- */
    240 
    241 static WordFM* /* XArray* of StackBlock -> nothing */
    242        frameBlocks_set = NULL;
    243 
    244 static void init_StackBlocks_set ( void )
    245 {
    246    tl_assert(!frameBlocks_set);
    247    frameBlocks_set
    248       = VG_(newFM)( sg_malloc, "di.sg_main.iSBs.1", sg_free,
    249                     (Word(*)(UWord,UWord))StackBlocks__cmp );
    250    tl_assert(frameBlocks_set);
    251 }
    252 
    253 /* Find the given StackBlock-vector in our collection thereof.  If
    254    found, deallocate the supplied one, and return the address of the
    255    copy.  If not found, add the supplied one to our collection and
    256    return its address. */
    257 static XArray* /* of StackBlock */
    258        StackBlocks__find_and_dealloc__or_add
    259           ( XArray* /* of StackBlock */ orig )
    260 {
    261    UWord key, val;
    262 
    263    /* First, normalise, as per comments above. */
    264    VG_(setCmpFnXA)( orig, (Int(*)(void*,void*))StackBlock__cmp );
    265    VG_(sortXA)( orig );
    266 
    267    /* Now get rid of any exact duplicates. */
    268   nuke_dups:
    269    { Word r, w, nEQ, n = VG_(sizeXA)( orig );
    270      if (n >= 2) {
    271         w = 0;
    272         nEQ = 0;
    273         for (r = 0; r < n; r++) {
    274            if (r+1 < n) {
    275               StackBlock* pR0 = VG_(indexXA)( orig, r+0 );
    276               StackBlock* pR1 = VG_(indexXA)( orig, r+1 );
    277               Word c = StackBlock__cmp(pR0,pR1);
    278               tl_assert(c == -1 || c == 0);
    279               if (c == 0) { nEQ++; continue; }
    280            }
    281            if (w != r) {
    282               StackBlock* pW = VG_(indexXA)( orig, w );
    283               StackBlock* pR = VG_(indexXA)( orig, r );
    284               *pW = *pR;
    285            }
    286            w++;
    287         }
    288         tl_assert(r == n);
    289         tl_assert(w + nEQ == n);
    290         if (w < n) {
    291            VG_(dropTailXA)( orig, n-w );
    292         }
    293         if (0) VG_(printf)("delta %ld\n", n-w);
    294      }
    295    }
    296 
    297    /* Deal with the following strangeness, where two otherwise
    298       identical blocks are claimed to have different sizes.  In which
    299       case we use the larger size. */
    300    /* StackBlock{ off 16 szB 66 spRel:Y isVec:Y "sz" }
    301       StackBlock{ off 16 szB 130 spRel:Y isVec:Y "sz" }
    302       StackBlock{ off 208 szB 16 spRel:Y isVec:Y "ar" }
    303    */
    304    { Word i, n = VG_(sizeXA)( orig );
    305      if (n >= 2) {
    306         for (i = 0; i < n-1; i++) {
    307            StackBlock* sb0 = VG_(indexXA)( orig, i+0 );
    308            StackBlock* sb1 = VG_(indexXA)( orig, i+1 );
    309            if (StackBlock__all_fields_except_szB_are_equal(sb0, sb1)) {
    310               /* They can't be identical because the previous tidying
    311                  pass would have removed the duplicates.  And they
    312                  can't be > because the earlier sorting pass would
    313                  have ordered otherwise-identical descriptors
    314                  according to < on .szB fields.  Hence: */
    315               tl_assert(sb0->szB < sb1->szB);
    316               sb0->szB = sb1->szB;
    317               /* This makes the blocks identical, at the size of the
    318                  larger one.  Rather than go to all the hassle of
    319                  sliding the rest down, simply go back to the
    320                  remove-duplicates stage.  The assertion guarantees
    321                  that we eventually make progress, since the rm-dups
    322                  stage will get rid of one of the blocks.  This is
    323                  expected to happen only exceedingly rarely. */
    324               tl_assert(StackBlock__cmp(sb0,sb1) == 0);
    325               goto nuke_dups;
    326            }
    327         }
    328      }
    329    }
    330 
    331    /* If there are any blocks which overlap and have the same
    332       fpRel-ness, junk the whole descriptor; it's obviously bogus.
    333       Icc11 certainly generates bogus info from time to time.
    334 
    335       This check is pretty weak; really we ought to have a stronger
    336       sanity check. */
    337    { Word i, n = VG_(sizeXA)( orig );
    338      static Int moans = 3;
    339      for (i = 0; i < n-1; i++) {
    340        StackBlock* sb1 = (StackBlock*)VG_(indexXA)( orig, i );
    341        StackBlock* sb2 = (StackBlock*)VG_(indexXA)( orig, i+1 );
    342        if (sb1->spRel == sb2->spRel
    343            && (sb1->base >= sb2->base
    344                || sb1->base + sb1->szB > sb2->base)) {
    345           if (moans > 0 && !VG_(clo_xml)) {
    346              moans--;
    347              VG_(message)(Vg_UserMsg, "Warning: bogus DWARF3 info: "
    348                                       "overlapping stack blocks\n");
    349              if (VG_(clo_verbosity) >= 2)
    350                 pp_StackBlocks(orig);
    351              if (moans == 0)
    352                 VG_(message)(Vg_UserMsg, "Further instances of this "
    353                                          "message will not be shown\n" );
    354           }
    355           VG_(dropTailXA)( orig, VG_(sizeXA)( orig ));
    356           break;
    357        }
    358      }
    359    }
    360 
    361    /* Now, do we have it already? */
    362    if (VG_(lookupFM)( frameBlocks_set, &key, &val, (UWord)orig )) {
    363       /* yes */
    364       XArray* res;
    365       tl_assert(val == 0);
    366       tl_assert(key != (UWord)orig);
    367       VG_(deleteXA)(orig);
    368       res = (XArray*)key;
    369       return res;
    370    } else {
    371       /* no */
    372       VG_(addToFM)( frameBlocks_set, (UWord)orig, 0 );
    373       return orig;
    374    }
    375 }
    376 
    377 /* Top level function for getting the StackBlock vector for a given
    378    instruction.  It is guaranteed that the returned pointer will be
    379    valid for the entire rest of the run, and also that the addresses
    380    of the individual elements of the array will not change. */
    381 
    382 static XArray* /* of StackBlock */ get_StackBlocks_for_IP ( Addr ip )
    383 {
    384    XArray* blocks = VG_(di_get_stack_blocks_at_ip)( ip, True/*arrays only*/ );
    385    tl_assert(blocks);
    386    return StackBlocks__find_and_dealloc__or_add( blocks );
    387 }
    388 
    389 
    390 //////////////////////////////////////////////////////////////
    391 //                                                          //
    392 // GlobalBlocks Persistent Cache                            //
    393 //                                                          //
    394 //////////////////////////////////////////////////////////////
    395 
    396 /* Generate an arbitrary total ordering on GlobalBlocks. */
    397 static Word GlobalBlock__cmp ( GlobalBlock* gb1, GlobalBlock* gb2 )
    398 {
    399    Word r;
    400    /* compare addrs */
    401    if (gb1->addr < gb2->addr) return -1;
    402    if (gb1->addr > gb2->addr) return 1;
    403    /* compare sizes */
    404    if (gb1->szB < gb2->szB) return -1;
    405    if (gb1->szB > gb2->szB) return 1;
    406    /* compare is/is-not array-typed flag */
    407    if (gb1->isVec < gb2->isVec) return -1;
    408    if (gb1->isVec > gb2->isVec) return 1;
    409    /* compare the name */
    410    r = (Word)VG_(strcmp)(gb1->name, gb2->name);
    411    if (r != 0) return r;
    412    /* compare the soname */
    413    r = (Word)VG_(strcmp)(gb1->soname, gb2->soname);
    414    return r;
    415 }
    416 
    417 static WordFM* /* GlobalBlock* -> nothing */
    418        globalBlock_set = NULL;
    419 
    420 static void init_GlobalBlock_set ( void )
    421 {
    422    tl_assert(!globalBlock_set);
    423     globalBlock_set
    424        = VG_(newFM)( sg_malloc, "di.sg_main.iGBs.1", sg_free,
    425                      (Word(*)(UWord,UWord))GlobalBlock__cmp );
    426    tl_assert(globalBlock_set);
    427 }
    428 
    429 
    430 /* Top level function for making GlobalBlocks persistent.  Call here
    431    with a non-persistent version, and the returned one is guaranteed
    432    to be valid for the entire rest of the run.  The supplied one is
    433    copied, not stored, so can be freed after the call. */
    434 
    435 static GlobalBlock* get_persistent_GlobalBlock ( GlobalBlock* orig )
    436 {
    437    UWord key, val;
    438    /* Now, do we have it already? */
    439    if (VG_(lookupFM)( globalBlock_set, &key, &val, (UWord)orig )) {
    440       /* yes, return the copy */
    441       GlobalBlock* res;
    442       tl_assert(val == 0);
    443       res = (GlobalBlock*)key;
    444       tl_assert(res != orig);
    445       return res;
    446    } else {
    447       /* no.  clone it, store the clone and return the clone's
    448          address. */
    449       GlobalBlock* clone = sg_malloc( "di.sg_main.gpGB.1",
    450                                       sizeof(GlobalBlock) );
    451       tl_assert(clone);
    452       *clone = *orig;
    453       VG_(addToFM)( globalBlock_set, (UWord)clone, 0 );
    454       return clone;
    455    }
    456 }
    457 
    458 
    459 //////////////////////////////////////////////////////////////
    460 //                                                          //
    461 // Interval tree of StackTreeBlock                          //
    462 //                                                          //
    463 //////////////////////////////////////////////////////////////
    464 
    465 /* A node in a stack interval tree.  Zero length intervals (.szB == 0)
    466    are not allowed.
    467 
    468    A stack interval tree is a (WordFM StackTreeNode* void).  There is
    469    one stack interval tree for each thread.
    470 */
    471 typedef
    472    struct {
    473       Addr        addr;
    474       SizeT       szB;   /* copied from .descr->szB */
    475       StackBlock* descr; /* it's an instance of this block */
    476       UWord       depth; /* depth of stack at time block was pushed */
    477    }
    478    StackTreeNode;
    479 
    480 static void pp_StackTree ( WordFM* sitree, HChar* who )
    481 {
    482    UWord keyW, valW;
    483    VG_(printf)("<<< BEGIN pp_StackTree %s\n", who );
    484    VG_(initIterFM)( sitree );
    485    while (VG_(nextIterFM)( sitree, &keyW, &valW )) {
    486       StackTreeNode* nd = (StackTreeNode*)keyW;
    487       VG_(printf)("  [%#lx,+%lu) descr=%p %s %lu\n", nd->addr, nd->szB,
    488                   nd->descr, nd->descr->name, nd->descr->szB);
    489    }
    490    VG_(printf)(">>> END   pp_StackTree %s\n", who );
    491 }
    492 
    493 /* Interval comparison function for StackTreeNode */
    494 static Word cmp_intervals_StackTreeNode ( StackTreeNode* sn1,
    495                                           StackTreeNode* sn2 )
    496 {
    497    return cmp_nonempty_intervals(sn1->addr, sn1->szB,
    498                                  sn2->addr, sn2->szB);
    499 }
    500 
    501 /* Find the node holding 'a', if any. */
    502 static StackTreeNode* find_StackTreeNode ( WordFM* sitree, Addr a )
    503 {
    504    UWord keyW, valW;
    505    StackTreeNode key;
    506    tl_assert(sitree);
    507    key.addr = a;
    508    key.szB  = 1;
    509    if (VG_(lookupFM)( sitree, &keyW, &valW, (UWord)&key )) {
    510       StackTreeNode* res = (StackTreeNode*)keyW;
    511       tl_assert(valW == 0);
    512       tl_assert(res != &key);
    513       return res;
    514    } else {
    515       return NULL;
    516    }
    517 }
    518 
    519 /* Note that the supplied XArray of FrameBlock must have been
    520    made persistent already. */
    521 __attribute__((noinline))
    522 static void add_blocks_to_StackTree (
    523                /*MOD*/WordFM* sitree,
    524                XArray* /* FrameBlock */ descrs,
    525                XArray* /* Addr */ bases,
    526                UWord depth
    527             )
    528 {
    529    Bool debug = (Bool)0;
    530    Word i, nDescrs, nBases;
    531 
    532    nDescrs = VG_(sizeXA)( descrs ),
    533    nBases = VG_(sizeXA)( bases );
    534    tl_assert(nDescrs == nBases);
    535 
    536    if (nDescrs == 0) return;
    537 
    538    tl_assert(sitree);
    539    if (debug) {
    540       VG_(printf)("\ndepth = %lu\n", depth);
    541       pp_StackTree( sitree, "add_blocks_to_StackTree-pre" );
    542       pp_StackBlocks(descrs);
    543    }
    544 
    545    for (i = 0; i < nDescrs; i++) {
    546       Bool already_present;
    547       StackTreeNode* nyu;
    548       Addr        addr  = *(Addr*)VG_(indexXA)( bases, i );
    549       StackBlock* descr = (StackBlock*)VG_(indexXA)( descrs, i );
    550       tl_assert(descr->szB > 0);
    551       nyu = sg_malloc( "di.sg_main.abtST.1", sizeof(StackTreeNode) );
    552       nyu->addr  = addr;
    553       nyu->szB   = descr->szB;
    554       nyu->descr = descr;
    555       nyu->depth = depth;
    556       if (debug) VG_(printf)("ADD %#lx %lu\n", addr, descr->szB);
    557       already_present = VG_(addToFM)( sitree, (UWord)nyu, 0 );
    558       /* The interval can't already be there; else we have
    559          overlapping stack blocks. */
    560       tl_assert(!already_present);
    561       if (debug) {
    562          pp_StackTree( sitree, "add_blocks_to_StackTree-step" );
    563       }
    564    }
    565    if (debug) {
    566       pp_StackTree( sitree, "add_blocks_to_StackTree-post" );
    567       VG_(printf)("\n");
    568    }
    569 }
    570 
    571 static void del_blocks_from_StackTree ( /*MOD*/WordFM* sitree,
    572                                         XArray* /* Addr */ bases )
    573 {
    574    UWord oldK, oldV;
    575    Word i, nBases = VG_(sizeXA)( bases );
    576    for (i = 0; i < nBases; i++) {
    577       Bool b;
    578       Addr addr = *(Addr*)VG_(indexXA)( bases, i );
    579       StackTreeNode* nd = find_StackTreeNode(sitree, addr);
    580       /* The interval must be there; we added it earlier when
    581          the associated frame was created. */
    582       tl_assert(nd);
    583       b = VG_(delFromFM)( sitree, &oldK, &oldV, (UWord)nd );
    584       /* we just found the block! */
    585       tl_assert(b);
    586       tl_assert(oldV == 0);
    587       tl_assert(nd == (StackTreeNode*)oldK);
    588       sg_free(nd);
    589    }
    590 }
    591 
    592 
    593 static void delete_StackTree__kFin ( UWord keyW ) {
    594    StackTreeNode* nd = (StackTreeNode*)keyW;
    595    tl_assert(nd);
    596    sg_free(nd);
    597 }
    598 static void delete_StackTree__vFin ( UWord valW ) {
    599    tl_assert(valW == 0);
    600 }
    601 static void delete_StackTree ( WordFM* sitree )
    602 {
    603    VG_(deleteFM)( sitree,
    604                  delete_StackTree__kFin, delete_StackTree__vFin );
    605 }
    606 
    607 static WordFM* new_StackTree ( void ) {
    608    return VG_(newFM)( sg_malloc, "di.sg_main.nST.1", sg_free,
    609                       (Word(*)(UWord,UWord))cmp_intervals_StackTreeNode );
    610 }
    611 
    612 
    613 //////////////////////////////////////////////////////////////
    614 //                                                          //
    615 // Interval tree of GlobalTreeBlock                         //
    616 //                                                          //
    617 //////////////////////////////////////////////////////////////
    618 
    619 /* A node in a global interval tree.  Zero length intervals
    620    (.szB == 0) are not allowed.
    621 
    622    A global interval tree is a (WordFM GlobalTreeNode* void).  There
    623    is one global interval tree for the entire process.
    624 */
    625 typedef
    626    struct {
    627       Addr         addr; /* copied from .descr->addr */
    628       SizeT        szB; /* copied from .descr->szB */
    629       GlobalBlock* descr; /* it's this block */
    630    }
    631    GlobalTreeNode;
    632 
    633 static void GlobalTreeNode__pp ( GlobalTreeNode* nd ) {
    634    tl_assert(nd->descr);
    635    VG_(printf)("GTNode [%#lx,+%ld) %s",
    636                nd->addr, nd->szB, nd->descr->name);
    637 }
    638 
    639 static void GlobalTree__pp ( WordFM* /* of (GlobalTreeNode,void) */ gitree,
    640                              HChar* who )
    641 {
    642    UWord keyW, valW;
    643    GlobalTreeNode* nd;
    644    VG_(printf)("<<< GlobalBlockTree (%s)\n", who);
    645    VG_(initIterFM)( gitree );
    646    while (VG_(nextIterFM)( gitree, &keyW, &valW )) {
    647       tl_assert(valW == 0);
    648       nd = (GlobalTreeNode*)keyW;
    649       VG_(printf)("  ");
    650       GlobalTreeNode__pp(nd);
    651       VG_(printf)("\n");
    652    }
    653    VG_(doneIterFM)( gitree );
    654    VG_(printf)(">>>\n");
    655 }
    656 
    657 /* Interval comparison function for GlobalTreeNode */
    658 static Word cmp_intervals_GlobalTreeNode ( GlobalTreeNode* gn1,
    659                                            GlobalTreeNode* gn2 )
    660 {
    661    return cmp_nonempty_intervals( gn1->addr, gn1->szB,
    662                                   gn2->addr, gn2->szB );
    663 }
    664 
    665 /* Find the node holding 'a', if any. */
    666 static GlobalTreeNode* find_GlobalTreeNode ( WordFM* gitree, Addr a )
    667 {
    668    UWord keyW, valW;
    669    GlobalTreeNode key;
    670    key.addr = a;
    671    key.szB  = 1;
    672    if (VG_(lookupFM)( gitree, &keyW, &valW, (UWord)&key )) {
    673       GlobalTreeNode* res = (GlobalTreeNode*)keyW;
    674       tl_assert(valW == 0);
    675       tl_assert(res != &key);
    676       return res;
    677    } else {
    678       return NULL;
    679    }
    680 }
    681 
    682 /* Note that the supplied GlobalBlock must have been made persistent
    683    already. */
    684 static void add_block_to_GlobalTree (
    685                /*MOD*/WordFM* gitree,
    686                GlobalBlock* descr
    687             )
    688 {
    689    Bool already_present;
    690    GlobalTreeNode *nyu, *nd;
    691    UWord keyW, valW;
    692    static Int moans = 3;
    693 
    694    tl_assert(descr->szB > 0);
    695    nyu = sg_malloc( "di.sg_main.abtG.1", sizeof(GlobalTreeNode) );
    696    nyu->addr  = descr->addr;
    697    nyu->szB   = descr->szB;
    698    nyu->descr = descr;
    699 
    700    /* Basically it's an error to add a global block to the tree that
    701       is already in the tree.  However, detect and ignore attempts to
    702       insert exact duplicates; they do appear for some reason
    703       (possible a bug in m_debuginfo?) */
    704    already_present = VG_(lookupFM)( gitree, &keyW, &valW, (UWord)nyu );
    705    if (already_present) {
    706       tl_assert(valW == 0);
    707       nd = (GlobalTreeNode*)keyW;
    708       tl_assert(nd);
    709       tl_assert(nd != nyu);
    710       tl_assert(nd->descr);
    711       tl_assert(nyu->descr);
    712       if (nd->addr == nyu->addr && nd->szB == nyu->szB
    713           /* && 0 == VG_(strcmp)(nd->descr->name, nyu->descr->name) */
    714           /* Although it seems reasonable to demand that duplicate
    715              blocks have identical names, that is too strict.  For
    716              example, reading debuginfo from glibc produces two
    717              otherwise identical blocks with names "tzname" and
    718              "__tzname".  A constraint of the form "must be identical,
    719              or one must be a substring of the other" would fix that.
    720              However, such trickery is scuppered by the fact that we
    721              truncate all variable names to 15 characters to make
    722              storage management simpler, hence giving pairs like
    723              "__EI___pthread_" (truncated) vs "__pthread_keys".  So
    724              it's simplest just to skip the name comparison
    725              completely. */
    726           && 0 == VG_(strcmp)(nd->descr->soname, nyu->descr->soname)) {
    727          /* exact duplicate; ignore it */
    728          sg_free(nyu);
    729          return;
    730       }
    731       /* else fall through; the assertion below will catch it */
    732    }
    733 
    734    already_present = VG_(addToFM)( gitree, (UWord)nyu, 0 );
    735    /* The interval can't already be there; else we have
    736       overlapping global blocks. */
    737    /* Unfortunately (25 Jan 09) at least icc11 has been seen to
    738       generate overlapping block descriptions in the Dwarf3; clearly
    739       bogus. */
    740    if (already_present && moans > 0 && !VG_(clo_xml)) {
    741       moans--;
    742       VG_(message)(Vg_UserMsg, "Warning: bogus DWARF3 info: "
    743                                "overlapping global blocks\n");
    744       if (VG_(clo_verbosity) >= 2) {
    745          GlobalTree__pp( gitree,
    746                          "add_block_to_GlobalTree: non-exact duplicate" );
    747          VG_(printf)("Overlapping block: ");
    748          GlobalTreeNode__pp(nyu);
    749          VG_(printf)("\n");
    750       }
    751       if (moans == 0)
    752          VG_(message)(Vg_UserMsg, "Further instances of this "
    753                                   "message will not be shown\n" );
    754    }
    755    /* tl_assert(!already_present); */
    756 }
    757 
    758 static Bool del_GlobalTree_range ( /*MOD*/WordFM* gitree,
    759                                    Addr a, SizeT szB )
    760 {
    761    /* One easy way to do this: look up [a,a+szB) in the tree.  That
    762       will either succeed, producing a block which intersects that
    763       range, in which case we delete it and repeat; or it will fail,
    764       in which case there are no blocks intersecting the range, and we
    765       can bring the process to a halt. */
    766    UWord keyW, valW, oldK, oldV;
    767    GlobalTreeNode key, *nd;
    768    Bool b, anyFound;
    769 
    770    tl_assert(szB > 0);
    771 
    772    anyFound = False;
    773 
    774    key.addr = a;
    775    key.szB  = szB;
    776 
    777    while (VG_(lookupFM)( gitree, &keyW, &valW, (UWord)&key )) {
    778       anyFound = True;
    779       nd = (GlobalTreeNode*)keyW;
    780       tl_assert(valW == 0);
    781       tl_assert(nd != &key);
    782       tl_assert(cmp_nonempty_intervals(a, szB, nd->addr, nd->szB) == 0);
    783 
    784       b = VG_(delFromFM)( gitree, &oldK, &oldV, (UWord)&key );
    785       tl_assert(b);
    786       tl_assert(oldV == 0);
    787       tl_assert(oldK == keyW); /* check we deleted the node we just found */
    788    }
    789 
    790    return anyFound;
    791 }
    792 
    793 
    794 //////////////////////////////////////////////////////////////
    795 //                                                          //
    796 // Invar                                                    //
    797 //                                                          //
    798 //////////////////////////////////////////////////////////////
    799 
    800 /* An invariant, as resulting from watching the destination of a
    801    memory referencing instruction.  Initially is Inv_Unset until the
    802    instruction makes a first access. */
    803 
    804 typedef
    805    enum {
    806       Inv_Unset=1,  /* not established yet */
    807       Inv_Unknown,  /* unknown location */
    808       Inv_Stack0,   /* array-typed stack block in innermost frame */
    809       Inv_StackN,   /* array-typed stack block in non-innermost frame */
    810       Inv_Global,   /* array-typed global block */
    811    }
    812    InvarTag;
    813 
    814 typedef
    815    struct {
    816       InvarTag tag;
    817       union {
    818          struct {
    819          } Unset;
    820          struct {
    821          } Unknown;
    822          struct {
    823             Addr  addr;
    824             SizeT szB;
    825             StackBlock* descr;
    826          } Stack0; /* innermost stack frame */
    827          struct {
    828             /* Pointer to a node in the interval tree for
    829               this thread. */
    830             StackTreeNode* nd;
    831          } StackN; /* non-innermost stack frame */
    832          struct {
    833            /* Pointer to a GlobalBlock in the interval tree of
    834               global blocks. */
    835            GlobalTreeNode* nd;
    836          } Global;
    837       }
    838       Inv;
    839    }
    840    Invar;
    841 
    842 /* Partial debugging printing for an Invar. */
    843 static void pp_Invar ( Invar* i )
    844 {
    845    switch (i->tag) {
    846       case Inv_Unset:
    847          VG_(printf)("Unset");
    848          break;
    849       case Inv_Unknown:
    850          VG_(printf)("Unknown");
    851          break;
    852       case Inv_Stack0:
    853          VG_(printf)("Stack0 [%#lx,+%lu)",
    854                      i->Inv.Stack0.addr, i->Inv.Stack0.szB);
    855          break;
    856       case Inv_StackN:
    857          VG_(printf)("StackN [%#lx,+%lu)",
    858                      i->Inv.StackN.nd->addr, i->Inv.StackN.nd->szB);
    859          break;
    860       case Inv_Global:
    861          VG_(printf)("Global [%#lx,+%lu)",
    862                      i->Inv.Global.nd->addr, i->Inv.Global.nd->szB);
    863          break;
    864       default:
    865          tl_assert(0);
    866    }
    867 }
    868 
    869 /* Compare two Invars for equality. */
    870 static Bool eq_Invar ( Invar* i1, Invar* i2 )
    871 {
    872    tl_assert(i1->tag != Inv_Unset);
    873    tl_assert(i2->tag != Inv_Unset);
    874    if (i1->tag != i2->tag)
    875       return False;
    876    switch (i1->tag) {
    877       case Inv_Unknown:
    878          return True;
    879       case Inv_Stack0:
    880          return i1->Inv.Stack0.addr == i2->Inv.Stack0.addr
    881                 && i1->Inv.Stack0.szB == i2->Inv.Stack0.szB;
    882       case Inv_StackN:
    883          return i1->Inv.StackN.nd == i2->Inv.StackN.nd;
    884       case Inv_Global:
    885          return i1->Inv.Global.nd == i2->Inv.Global.nd;
    886       default:
    887          tl_assert(0);
    888    }
    889    /*NOTREACHED*/
    890    tl_assert(0);
    891 }
    892 
    893 /* Generate a piece of text showing 'ea' is relative to 'invar', if
    894    known.  If unknown, generate an empty string.  'buf' must be at
    895    least 32 bytes in size.  Also return the absolute value of the
    896    delta, if known, or zero if not known.
    897 */
    898 static void gen_delta_str ( /*OUT*/HChar* buf,
    899                             /*OUT*/UWord* absDelta,
    900                             Invar* inv, Addr ea )
    901 {
    902    Addr  block = 0;
    903    SizeT szB   = 0;
    904 
    905    buf[0] = 0;
    906    *absDelta = 0;
    907 
    908    switch (inv->tag) {
    909       case Inv_Unknown:
    910       case Inv_Unset:
    911          return; /* unknown */
    912       case Inv_Stack0:
    913          block = inv->Inv.Stack0.addr;
    914          szB   = inv->Inv.Stack0.szB;
    915          break;
    916       case Inv_StackN:
    917          block = inv->Inv.StackN.nd->addr;
    918          szB   = inv->Inv.StackN.nd->szB;
    919          break;
    920       case Inv_Global:
    921          block = inv->Inv.Global.nd->addr;
    922          szB = inv->Inv.Global.nd->szB;
    923          break;
    924       default:
    925          tl_assert(0);
    926    }
    927    tl_assert(szB > 0);
    928    if (ea < block) {
    929       *absDelta = block - ea;
    930       VG_(sprintf)(buf, "%'lu before", *absDelta);
    931    }
    932    else if (ea >= block + szB) {
    933       *absDelta = ea - (block + szB);
    934       VG_(sprintf)(buf, "%'lu after", *absDelta);
    935    }
    936    else {
    937      // Leave *absDelta at zero.
    938      VG_(sprintf)(buf, "%'lu inside", ea - block);
    939    }
    940 }
    941 
    942 
    943 /* Print selected parts of an Invar, suitable for use in error
    944    messages. */
    945 static void show_Invar( HChar* buf, Word nBuf, Invar* inv, Word depth )
    946 {
    947    HChar* str;
    948    tl_assert(nBuf >= 128);
    949    buf[0] = 0;
    950    switch (inv->tag) {
    951       case Inv_Unknown:
    952          VG_(sprintf)(buf, "%s", "unknown");
    953          break;
    954       case Inv_Stack0:
    955          str = "array";
    956          VG_(sprintf)(buf, "stack %s \"%s\" of size %'lu in this frame",
    957                       str, inv->Inv.Stack0.descr->name,
    958                       inv->Inv.Stack0.szB );
    959          break;
    960       case Inv_StackN:
    961          str = "array";
    962          VG_(sprintf)(buf, "stack %s \"%s\" of size %'lu in frame %lu back from here",
    963                       str, inv->Inv.StackN.nd->descr->name,
    964                            inv->Inv.StackN.nd->descr->szB,
    965                            depth - inv->Inv.StackN.nd->depth );
    966          break;
    967       case Inv_Global:
    968          str = "array";
    969          VG_(sprintf)(buf, "global %s \"%s\" of size %'lu in object with soname \"%s\"",
    970                       str, inv->Inv.Global.nd->descr->name,
    971                            inv->Inv.Global.nd->descr->szB,
    972                            inv->Inv.Global.nd->descr->soname );
    973          break;
    974       case Inv_Unset:
    975          VG_(sprintf)(buf, "%s", "Unset!");
    976          break;
    977       default:
    978          tl_assert(0);
    979    }
    980 }
    981 
    982 
    983 //////////////////////////////////////////////////////////////
    984 //                                                          //
    985 // our globals                                              //
    986 //                                                          //
    987 //////////////////////////////////////////////////////////////
    988 
    989 //////////////////////////////////////////////////////////////
    990 ///
    991 
    992 #define N_QCACHE 16
    993 
    994 /* Powers of two only, else the result will be chaos */
    995 #define QCACHE_ADVANCE_EVERY 16
    996 
    997 /* Per-thread query cache.  Note that the invar can only be Inv_StackN
    998    (but not Inv_Stack0), Inv_Global or Inv_Unknown. */
    999 typedef
   1000    struct {
   1001       Addr  addr;
   1002       SizeT szB;
   1003       Invar inv;
   1004    }
   1005    QCElem;
   1006 
   1007 typedef
   1008    struct {
   1009       Word   nInUse;
   1010       QCElem elems[N_QCACHE];
   1011    }
   1012    QCache;
   1013 
   1014 static void QCache__invalidate ( QCache* qc ) {
   1015    tl_assert(qc->nInUse >= 0);
   1016    qc->nInUse = 0;
   1017 }
   1018 
   1019 static void QCache__pp ( QCache* qc, HChar* who )
   1020 {
   1021    Word i;
   1022    VG_(printf)("<<< QCache with %ld elements (%s)\n", qc->nInUse, who);
   1023    for (i = 0; i < qc->nInUse; i++) {
   1024       VG_(printf)("  [%#lx,+%#lx) ", qc->elems[i].addr, qc->elems[i].szB);
   1025       pp_Invar(&qc->elems[i].inv);
   1026       VG_(printf)("\n");
   1027    }
   1028    VG_(printf)(">>>\n");
   1029 }
   1030 
   1031 static ULong stats__qcache_queries = 0;
   1032 static ULong stats__qcache_misses  = 0;
   1033 static ULong stats__qcache_probes  = 0;
   1034 
   1035 ///
   1036 //////////////////////////////////////////////////////////////
   1037 
   1038 /* Each thread has:
   1039    * a shadow stack of StackFrames, which is a double-linked list
   1040    * an stack block interval tree
   1041 */
   1042 static  struct _StackFrame*          shadowStacks[VG_N_THREADS];
   1043 
   1044 static  WordFM* /* StackTreeNode */  siTrees[VG_N_THREADS];
   1045 
   1046 static  QCache                       qcaches[VG_N_THREADS];
   1047 
   1048 
   1049 /* Additionally, there is one global variable interval tree
   1050    for the entire process.
   1051 */
   1052 static WordFM* /* GlobalTreeNode */ giTree;
   1053 
   1054 
   1055 static void invalidate_all_QCaches ( void )
   1056 {
   1057    Word i;
   1058    for (i = 0; i < VG_N_THREADS; i++) {
   1059       QCache__invalidate( &qcaches[i] );
   1060    }
   1061 }
   1062 
   1063 static void ourGlobals_init ( void )
   1064 {
   1065    Word i;
   1066    for (i = 0; i < VG_N_THREADS; i++) {
   1067       shadowStacks[i] = NULL;
   1068       siTrees[i] = NULL;
   1069    }
   1070    invalidate_all_QCaches();
   1071    giTree = VG_(newFM)( sg_malloc, "di.sg_main.oGi.1", sg_free,
   1072                         (Word(*)(UWord,UWord))cmp_intervals_GlobalTreeNode );
   1073 }
   1074 
   1075 
   1076 //////////////////////////////////////////////////////////////
   1077 //                                                          //
   1078 // Handle global variable load/unload events                //
   1079 //                                                          //
   1080 //////////////////////////////////////////////////////////////
   1081 
   1082 static void acquire_globals ( ULong di_handle )
   1083 {
   1084    Word n, i;
   1085    XArray* /* of GlobalBlock */ gbs;
   1086    if (0) VG_(printf)("ACQUIRE GLOBALS %llu\n", di_handle );
   1087    gbs = VG_(di_get_global_blocks_from_dihandle)
   1088             (di_handle, True/*arrays only*/);
   1089    if (0) VG_(printf)("   GOT %ld globals\n", VG_(sizeXA)( gbs ));
   1090 
   1091    n = VG_(sizeXA)( gbs );
   1092    for (i = 0; i < n; i++) {
   1093       GlobalBlock* gbp;
   1094       GlobalBlock* gb = VG_(indexXA)( gbs, i );
   1095       if (0) VG_(printf)("   new Global size %2lu at %#lx:  %s %s\n",
   1096                          gb->szB, gb->addr, gb->soname, gb->name );
   1097       tl_assert(gb->szB > 0);
   1098       /* Make a persistent copy of each GlobalBlock, and add it
   1099          to the tree. */
   1100       gbp = get_persistent_GlobalBlock( gb );
   1101       add_block_to_GlobalTree( giTree, gbp );
   1102    }
   1103 
   1104    VG_(deleteXA)( gbs );
   1105 }
   1106 
   1107 
   1108 /* We only intercept these two because we need to see any di_handles
   1109    that might arise from the mappings/allocations. */
   1110 void sg_new_mem_mmap( Addr a, SizeT len,
   1111                       Bool rr, Bool ww, Bool xx, ULong di_handle )
   1112 {
   1113    if (di_handle > 0)
   1114       acquire_globals(di_handle);
   1115 }
   1116 void sg_new_mem_startup( Addr a, SizeT len,
   1117                          Bool rr, Bool ww, Bool xx, ULong di_handle )
   1118 {
   1119    if (di_handle > 0)
   1120       acquire_globals(di_handle);
   1121 }
   1122 void sg_die_mem_munmap ( Addr a, SizeT len )
   1123 {
   1124    Bool debug = (Bool)0;
   1125    Bool overlap = False;
   1126 
   1127    if (debug) VG_(printf)("MUNMAP %#lx %lu\n", a, len );
   1128 
   1129    if (len == 0)
   1130       return;
   1131 
   1132    overlap = del_GlobalTree_range(giTree, a, len);
   1133 
   1134    { /* redundant sanity check */
   1135      UWord keyW, valW;
   1136      VG_(initIterFM)( giTree );
   1137      while (VG_(nextIterFM)( giTree, &keyW, &valW )) {
   1138        GlobalTreeNode* nd = (GlobalTreeNode*)keyW;
   1139         tl_assert(valW == 0);
   1140         tl_assert(nd->szB > 0);
   1141         tl_assert(nd->addr + nd->szB <= a
   1142                   || a + len <= nd->addr);
   1143      }
   1144      VG_(doneIterFM)( giTree );
   1145    }
   1146 
   1147    if (!overlap)
   1148       return;
   1149 
   1150    /* Ok, the range contained some blocks.  Therefore we'll need to
   1151       visit all the Invars in all the thread shadow stacks, and
   1152       convert all Inv_Global entries that intersect [a,a+len) to
   1153       Inv_Unknown. */
   1154    tl_assert(len > 0);
   1155    preen_global_Invars( a, len );
   1156    invalidate_all_QCaches();
   1157 }
   1158 
   1159 
   1160 //////////////////////////////////////////////////////////////
   1161 //                                                          //
   1162 // StackFrame                                               //
   1163 //                                                          //
   1164 //////////////////////////////////////////////////////////////
   1165 
   1166 static ULong stats__total_accesses   = 0;
   1167 static ULong stats__classify_Stack0  = 0;
   1168 static ULong stats__classify_StackN  = 0;
   1169 static ULong stats__classify_Global  = 0;
   1170 static ULong stats__classify_Unknown = 0;
   1171 static ULong stats__Invars_preened   = 0;
   1172 static ULong stats__Invars_changed   = 0;
   1173 static ULong stats__t_i_b_empty      = 0;
   1174 static ULong stats__htab_fast        = 0;
   1175 static ULong stats__htab_searches    = 0;
   1176 static ULong stats__htab_probes      = 0;
   1177 static ULong stats__htab_resizes     = 0;
   1178 
   1179 
   1180 /* A dynamic instance of an instruction */
   1181 typedef
   1182    struct {
   1183       /* IMMUTABLE */
   1184       Addr    insn_addr; /* NB! zero means 'not in use' */
   1185       XArray* blocks; /* XArray* of StackBlock, or NULL if none */
   1186       /* MUTABLE */
   1187       Invar invar;
   1188    }
   1189    IInstance;
   1190 
   1191 
   1192 #define N_HTAB_FIXED 64
   1193 
   1194 typedef
   1195    struct _StackFrame {
   1196       /* The sp when the frame was created, so we know when to get rid
   1197          of it. */
   1198       Addr creation_sp;
   1199       /* The stack frames for a thread are arranged as a doubly linked
   1200          list.  Obviously the outermost frame in the stack has .outer
   1201          as NULL and the innermost in theory has .inner as NULL.
   1202          However, when a function returns, we don't delete the
   1203          just-vacated StackFrame.  Instead, it is retained in the list
   1204          and will be re-used when the next call happens.  This is so
   1205          as to avoid constantly having to dynamically allocate and
   1206          deallocate frames. */
   1207       struct _StackFrame* inner;
   1208       struct _StackFrame* outer;
   1209       Word depth; /* 0 for outermost; increases inwards */
   1210       /* Information for each memory referencing instruction, for this
   1211          instantiation of the function.  The iinstances array is
   1212          operated as a simple linear-probe hash table, which is
   1213          dynamically expanded as necessary.  Once critical thing is
   1214          that an IInstance with a .insn_addr of zero is interpreted to
   1215          mean that hash table slot is unused.  This means we can't
   1216          store an IInstance for address zero. */
   1217       /* Note that htab initially points to htab_fixed.  If htab_fixed
   1218          turns out not to be big enough then htab is made to point to
   1219          dynamically allocated memory.  But it's often the case that
   1220          htab_fixed is big enough, so this optimisation saves a huge
   1221          number of sg_malloc/sg_free call pairs. */
   1222       IInstance* htab;
   1223       UWord      htab_size; /* size of hash table, MAY ONLY BE A POWER OF 2 */
   1224       UWord      htab_used; /* number of hash table slots currently in use */
   1225       /* If this frame is currently making a call, then the following
   1226          are relevant. */
   1227       Addr sp_at_call;
   1228       Addr fp_at_call;
   1229       XArray* /* of Addr */ blocks_added_by_call;
   1230       /* See comment just above */
   1231       IInstance htab_fixed[N_HTAB_FIXED];
   1232    }
   1233    StackFrame;
   1234 
   1235 
   1236 
   1237 
   1238 
   1239 /* Move this somewhere else? */
   1240 /* Visit all Invars in the entire system.  If 'isHeap' is True, change
   1241    all Inv_Heap Invars that intersect [a,a+len) to Inv_Unknown.  If
   1242    'isHeap' is False, do the same but to the Inv_Global{S,V} Invars
   1243    instead. */
   1244 
   1245 __attribute__((noinline))
   1246 static void preen_global_Invar ( Invar* inv, Addr a, SizeT len )
   1247 {
   1248    stats__Invars_preened++;
   1249    tl_assert(len > 0);
   1250    tl_assert(inv);
   1251    switch (inv->tag) {
   1252       case Inv_Global:
   1253          tl_assert(inv->Inv.Global.nd);
   1254          tl_assert(inv->Inv.Global.nd->szB > 0);
   1255          if (0) VG_(printf)("preen_Invar Global %#lx %lu\n",
   1256                             inv->Inv.Global.nd->addr,
   1257                             inv->Inv.Global.nd->szB);
   1258          if (0 == cmp_nonempty_intervals(a, len, inv->Inv.Global.nd->addr,
   1259                                                  inv->Inv.Global.nd->szB)) {
   1260             inv->tag = Inv_Unknown;
   1261             stats__Invars_changed++;
   1262          }
   1263          break;
   1264       case Inv_Stack0:
   1265       case Inv_StackN:
   1266       case Inv_Unknown:
   1267          break;
   1268       default:
   1269          tl_assert(0);
   1270    }
   1271 }
   1272 
   1273 __attribute__((noinline))
   1274 static void preen_global_Invars ( Addr a, SizeT len )
   1275 {
   1276    Int         i;
   1277    UWord       u;
   1278    StackFrame* frame;
   1279    tl_assert(len > 0);
   1280    for (i = 0; i < VG_N_THREADS; i++) {
   1281       frame = shadowStacks[i];
   1282       if (!frame)
   1283          continue; /* no frames for this thread */
   1284       /* start from the innermost frame */
   1285       while (frame->inner)
   1286          frame = frame->inner;
   1287       tl_assert(frame->outer);
   1288       /* work through the frames from innermost to outermost.  The
   1289          order isn't important; we just need to ensure we visit each
   1290          frame once (including those which are not actually active,
   1291          more 'inner' than the 'innermost active frame', viz, just
   1292          hanging around waiting to be used, when the current innermost
   1293          active frame makes more calls.  See comments on definition of
   1294          struct _StackFrame. */
   1295       for (; frame; frame = frame->outer) {
   1296          UWord xx = 0; /* sanity check only; count of used htab entries */
   1297          if (!frame->htab)
   1298             continue; /* frame not in use.  See shadowStack_unwind(). */
   1299          for (u = 0; u < frame->htab_size; u++) {
   1300             IInstance* ii = &frame->htab[u];
   1301             if (ii->insn_addr == 0)
   1302                continue; /* not in use */
   1303             if (0) { pp_Invar(&ii->invar); VG_(printf)(" x\n"); }
   1304             preen_global_Invar( &ii->invar, a, len );
   1305             xx++;
   1306          }
   1307          tl_assert(xx == frame->htab_used);
   1308       }
   1309    }
   1310 }
   1311 
   1312 
   1313 /* XXX this should be >> 2 on ppc32/64 since the bottom two bits
   1314    of the ip are guaranteed to be zero */
   1315 inline static UWord compute_II_hash ( Addr ip, UWord htab_size ) {
   1316    return (ip >> 0) & (htab_size - 1);
   1317 }
   1318 
   1319 __attribute__((noinline))
   1320 static void initialise_II_hash_table ( StackFrame* sf )
   1321 {
   1322    UWord i;
   1323    sf->htab_size = N_HTAB_FIXED; /* initial hash table size */
   1324    sf->htab = &sf->htab_fixed[0];
   1325    tl_assert(sf->htab);
   1326    sf->htab_used = 0;
   1327    for (i = 0; i < sf->htab_size; i++)
   1328       sf->htab[i].insn_addr = 0; /* NOT IN USE */
   1329 }
   1330 
   1331 
   1332 __attribute__((noinline))
   1333 static void resize_II_hash_table ( StackFrame* sf )
   1334 {
   1335    UWord     i, j, ix, old_size, new_size;
   1336    IInstance *old_htab, *new_htab, *old;
   1337 
   1338    tl_assert(sf && sf->htab);
   1339    old_size = sf->htab_size;
   1340    new_size = 2 * old_size;
   1341    old_htab = sf->htab;
   1342    new_htab = sg_malloc( "di.sg_main.rIht.1",
   1343                          new_size * sizeof(IInstance) );
   1344    for (i = 0; i < new_size; i++) {
   1345       new_htab[i].insn_addr = 0; /* NOT IN USE */
   1346    }
   1347    for (i = 0; i < old_size; i++) {
   1348       old = &old_htab[i];
   1349       if (old->insn_addr == 0 /* NOT IN USE */)
   1350          continue;
   1351       ix = compute_II_hash(old->insn_addr, new_size);
   1352       /* find out where to put this, in the new table */
   1353       j = new_size;
   1354       while (1) {
   1355          if (new_htab[ix].insn_addr == 0)
   1356             break;
   1357          /* This can't ever happen, because it would mean the new
   1358             table is full; that isn't allowed -- even the old table is
   1359             only allowed to become half full. */
   1360          tl_assert(j > 0);
   1361          j--;
   1362          ix++; if (ix == new_size) ix = 0;
   1363       }
   1364       /* copy the old entry to this location */
   1365       tl_assert(ix < new_size);
   1366       tl_assert(new_htab[ix].insn_addr == 0);
   1367       new_htab[ix] = *old;
   1368       tl_assert(new_htab[ix].insn_addr != 0);
   1369    }
   1370    /* all entries copied; free old table. */
   1371    if (old_htab != &sf->htab_fixed[0])
   1372       sg_free(old_htab);
   1373    sf->htab = new_htab;
   1374    sf->htab_size = new_size;
   1375    /* check sf->htab_used is correct.  Optional and a bit expensive
   1376       but anyway: */
   1377    j = 0;
   1378    for (i = 0; i < new_size; i++) {
   1379       if (new_htab[i].insn_addr != 0) {
   1380          j++;
   1381       }
   1382    }
   1383    tl_assert(j == sf->htab_used);
   1384    if (0) VG_(printf)("resized tab for SF %p to %lu\n", sf, new_size);
   1385 }
   1386 
   1387 
   1388 __attribute__((noinline))
   1389 static IInstance* find_or_create_IInstance_SLOW (
   1390                      StackFrame* sf,
   1391                      Addr ip,
   1392                      XArray* /* StackBlock */ ip_frameblocks
   1393                   )
   1394 {
   1395    UWord i, ix;
   1396 
   1397    stats__htab_searches++;
   1398 
   1399    tl_assert(sf);
   1400    tl_assert(sf->htab);
   1401 
   1402    /* Make sure the table loading doesn't get too high. */
   1403    if (UNLIKELY(2 * sf->htab_used >= 1 * sf->htab_size)) {
   1404       stats__htab_resizes++;
   1405       resize_II_hash_table(sf);
   1406    }
   1407    tl_assert(2 * sf->htab_used <= sf->htab_size);
   1408 
   1409    ix = compute_II_hash(ip, sf->htab_size);
   1410    i = sf->htab_size;
   1411    while (1) {
   1412       stats__htab_probes++;
   1413       /* Note that because of the way the fast-case handler works,
   1414          these two tests are actually redundant in the first iteration
   1415          of this loop.  (Except they aren't redundant if the code just
   1416          above resized the table first. :-) */
   1417       if (sf->htab[ix].insn_addr == ip)
   1418          return &sf->htab[ix];
   1419       if (sf->htab[ix].insn_addr == 0)
   1420          break;
   1421       /* If i ever gets to zero and we have found neither what we're
   1422          looking for nor an empty slot, the table must be full.  Which
   1423          isn't possible -- we monitor the load factor to ensure it
   1424          doesn't get above say 50%; if that ever does happen the table
   1425          is resized. */
   1426       tl_assert(i > 0);
   1427       i--;
   1428       ix++;
   1429       if (ix == sf->htab_size) ix = 0;
   1430    }
   1431 
   1432    /* So now we've found a free slot at ix, and we can use that. */
   1433    tl_assert(sf->htab[ix].insn_addr == 0);
   1434 
   1435    /* Add a new record in this slot. */
   1436    tl_assert(ip != 0); /* CAN'T REPRESENT THIS */
   1437    sf->htab[ix].insn_addr = ip;
   1438    sf->htab[ix].blocks    = ip_frameblocks;
   1439    sf->htab[ix].invar.tag = Inv_Unset;
   1440    sf->htab_used++;
   1441    return &sf->htab[ix];
   1442 }
   1443 
   1444 
   1445 inline
   1446 static IInstance* find_or_create_IInstance (
   1447                      StackFrame* sf,
   1448                      Addr ip,
   1449                      XArray* /* StackBlock */ ip_frameblocks
   1450                   )
   1451 {
   1452    UWord ix = compute_II_hash(ip, sf->htab_size);
   1453    /* Is it in the first slot we come to? */
   1454    if (LIKELY(sf->htab[ix].insn_addr == ip)) {
   1455       stats__htab_fast++;
   1456       return &sf->htab[ix];
   1457    }
   1458    /* If the first slot we come to is empty, bag it. */
   1459    if (LIKELY(sf->htab[ix].insn_addr == 0)) {
   1460       stats__htab_fast++;
   1461       tl_assert(ip != 0);
   1462       sf->htab[ix].insn_addr = ip;
   1463       sf->htab[ix].blocks    = ip_frameblocks;
   1464       sf->htab[ix].invar.tag = Inv_Unset;
   1465       sf->htab_used++;
   1466       return &sf->htab[ix];
   1467    }
   1468    /* Otherwise we hand off to the slow case, which searches other
   1469       slots, and optionally resizes the table if necessary. */
   1470    return find_or_create_IInstance_SLOW( sf, ip, ip_frameblocks );
   1471 }
   1472 
   1473 
   1474 __attribute__((noinline))
   1475 static Addr calculate_StackBlock_EA ( StackBlock* descr,
   1476                                       Addr sp, Addr fp ) {
   1477    UWord w1 = (UWord)descr->base;
   1478    UWord w2 = (UWord)(descr->spRel ? sp : fp);
   1479    UWord ea = w1 + w2;
   1480    return ea;
   1481 }
   1482 
   1483 /* Given an array of StackBlocks, return an array of Addrs, holding
   1484    their effective addresses.  Caller deallocates result array. */
   1485 __attribute__((noinline))
   1486 static XArray* /* Addr */ calculate_StackBlock_EAs (
   1487                              XArray* /* StackBlock */ blocks,
   1488                              Addr sp, Addr fp
   1489                           )
   1490 {
   1491    XArray* res;
   1492    Word i, n = VG_(sizeXA)( blocks );
   1493    tl_assert(n > 0);
   1494    res = VG_(newXA)( sg_malloc, "di.sg_main.cSBE.1", sg_free, sizeof(Addr) );
   1495    for (i = 0; i < n; i++) {
   1496       StackBlock* blk = VG_(indexXA)( blocks, i );
   1497       Addr ea = calculate_StackBlock_EA( blk, sp, fp );
   1498       VG_(addToXA)( res, &ea );
   1499    }
   1500    return res;
   1501 }
   1502 
   1503 
   1504 /* Try to classify the block into which a memory access falls, and
   1505    write the result in 'inv'.  This writes all relevant fields of
   1506    'inv'. */
   1507 __attribute__((noinline))
   1508 static void classify_address ( /*OUT*/Invar* inv,
   1509                                ThreadId tid,
   1510                                Addr ea, Addr sp, Addr fp,
   1511                                UWord szB,
   1512                                XArray* /* of StackBlock */ thisInstrBlocks )
   1513 {
   1514    tl_assert(szB > 0);
   1515    /* First, look in the stack blocks accessible in this instruction's
   1516       frame. */
   1517    {
   1518      Word i, nBlocks = VG_(sizeXA)( thisInstrBlocks );
   1519      if (nBlocks == 0) stats__t_i_b_empty++;
   1520      for (i = 0; i < nBlocks; i++) {
   1521         StackBlock* descr = VG_(indexXA)( thisInstrBlocks, i );
   1522         Addr bea = calculate_StackBlock_EA( descr, sp, fp );
   1523         if (bea <= ea && ea + szB <= bea + descr->szB) {
   1524            /* found it */
   1525            inv->tag = Inv_Stack0;
   1526            inv->Inv.Stack0.addr  = bea;
   1527            inv->Inv.Stack0.szB   = descr->szB;
   1528            inv->Inv.Stack0.descr = descr;
   1529            stats__classify_Stack0++;
   1530            return;
   1531         }
   1532      }
   1533    }
   1534    /* Look in this thread's query cache */
   1535    { Word i;
   1536      QCache* cache = &qcaches[tid];
   1537      static UWord ctr = 0;
   1538      stats__qcache_queries++;
   1539      for (i = 0; i < cache->nInUse; i++) {
   1540         if (0) /* expensive in a loop like this */
   1541                tl_assert(cache->elems[i].addr + cache->elems[i].szB != 0);
   1542         stats__qcache_probes++;
   1543         if (is_subinterval_of(cache->elems[i].addr,
   1544                               cache->elems[i].szB, ea, szB)) {
   1545            if (i > 0
   1546                && (ctr++ & (QCACHE_ADVANCE_EVERY-1)) == 0) {
   1547               QCElem tmp;
   1548               tmp = cache->elems[i-1];
   1549               cache->elems[i-1] = cache->elems[i];
   1550               cache->elems[i] = tmp;
   1551               i--;
   1552            }
   1553            *inv = cache->elems[i].inv;
   1554            return;
   1555         }
   1556      }
   1557      stats__qcache_misses++;
   1558    }
   1559    /* Ok, so it's not a block in the top frame.  Perhaps it's a block
   1560       in some calling frame?  Consult this thread's stack-block
   1561       interval tree to find out. */
   1562    { StackTreeNode* nd = find_StackTreeNode( siTrees[tid], ea );
   1563      /* We know that [ea,ea+1) is in the block, but we need to
   1564         restrict to the case where the whole access falls within
   1565         it. */
   1566      if (nd && !is_subinterval_of(nd->addr, nd->szB, ea, szB)) {
   1567         nd = NULL;
   1568      }
   1569      if (nd) {
   1570         /* found it */
   1571         inv->tag = Inv_StackN;
   1572         inv->Inv.StackN.nd = nd;
   1573         stats__classify_StackN++;
   1574         goto out;
   1575      }
   1576    }
   1577    /* Not in a stack block.  Try the global pool. */
   1578    { GlobalTreeNode* nd = find_GlobalTreeNode(giTree, ea);
   1579      /* We know that [ea,ea+1) is in the block, but we need to
   1580         restrict to the case where the whole access falls within
   1581         it. */
   1582      if (nd && !is_subinterval_of(nd->addr, nd->szB, ea, szB)) {
   1583         nd = NULL;
   1584      }
   1585      if (nd) {
   1586         /* found it */
   1587         inv->tag = Inv_Global;
   1588         inv->Inv.Global.nd = nd;
   1589         stats__classify_Global++;
   1590         goto out;
   1591      }
   1592    }
   1593    /* No idea - give up. */
   1594    inv->tag = Inv_Unknown;
   1595    stats__classify_Unknown++;
   1596 
   1597    /* Update the cache */
   1598   out:
   1599    { Addr    toadd_addr = 0;
   1600      SizeT   toadd_szB  = 0;
   1601      QCache* cache      = &qcaches[tid];
   1602 
   1603      static UWord ctr = 0;
   1604      Bool show = False;
   1605      if (0 && 0 == (ctr++ & 0x1FFFFF)) show = True;
   1606 
   1607      if (show) QCache__pp(cache, "before upd");
   1608 
   1609      switch (inv->tag) {
   1610         case Inv_Global:
   1611            toadd_addr = inv->Inv.Global.nd->addr;
   1612            toadd_szB  = inv->Inv.Global.nd->szB;
   1613            break;
   1614         case Inv_StackN:
   1615            toadd_addr = inv->Inv.StackN.nd->addr;
   1616            toadd_szB  = inv->Inv.StackN.nd->szB;
   1617            break;
   1618         case Inv_Unknown: {
   1619            /* This is more complex.  We need to figure out the
   1620               intersection of the "holes" in the global and stack
   1621               interval trees into which [ea,ea+szB) falls.  This is
   1622               further complicated by the fact that [ea,ea+szB) might
   1623               not fall cleanly into a hole; it may instead fall across
   1624               the boundary of a stack or global block.  In that case
   1625               we just ignore it and don't update the cache, since we
   1626               have no way to represent this situation precisely. */
   1627            StackTreeNode  sNegInf, sPosInf, sKey, *sLB, *sUB;
   1628            GlobalTreeNode gNegInf, gPosInf, gKey, *gLB, *gUB;
   1629            Addr gMin, gMax, sMin, sMax, uMin, uMax;
   1630            Bool sOK, gOK;
   1631            sNegInf.addr = 0;
   1632            sNegInf.szB  = 1;
   1633            sPosInf.addr = ~(UWord)0;
   1634            sPosInf.szB  = 1;
   1635            gNegInf.addr = 0;
   1636            gNegInf.szB  = 1;
   1637            gPosInf.addr = ~(UWord)0;
   1638            gPosInf.szB  = 1;
   1639            sKey.addr = ea;
   1640            sKey.szB  = szB;
   1641            gKey.addr = ea;
   1642            gKey.szB  = szB;
   1643            if (0) VG_(printf)("Tree sizes %ld %ld\n",
   1644                               VG_(sizeFM)(siTrees[tid]), VG_(sizeFM)(giTree));
   1645            sOK = VG_(findBoundsFM)( siTrees[tid],
   1646                                     (UWord*)&sLB,    NULL/*unused*/,
   1647                                     (UWord*)&sUB,    NULL/*unused*/,
   1648                                     (UWord)&sNegInf, 0/*unused*/,
   1649                                     (UWord)&sPosInf, 0/*unused*/,
   1650                                     (UWord)&sKey );
   1651            gOK = VG_(findBoundsFM)( giTree,
   1652                                     (UWord*)&gLB,    NULL/*unused*/,
   1653                                     (UWord*)&gUB,    NULL/*unused*/,
   1654                                     (UWord)&gNegInf, 0/*unused*/,
   1655                                     (UWord)&gPosInf, 0/*unused*/,
   1656                                     (UWord)&gKey );
   1657            if (!(sOK && gOK)) {
   1658               /* If this happens, then [ea,ea+szB) partially overlaps
   1659                  a heap or stack block.  We can't represent that, so
   1660                  just forget it (should be very rare).  However, do
   1661                  maximum sanity checks first.  In such a
   1662                  partial overlap case, it can't be the case that both
   1663                  [ea] and [ea+szB-1] overlap the same block, since if
   1664                  that were indeed the case then it wouldn't be a
   1665                  partial overlap; rather it would simply fall inside
   1666                  that block entirely and we shouldn't be inside this
   1667                  conditional at all. */
   1668               if (!sOK) {
   1669                  StackTreeNode *ndFirst, *ndLast;
   1670                  ndFirst = find_StackTreeNode( siTrees[tid], ea );
   1671                  ndLast  = find_StackTreeNode( siTrees[tid], ea+szB-1 );
   1672                  /* if both ends of the range fall inside a block,
   1673                     they can't be in the same block. */
   1674                  if (ndFirst && ndLast)
   1675                     tl_assert(ndFirst != ndLast);
   1676                  /* for each end of the range, if it is in a block,
   1677                     the range as a whole can't be entirely within the
   1678                     block. */
   1679                  if (ndFirst)
   1680                     tl_assert(!is_subinterval_of(ndFirst->addr,
   1681                                                  ndFirst->szB, ea, szB));
   1682                  if (ndLast)
   1683                     tl_assert(!is_subinterval_of(ndLast->addr,
   1684                                                  ndLast->szB, ea, szB));
   1685               }
   1686               if (!gOK) {
   1687                  GlobalTreeNode *ndFirst, *ndLast;
   1688                  ndFirst = find_GlobalTreeNode( giTree, ea );
   1689                  ndLast  = find_GlobalTreeNode( giTree, ea+szB-1 );
   1690                  /* if both ends of the range fall inside a block,
   1691                     they can't be in the same block. */
   1692                  if (ndFirst && ndLast)
   1693                     tl_assert(ndFirst != ndLast);
   1694                  /* for each end of the range, if it is in a block,
   1695                     the range as a whole can't be entirely within the
   1696                     block. */
   1697                  if (ndFirst)
   1698                     tl_assert(!is_subinterval_of(ndFirst->addr,
   1699                                                  ndFirst->szB, ea, szB));
   1700                  if (ndLast)
   1701                     tl_assert(!is_subinterval_of(ndLast->addr,
   1702                                                  ndLast->szB, ea, szB));
   1703               }
   1704               if (0) VG_(printf)("overlapping blocks in cache\n");
   1705               return;
   1706            }
   1707            sMin = sLB == &sNegInf  ? 0         : (sLB->addr + sLB->szB);
   1708            sMax = sUB == &sPosInf  ? ~(UWord)0 : (sUB->addr - 1);
   1709            gMin = gLB == &gNegInf  ? 0         : (gLB->addr + gLB->szB);
   1710            gMax = gUB == &gPosInf  ? ~(UWord)0 : (gUB->addr - 1);
   1711            if (0) VG_(printf)("sMin %lx sMax %lx gMin %lx gMax %lx\n",
   1712                               sMin, sMax, gMin, gMax);
   1713            /* [sMin,sMax] and [gMin,gMax] must both contain
   1714               [ea,ea+szB) (right?)  That implies they must overlap at
   1715               at least over [ea,ea+szB). */
   1716            tl_assert(sMin <= ea && ea+szB-1 <= sMax);
   1717            tl_assert(gMin <= ea && ea+szB-1 <= gMax);
   1718            /* So now compute their intersection. */
   1719            uMin = Addr__max( sMin, gMin );
   1720            uMax = Addr__min( sMax, gMax );
   1721            if (0) VG_(printf)("uMin %lx uMax %lx\n", uMin, uMax);
   1722            tl_assert(uMin <= uMax);
   1723            tl_assert(uMin <= ea && ea+szB-1 <= uMax);
   1724            /* Finally, we can park [uMin,uMax] in the cache.  However,
   1725               if uMax is ~0, we can't represent the difference; hence
   1726               fudge uMax. */
   1727            if (uMin < uMax && uMax == ~(UWord)0)
   1728               uMax--;
   1729            toadd_addr = uMin;
   1730            toadd_szB  = uMax - uMin + 1;
   1731            break;
   1732         }
   1733         default:
   1734            /* We should only be caching info for the above 3 cases */
   1735           tl_assert(0);
   1736      } /* switch (inv->tag) */
   1737 
   1738      { /* and actually add this to the cache, finally */
   1739        Word i;
   1740        Word ip = cache->nInUse / 2; /* doesn't seem critical */
   1741 
   1742        if (cache->nInUse < N_QCACHE)
   1743           cache->nInUse++;
   1744        for (i = cache->nInUse-1; i > ip; i--) {
   1745           cache->elems[i] = cache->elems[i-1];
   1746        }
   1747 
   1748        tl_assert(toadd_szB > 0);
   1749        cache->elems[ip].addr = toadd_addr;
   1750        cache->elems[ip].szB  = toadd_szB;
   1751        cache->elems[ip].inv  = *inv;
   1752      }
   1753 
   1754      if (show) QCache__pp(cache, "after upd");
   1755 
   1756    }
   1757 }
   1758 
   1759 
   1760 /* CALLED FROM GENERATED CODE */
   1761 static
   1762 VG_REGPARM(3)
   1763 void helperc__mem_access ( /* Known only at run time: */
   1764                            Addr ea, Addr sp, Addr fp,
   1765                            /* Known at translation time: */
   1766                            Word sszB, Addr ip, XArray* ip_frameBlocks )
   1767 {
   1768    UWord szB;
   1769    IInstance* iinstance;
   1770    Invar* inv;
   1771    Invar new_inv;
   1772    ThreadId tid = VG_(get_running_tid)();
   1773    StackFrame* frame;
   1774    HChar bufE[160], bufA[160], bufD[32];
   1775 
   1776    stats__total_accesses++;
   1777 
   1778    tl_assert(is_sane_TId(tid));
   1779    frame = shadowStacks[tid];
   1780    tl_assert(frame);
   1781 
   1782    /* Find the instance info for this instruction. */
   1783    tl_assert(ip_frameBlocks);
   1784    iinstance = find_or_create_IInstance( frame, ip, ip_frameBlocks );
   1785    tl_assert(iinstance);
   1786    tl_assert(iinstance->blocks == ip_frameBlocks);
   1787 
   1788    szB = (sszB < 0) ? (-sszB) : sszB;
   1789    tl_assert(szB > 0);
   1790 
   1791    inv = &iinstance->invar;
   1792 
   1793    /* Deal with first uses of instruction instances. */
   1794    if (inv->tag == Inv_Unset) {
   1795       /* This is the first use of this instance of the instruction, so
   1796          we can't make any check; we merely record what we saw, so we
   1797          can compare it against what happens for 2nd and subsequent
   1798          accesses. */
   1799       classify_address( inv,
   1800                         tid, ea, sp, fp, szB, iinstance->blocks );
   1801       tl_assert(inv->tag != Inv_Unset);
   1802       return;
   1803    }
   1804 
   1805    /* So generate an Invar and see if it's different from what
   1806       we had before. */
   1807    classify_address( &new_inv,
   1808                      tid, ea, sp, fp, szB, iinstance->blocks );
   1809    tl_assert(new_inv.tag != Inv_Unset);
   1810 
   1811    /* Did we see something different from before?  If no, then there's
   1812       no error. */
   1813    if (LIKELY(eq_Invar(&new_inv, inv)))
   1814       return;
   1815 
   1816    tl_assert(inv->tag != Inv_Unset);
   1817 
   1818    VG_(memset)(bufE, 0, sizeof(bufE));
   1819    show_Invar( bufE, sizeof(bufE)-1, inv, frame->depth );
   1820 
   1821    VG_(memset)(bufA, 0, sizeof(bufA));
   1822    show_Invar( bufA, sizeof(bufA)-1, &new_inv, frame->depth );
   1823 
   1824    VG_(memset)(bufD, 0, sizeof(bufD));
   1825    UWord absDelta;
   1826    gen_delta_str( bufD, &absDelta, inv, ea );
   1827 
   1828    if (absDelta < 1024)
   1829       sg_record_error_SorG( tid, ea, sszB, bufE, bufA, bufD );
   1830 
   1831    /* And now install the new observation as "standard", so as to
   1832       make future error messages make more sense. */
   1833    *inv = new_inv;
   1834 }
   1835 
   1836 
   1837 ////////////////////////////////////////
   1838 /* Primary push-a-new-frame routine.  Called indirectly from
   1839    generated code. */
   1840 
   1841 static UWord stats__max_sitree_size = 0;
   1842 static UWord stats__max_gitree_size = 0;
   1843 
   1844 static
   1845 void shadowStack_new_frame ( ThreadId tid,
   1846                              Addr     sp_at_call_insn,
   1847                              Addr     sp_post_call_insn,
   1848                              Addr     fp_at_call_insn,
   1849                              Addr     ip_post_call_insn,
   1850                              XArray*  descrs_at_call_insn )
   1851 {
   1852    StackFrame *callee, *caller;
   1853    tl_assert(is_sane_TId(tid));
   1854 
   1855    caller = shadowStacks[tid];
   1856    tl_assert(caller);
   1857 
   1858    if (caller->outer) { /* "this is not the outermost frame" */
   1859       tl_assert(caller->outer->inner == caller);
   1860       tl_assert(caller->outer->depth >= 0);
   1861       tl_assert(1 + caller->outer->depth == caller->depth);
   1862    } else {
   1863       tl_assert(caller->depth == 0);
   1864    }
   1865 
   1866    caller->sp_at_call = sp_at_call_insn;
   1867    caller->fp_at_call = fp_at_call_insn;
   1868 
   1869    if (descrs_at_call_insn) {
   1870       tl_assert( VG_(sizeXA)(descrs_at_call_insn) > 0 );
   1871       caller->blocks_added_by_call
   1872          = calculate_StackBlock_EAs( descrs_at_call_insn,
   1873                                      sp_at_call_insn, fp_at_call_insn );
   1874       if (caller->blocks_added_by_call)
   1875          add_blocks_to_StackTree( siTrees[tid],
   1876                                   descrs_at_call_insn,
   1877                                   caller->blocks_added_by_call,
   1878                                   caller->depth /* stack depth at which
   1879                                                    these blocks are
   1880                                                    considered to exist*/ );
   1881       if (1) {
   1882          UWord s  = VG_(sizeFM)( siTrees[tid] );
   1883          UWord g  = VG_(sizeFM)( giTree );
   1884          Bool  sb = s > stats__max_sitree_size;
   1885          Bool  gb = g > stats__max_gitree_size;
   1886          if (sb) stats__max_sitree_size = s;
   1887          if (gb) stats__max_gitree_size = g;
   1888          if (0 && (sb || gb))
   1889             VG_(message)(Vg_DebugMsg,
   1890                          "exp-sgcheck: new max tree sizes: "
   1891                          "StackTree %ld, GlobalTree %ld\n",
   1892                          stats__max_sitree_size, stats__max_gitree_size );
   1893       }
   1894    } else {
   1895       caller->blocks_added_by_call = NULL;
   1896    }
   1897 
   1898    /* caller->blocks_added_by_call is used again (and then freed) when
   1899       this frame is removed from the stack. */
   1900 
   1901    if (caller->inner) {
   1902       callee = caller->inner;
   1903    } else {
   1904       callee = sg_malloc("di.sg_main.sSnf.1", sizeof(StackFrame));
   1905       VG_(memset)(callee, 0, sizeof(StackFrame));
   1906       callee->outer = caller;
   1907       caller->inner = callee;
   1908       callee->depth = 1 + caller->depth;
   1909       tl_assert(callee->inner == NULL);
   1910    }
   1911 
   1912    /* This sets up .htab, .htab_size and .htab_used */
   1913    initialise_II_hash_table( callee );
   1914 
   1915    callee->creation_sp    = sp_post_call_insn;
   1916    callee->sp_at_call     = 0; // not actually required ..
   1917    callee->fp_at_call     = 0; // .. these 3 initialisations are ..
   1918    callee->blocks_added_by_call = NULL; // .. just for cleanness
   1919 
   1920    /* record the new running stack frame */
   1921    shadowStacks[tid] = callee;
   1922 
   1923    /* and this thread's query cache is now invalid */
   1924    QCache__invalidate( &qcaches[tid] );
   1925 
   1926    if (0)
   1927    { Word d = callee->depth;
   1928      HChar fnname[80];
   1929      Bool ok;
   1930      Addr ip = ip_post_call_insn;
   1931      ok = VG_(get_fnname_w_offset)( ip, fnname, sizeof(fnname) );
   1932      while (d > 0) {
   1933         VG_(printf)(" ");
   1934         d--;
   1935      }
   1936      VG_(printf)("> %s %#lx\n", ok ? fnname : "???", ip);
   1937    }
   1938 }
   1939 
   1940 /* CALLED FROM GENERATED CODE */
   1941 static
   1942 VG_REGPARM(3)
   1943 void helperc__new_frame ( Addr sp_post_call_insn,
   1944                           Addr fp_at_call_insn,
   1945                           Addr ip_post_call_insn,
   1946                           XArray* blocks_at_call_insn,
   1947                           Word sp_adjust )
   1948 {
   1949    ThreadId tid = VG_(get_running_tid)();
   1950    Addr     sp_at_call_insn = sp_post_call_insn + sp_adjust;
   1951    shadowStack_new_frame( tid,
   1952                           sp_at_call_insn,
   1953                           sp_post_call_insn,
   1954                           fp_at_call_insn,
   1955                           ip_post_call_insn,
   1956                           blocks_at_call_insn );
   1957 }
   1958 
   1959 
   1960 ////////////////////////////////////////
   1961 /* Primary remove-frame(s) routine.  Called indirectly from
   1962    generated code. */
   1963 
   1964 __attribute__((noinline))
   1965 static void shadowStack_unwind ( ThreadId tid, Addr sp_now )
   1966 {
   1967    StackFrame *innermost, *innermostOrig;
   1968    tl_assert(is_sane_TId(tid));
   1969    innermost = shadowStacks[tid];
   1970    tl_assert(innermost);
   1971    innermostOrig = innermost;
   1972    //VG_(printf)("UNWIND sp_new = %p\n", sp_now);
   1973    while (1) {
   1974       if (!innermost->outer)
   1975          break;
   1976       if (innermost->inner)
   1977          tl_assert(innermost->inner->outer == innermost);
   1978       tl_assert(innermost->outer->inner == innermost);
   1979       tl_assert(innermost->blocks_added_by_call == NULL);
   1980       if (sp_now <= innermost->creation_sp) break;
   1981       //VG_(printf)("UNWIND     dump %p\n", innermost->creation_sp);
   1982       tl_assert(innermost->htab);
   1983       if (innermost->htab != &innermost->htab_fixed[0])
   1984          sg_free(innermost->htab);
   1985       /* be on the safe side */
   1986       innermost->creation_sp = 0;
   1987       innermost->htab = NULL;
   1988       innermost->htab_size = 0;
   1989       innermost->htab_used = 0;
   1990       innermost->sp_at_call = 0;
   1991       innermost->fp_at_call = 0;
   1992       innermost->blocks_added_by_call = NULL;
   1993       innermost = innermost->outer;
   1994 
   1995       /* So now we're "back" in the calling frame.  Remove from this
   1996          thread's stack-interval-tree, the blocks added at the time of
   1997          the call. */
   1998 
   1999       if (innermost->outer) { /* not at the outermost frame */
   2000          if (innermost->blocks_added_by_call == NULL) {
   2001          } else {
   2002             del_blocks_from_StackTree( siTrees[tid],
   2003                                        innermost->blocks_added_by_call );
   2004             VG_(deleteXA)( innermost->blocks_added_by_call );
   2005             innermost->blocks_added_by_call = NULL;
   2006          }
   2007       }
   2008       /* That completes the required tidying of the interval tree
   2009          associated with the frame we just removed. */
   2010 
   2011       if (0) {
   2012          Word d = innermost->depth;
   2013          while (d > 0) {
   2014             VG_(printf)(" ");
   2015             d--;
   2016          }
   2017          VG_(printf)("X\n");
   2018       }
   2019 
   2020    }
   2021 
   2022    tl_assert(innermost);
   2023 
   2024    if (innermost != innermostOrig) {
   2025       shadowStacks[tid] = innermost;
   2026       /* this thread's query cache is now invalid */
   2027       QCache__invalidate( &qcaches[tid] );
   2028    }
   2029 }
   2030 
   2031 
   2032 
   2033 //////////////////////////////////////////////////////////////
   2034 //                                                          //
   2035 // Instrumentation                                          //
   2036 //                                                          //
   2037 //////////////////////////////////////////////////////////////
   2038 
   2039 /* What does instrumentation need to do?
   2040 
   2041    - at each Call transfer, generate a call to shadowStack_new_frame
   2042      do this by manually inspecting the IR
   2043 
   2044    - at each sp change, if the sp change is negative,
   2045      call shadowStack_unwind
   2046      do this by asking for SP-change analysis
   2047 
   2048    - for each memory referencing instruction,
   2049      call helperc__mem_access
   2050 */
   2051 
   2052 /* A complication: sg_ instrumentation and h_ instrumentation need to
   2053    be interleaved.  Since the latter is a lot more complex than the
   2054    former, we split the sg_ instrumentation here into four functions
   2055    and let the h_ instrumenter call the four functions as it goes.
   2056    Hence the h_ instrumenter drives the sg_ instrumenter.
   2057 
   2058    To make this viable, the sg_ instrumenter carries what running
   2059    state it needs in 'struct _SGEnv'.  This is exported only
   2060    abstractly from this file.
   2061 */
   2062 
   2063 struct _SGEnv {
   2064    /* the current insn's IP */
   2065    Addr64 curr_IP;
   2066    /* whether the above is actually known */
   2067    Bool curr_IP_known;
   2068    /* if we find a mem ref, is it the first for this insn?  Used for
   2069       detecting insns which make more than one memory ref, a situation
   2070       we basically can't really handle properly; and so we ignore all
   2071       but the first ref. */
   2072    Bool firstRef;
   2073    /* READONLY */
   2074    IRTemp (*newIRTemp_cb)(IRType,void*);
   2075    void* newIRTemp_opaque;
   2076 };
   2077 
   2078 
   2079 /* --- Helper fns for instrumentation --- */
   2080 
   2081 static IRTemp gen_Get_SP ( struct _SGEnv*  sge,
   2082                            IRSB*           bbOut,
   2083                            VexGuestLayout* layout,
   2084                            Int             hWordTy_szB )
   2085 {
   2086    IRExpr* sp_expr;
   2087    IRTemp  sp_temp;
   2088    IRType  sp_type;
   2089    /* This in effect forces the host and guest word sizes to be the
   2090       same. */
   2091    tl_assert(hWordTy_szB == layout->sizeof_SP);
   2092    sp_type = layout->sizeof_SP == 8 ? Ity_I64 : Ity_I32;
   2093    sp_expr = IRExpr_Get( layout->offset_SP, sp_type );
   2094    sp_temp = sge->newIRTemp_cb( sp_type, sge->newIRTemp_opaque );
   2095    addStmtToIRSB( bbOut, IRStmt_WrTmp( sp_temp, sp_expr ) );
   2096    return sp_temp;
   2097 }
   2098 
   2099 static IRTemp gen_Get_FP ( struct _SGEnv*  sge,
   2100                            IRSB*           bbOut,
   2101                            VexGuestLayout* layout,
   2102                            Int             hWordTy_szB )
   2103 {
   2104    IRExpr* fp_expr;
   2105    IRTemp  fp_temp;
   2106    IRType  fp_type;
   2107    /* This in effect forces the host and guest word sizes to be the
   2108       same. */
   2109    tl_assert(hWordTy_szB == layout->sizeof_SP);
   2110    fp_type = layout->sizeof_FP == 8 ? Ity_I64 : Ity_I32;
   2111    fp_expr = IRExpr_Get( layout->offset_FP, fp_type );
   2112    fp_temp = sge->newIRTemp_cb( fp_type, sge->newIRTemp_opaque );
   2113    addStmtToIRSB( bbOut, IRStmt_WrTmp( fp_temp, fp_expr ) );
   2114    return fp_temp;
   2115 }
   2116 
   2117 static void instrument_mem_access ( struct _SGEnv* sge,
   2118                                     IRSB*   bbOut,
   2119                                     IRExpr* addr,
   2120                                     Int     szB,
   2121                                     Bool    isStore,
   2122                                     Int     hWordTy_szB,
   2123                                     Addr    curr_IP,
   2124                                     VexGuestLayout* layout )
   2125 {
   2126    IRType  tyAddr      = Ity_INVALID;
   2127    XArray* frameBlocks = NULL;
   2128 
   2129    tl_assert(isIRAtom(addr));
   2130    tl_assert(hWordTy_szB == 4 || hWordTy_szB == 8);
   2131 
   2132    tyAddr = typeOfIRExpr( bbOut->tyenv, addr );
   2133    tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
   2134 
   2135 #if defined(VGA_x86)
   2136    { UChar* p = (UChar*)curr_IP;
   2137      // pop %ebp; RET
   2138      if (p[0] == 0xc3 && p[-1] == 0x5d) return;
   2139      // pop %ebp; RET $imm16
   2140      if (p[0] == 0xc2 && p[-1] == 0x5d) return;
   2141      // PUSH %EBP; mov %esp,%ebp
   2142      if (p[0] == 0x55 && p[1] == 0x89 && p[2] == 0xe5) return;
   2143    }
   2144 #endif
   2145 
   2146    /* First off, find or create the StackBlocks for this instruction. */
   2147    frameBlocks = get_StackBlocks_for_IP( curr_IP );
   2148    tl_assert(frameBlocks);
   2149    //if (VG_(sizeXA)( frameBlocks ) == 0)
   2150    //   frameBlocks = NULL;
   2151 
   2152    /* Generate a call to "helperc__mem_access", passing:
   2153          addr current_SP current_FP szB curr_IP frameBlocks
   2154    */
   2155    { IRTemp t_SP = gen_Get_SP( sge, bbOut, layout, hWordTy_szB );
   2156      IRTemp t_FP = gen_Get_FP( sge, bbOut, layout, hWordTy_szB );
   2157      IRExpr** args
   2158         = mkIRExprVec_6( addr,
   2159                          IRExpr_RdTmp(t_SP),
   2160                          IRExpr_RdTmp(t_FP),
   2161                          mkIRExpr_HWord( isStore ? (-szB) : szB ),
   2162                          mkIRExpr_HWord( curr_IP ),
   2163                          mkIRExpr_HWord( (HWord)frameBlocks ) );
   2164      IRDirty* di
   2165         = unsafeIRDirty_0_N( 3/*regparms*/,
   2166                              "helperc__mem_access",
   2167                             VG_(fnptr_to_fnentry)( &helperc__mem_access ),
   2168                              args );
   2169 
   2170      addStmtToIRSB( bbOut, IRStmt_Dirty(di) );
   2171    }
   2172 }
   2173 
   2174 
   2175 /* --- Instrumentation main (4 fns) --- */
   2176 
   2177 struct _SGEnv * sg_instrument_init ( IRTemp (*newIRTemp_cb)(IRType,void*),
   2178                                      void* newIRTemp_opaque )
   2179 {
   2180    struct _SGEnv * env = sg_malloc("di.sg_main.sii.1",
   2181                                    sizeof(struct _SGEnv));
   2182    tl_assert(env);
   2183    env->curr_IP          = 0;
   2184    env->curr_IP_known    = False;
   2185    env->firstRef         = True;
   2186    env->newIRTemp_cb     = newIRTemp_cb;
   2187    env->newIRTemp_opaque = newIRTemp_opaque;
   2188    return env;
   2189 }
   2190 
   2191 void sg_instrument_fini ( struct _SGEnv * env )
   2192 {
   2193    sg_free(env);
   2194 }
   2195 
   2196 /* Add instrumentation for 'st' to 'sbOut', and possibly modify 'env'
   2197    as required.  This must be called before 'st' itself is added to
   2198    'sbOut'. */
   2199 void sg_instrument_IRStmt ( /*MOD*/struct _SGEnv * env,
   2200                             /*MOD*/IRSB* sbOut,
   2201                             IRStmt* st,
   2202                             VexGuestLayout* layout,
   2203                             IRType gWordTy, IRType hWordTy )
   2204 {
   2205    if (!sg_clo_enable_sg_checks)
   2206       return;
   2207 
   2208    tl_assert(st);
   2209    tl_assert(isFlatIRStmt(st));
   2210    switch (st->tag) {
   2211       case Ist_NoOp:
   2212       case Ist_AbiHint:
   2213       case Ist_Put:
   2214       case Ist_PutI:
   2215       case Ist_MBE:
   2216          /* None of these can contain any memory references. */
   2217          break;
   2218 
   2219       case Ist_Exit:
   2220          tl_assert(st->Ist.Exit.jk != Ijk_Call);
   2221          /* else we must deal with a conditional call */
   2222          break;
   2223 
   2224       case Ist_IMark:
   2225          env->curr_IP_known = True;
   2226          env->curr_IP       = (Addr)st->Ist.IMark.addr;
   2227          env->firstRef      = True;
   2228          break;
   2229 
   2230       case Ist_Store:
   2231          tl_assert(env->curr_IP_known);
   2232          if (env->firstRef) {
   2233             instrument_mem_access(
   2234                env, sbOut,
   2235                st->Ist.Store.addr,
   2236                sizeofIRType(typeOfIRExpr(sbOut->tyenv, st->Ist.Store.data)),
   2237                True/*isStore*/,
   2238                sizeofIRType(hWordTy),
   2239                env->curr_IP, layout
   2240             );
   2241             env->firstRef = False;
   2242          }
   2243          break;
   2244 
   2245       case Ist_WrTmp: {
   2246          IRExpr* data = st->Ist.WrTmp.data;
   2247          if (data->tag == Iex_Load) {
   2248             tl_assert(env->curr_IP_known);
   2249             if (env->firstRef) {
   2250                instrument_mem_access(
   2251                   env, sbOut,
   2252                   data->Iex.Load.addr,
   2253                   sizeofIRType(data->Iex.Load.ty),
   2254                   False/*!isStore*/,
   2255                   sizeofIRType(hWordTy),
   2256                   env->curr_IP, layout
   2257                );
   2258                env->firstRef = False;
   2259             }
   2260          }
   2261          break;
   2262       }
   2263 
   2264       case Ist_Dirty: {
   2265          Int      dataSize;
   2266          IRDirty* d = st->Ist.Dirty.details;
   2267          if (d->mFx != Ifx_None) {
   2268             /* This dirty helper accesses memory.  Collect the
   2269                details. */
   2270             tl_assert(env->curr_IP_known);
   2271             if (env->firstRef) {
   2272                tl_assert(d->mAddr != NULL);
   2273                tl_assert(d->mSize != 0);
   2274                dataSize = d->mSize;
   2275                if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
   2276                   instrument_mem_access(
   2277                      env, sbOut, d->mAddr, dataSize, False/*!isStore*/,
   2278                      sizeofIRType(hWordTy), env->curr_IP, layout
   2279                   );
   2280                }
   2281                if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
   2282                   instrument_mem_access(
   2283                      env, sbOut, d->mAddr, dataSize, True/*isStore*/,
   2284                      sizeofIRType(hWordTy), env->curr_IP, layout
   2285                   );
   2286                }
   2287                env->firstRef = False;
   2288             }
   2289          } else {
   2290             tl_assert(d->mAddr == NULL);
   2291             tl_assert(d->mSize == 0);
   2292          }
   2293          break;
   2294       }
   2295 
   2296       case Ist_CAS: {
   2297          /* We treat it as a read and a write of the location.  I
   2298             think that is the same behaviour as it was before IRCAS
   2299             was introduced, since prior to that point, the Vex front
   2300             ends would translate a lock-prefixed instruction into a
   2301             (normal) read followed by a (normal) write. */
   2302          if (env->firstRef) {
   2303             Int    dataSize;
   2304             IRCAS* cas = st->Ist.CAS.details;
   2305             tl_assert(cas->addr != NULL);
   2306             tl_assert(cas->dataLo != NULL);
   2307             dataSize = sizeofIRType(typeOfIRExpr(sbOut->tyenv, cas->dataLo));
   2308             if (cas->dataHi != NULL)
   2309                dataSize *= 2; /* since it's a doubleword-CAS */
   2310             instrument_mem_access(
   2311                env, sbOut, cas->addr, dataSize, False/*!isStore*/,
   2312                sizeofIRType(hWordTy), env->curr_IP, layout
   2313             );
   2314             instrument_mem_access(
   2315                env, sbOut, cas->addr, dataSize, True/*isStore*/,
   2316                sizeofIRType(hWordTy), env->curr_IP, layout
   2317             );
   2318             env->firstRef = False;
   2319          }
   2320          break;
   2321       }
   2322 
   2323       default:
   2324          tl_assert(0);
   2325 
   2326    } /* switch (st->tag) */
   2327 }
   2328 
   2329 
   2330 /* Add instrumentation for the final jump of an IRSB 'sbOut', and
   2331    possibly modify 'env' as required.  This must be the last
   2332    instrumentation statement in the block. */
   2333 void sg_instrument_final_jump ( /*MOD*/struct _SGEnv * env,
   2334                                 /*MOD*/IRSB* sbOut,
   2335                                 IRExpr* next,
   2336                                 IRJumpKind jumpkind,
   2337                                 VexGuestLayout* layout,
   2338                                 IRType gWordTy, IRType hWordTy )
   2339 {
   2340    if (!sg_clo_enable_sg_checks)
   2341       return;
   2342 
   2343    if (jumpkind == Ijk_Call) {
   2344       // Assumes x86 or amd64
   2345       IRTemp   sp_post_call_insn, fp_post_call_insn;
   2346       XArray*  frameBlocks;
   2347       IRExpr** args;
   2348       IRDirty* di;
   2349       sp_post_call_insn
   2350          = gen_Get_SP( env, sbOut, layout, sizeofIRType(hWordTy) );
   2351       fp_post_call_insn
   2352          = gen_Get_FP( env, sbOut, layout, sizeofIRType(hWordTy) );
   2353       tl_assert(env->curr_IP_known);
   2354       frameBlocks = get_StackBlocks_for_IP( env->curr_IP );
   2355       tl_assert(frameBlocks);
   2356       if (VG_(sizeXA)(frameBlocks) == 0)
   2357          frameBlocks = NULL;
   2358       args
   2359          = mkIRExprVec_5(
   2360               IRExpr_RdTmp(sp_post_call_insn),
   2361               IRExpr_RdTmp(fp_post_call_insn),
   2362                          /* assume the call doesn't change FP */
   2363               next,
   2364               mkIRExpr_HWord( (HWord)frameBlocks ),
   2365               mkIRExpr_HWord( sizeofIRType(gWordTy) )
   2366            );
   2367       di = unsafeIRDirty_0_N(
   2368               3/*regparms*/,
   2369               "helperc__new_frame",
   2370               VG_(fnptr_to_fnentry)( &helperc__new_frame ),
   2371               args );
   2372       addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
   2373    }
   2374 }
   2375 
   2376 
   2377 //////////////////////////////////////////////////////////////
   2378 //                                                          //
   2379 // end Instrumentation                                      //
   2380 //                                                          //
   2381 //////////////////////////////////////////////////////////////
   2382 
   2383 
   2384 //////////////////////////////////////////////////////////////
   2385 //                                                          //
   2386 // misc                                                     //
   2387 //                                                          //
   2388 //////////////////////////////////////////////////////////////
   2389 
   2390 /* Make a new empty stack frame that is suitable for being the
   2391    outermost frame in a stack.  It has a creation_sp of effectively
   2392    infinity, so it can never be removed. */
   2393 static StackFrame* new_root_StackFrame ( void )
   2394 {
   2395    StackFrame* sframe = sg_malloc("di.sg_main.nrS.1", sizeof(StackFrame));
   2396    VG_(memset)( sframe, 0, sizeof(*sframe) );
   2397    sframe->creation_sp = ~0UL;
   2398 
   2399    /* This sets up .htab, .htab_size and .htab_used */
   2400    initialise_II_hash_table( sframe );
   2401 
   2402    /* ->depth, ->outer, ->inner are 0, NULL, NULL */
   2403 
   2404    return sframe;
   2405 }
   2406 
   2407 /* Primary routine for setting up the shadow stack for a new thread.
   2408    Note that this is used to create not only child thread stacks, but
   2409    the root thread's stack too.  We create a new stack with
   2410    .creation_sp set to infinity, so that the outermost frame can never
   2411    be removed (by shadowStack_unwind).  The core calls this function
   2412    as soon as a thread is created.  We cannot yet get its SP value,
   2413    since that may not yet be set. */
   2414 static void shadowStack_thread_create ( ThreadId parent, ThreadId child )
   2415 {
   2416    tl_assert(is_sane_TId(child));
   2417    if (parent == VG_INVALID_THREADID) {
   2418       /* creating the main thread's stack */
   2419    } else {
   2420       tl_assert(is_sane_TId(parent));
   2421       tl_assert(parent != child);
   2422       tl_assert(shadowStacks[parent] != NULL);
   2423       tl_assert(siTrees[parent] != NULL);
   2424    }
   2425 
   2426    /* Create the child's stack.  Bear in mind we may be re-using
   2427       it. */
   2428    if (shadowStacks[child] == NULL) {
   2429       /* First use of this stack.  Just allocate an initial frame. */
   2430       tl_assert(siTrees[child] == NULL);
   2431    } else {
   2432       StackFrame *frame, *frame2;
   2433       /* re-using a stack. */
   2434       /* get rid of the interval tree */
   2435       tl_assert(siTrees[child] != NULL);
   2436       delete_StackTree( siTrees[child] );
   2437       siTrees[child] = NULL;
   2438       /* Throw away all existing frames. */
   2439       frame = shadowStacks[child];
   2440       while (frame->outer)
   2441          frame = frame->outer;
   2442       tl_assert(frame->depth == 0);
   2443       while (frame) {
   2444          frame2 = frame->inner;
   2445          if (frame2) tl_assert(1 + frame->depth == frame2->depth);
   2446          sg_free(frame);
   2447          frame = frame2;
   2448       }
   2449       shadowStacks[child] = NULL;
   2450    }
   2451 
   2452    tl_assert(shadowStacks[child] == NULL);
   2453    tl_assert(siTrees[child] == NULL);
   2454 
   2455    /* Set up the initial stack frame. */
   2456    shadowStacks[child] = new_root_StackFrame();
   2457 
   2458    /* and set up the child's stack block interval tree. */
   2459    siTrees[child] = new_StackTree();
   2460 }
   2461 
   2462 /* Once a thread is ready to go, the core calls here.  We take the
   2463    opportunity to push a second frame on its stack, with the
   2464    presumably valid SP value that is going to be used for the thread's
   2465    startup.  Hence we should always wind up with a valid outermost
   2466    frame for the thread. */
   2467 static void shadowStack_set_initial_SP ( ThreadId tid )
   2468 {
   2469    StackFrame* sf;
   2470    tl_assert(is_sane_TId(tid));
   2471    sf = shadowStacks[tid];
   2472    tl_assert(sf != NULL);
   2473    tl_assert(sf->outer == NULL);
   2474    tl_assert(sf->inner == NULL);
   2475    tl_assert(sf->creation_sp == ~0UL);
   2476    shadowStack_new_frame( tid, 0, VG_(get_SP)(tid),
   2477                                0, VG_(get_IP)(tid), NULL );
   2478 }
   2479 
   2480 
   2481 //////////////////////////////////////////////////////////////
   2482 //                                                          //
   2483 // main-ish                                                 //
   2484 //                                                          //
   2485 //////////////////////////////////////////////////////////////
   2486 
   2487 /* CALLED indirectly FROM GENERATED CODE.  Calls here are created by
   2488    sp-change analysis, as requested in pc_pre_clo_int(). */
   2489 void sg_die_mem_stack ( Addr old_SP, SizeT len ) {
   2490    ThreadId  tid = VG_(get_running_tid)();
   2491    shadowStack_unwind( tid, old_SP+len );
   2492 }
   2493 
   2494 void sg_pre_clo_init ( void ) {
   2495    ourGlobals_init();
   2496    init_StackBlocks_set();
   2497    init_GlobalBlock_set();
   2498 }
   2499 
   2500 void sg_post_clo_init ( void ) {
   2501 }
   2502 
   2503 void sg_pre_thread_ll_create ( ThreadId parent, ThreadId child ) {
   2504    shadowStack_thread_create(parent, child);
   2505 }
   2506 
   2507 void sg_pre_thread_first_insn ( ThreadId tid ) {
   2508    shadowStack_set_initial_SP(tid);
   2509 }
   2510 
   2511 void sg_fini(Int exitcode)
   2512 {
   2513    if (VG_(clo_stats)) {
   2514       VG_(message)(Vg_DebugMsg,
   2515          " sg_:  %'llu total accesses, of which:\n", stats__total_accesses);
   2516       VG_(message)(Vg_DebugMsg,
   2517          " sg_:     stack0: %'12llu classify\n",
   2518          stats__classify_Stack0);
   2519       VG_(message)(Vg_DebugMsg,
   2520          " sg_:     stackN: %'12llu classify\n",
   2521          stats__classify_StackN);
   2522       VG_(message)(Vg_DebugMsg,
   2523          " sg_:     global: %'12llu classify\n",
   2524          stats__classify_Global);
   2525       VG_(message)(Vg_DebugMsg,
   2526          " sg_:    unknown: %'12llu classify\n",
   2527          stats__classify_Unknown);
   2528       VG_(message)(Vg_DebugMsg,
   2529          " sg_:  %'llu Invars preened, of which %'llu changed\n",
   2530          stats__Invars_preened, stats__Invars_changed);
   2531       VG_(message)(Vg_DebugMsg,
   2532          " sg_:   t_i_b_MT: %'12llu\n", stats__t_i_b_empty);
   2533       VG_(message)(Vg_DebugMsg,
   2534          " sg_:     qcache: %'llu searches, %'llu probes, %'llu misses\n",
   2535          stats__qcache_queries, stats__qcache_probes, stats__qcache_misses);
   2536       VG_(message)(Vg_DebugMsg,
   2537          " sg_:  htab-fast: %'llu hits\n",
   2538          stats__htab_fast);
   2539       VG_(message)(Vg_DebugMsg,
   2540          " sg_:  htab-slow: %'llu searches, %'llu probes, %'llu resizes\n",
   2541          stats__htab_searches, stats__htab_probes, stats__htab_resizes);
   2542    }
   2543 }
   2544 
   2545 
   2546 /*--------------------------------------------------------------------*/
   2547 /*--- end                                                sg_main.c ---*/
   2548 /*--------------------------------------------------------------------*/
   2549