Home | History | Annotate | Download | only in coregrind
      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- Take snapshots of client stacks.              m_stacktrace.c ---*/
      4 /*--------------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2000-2015 Julian Seward
     11       jseward (at) acm.org
     12 
     13    This program is free software; you can redistribute it and/or
     14    modify it under the terms of the GNU General Public License as
     15    published by the Free Software Foundation; either version 2 of the
     16    License, or (at your option) any later version.
     17 
     18    This program is distributed in the hope that it will be useful, but
     19    WITHOUT ANY WARRANTY; without even the implied warranty of
     20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21    General Public License for more details.
     22 
     23    You should have received a copy of the GNU General Public License
     24    along with this program; if not, write to the Free Software
     25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     26    02111-1307, USA.
     27 
     28    The GNU General Public License is contained in the file COPYING.
     29 */
     30 
     31 #include "pub_core_basics.h"
     32 #include "pub_core_vki.h"
     33 #include "pub_core_threadstate.h"
     34 #include "pub_core_debuginfo.h"     // XXX: circular dependency
     35 #include "pub_core_aspacemgr.h"     // For VG_(is_addressable)()
     36 #include "pub_core_libcbase.h"
     37 #include "pub_core_libcassert.h"
     38 #include "pub_core_libcprint.h"
     39 #include "pub_core_machine.h"
     40 #include "pub_core_options.h"
     41 #include "pub_core_stacks.h"        // VG_(stack_limits)
     42 #include "pub_core_stacktrace.h"
     43 #include "pub_core_syswrap.h"       // VG_(is_in_syscall)
     44 #include "pub_core_xarray.h"
     45 #include "pub_core_clientstate.h"   // VG_(client__dl_sysinfo_int80)
     46 #include "pub_core_trampoline.h"
     47 
     48 
     49 /*------------------------------------------------------------*/
     50 /*---                                                      ---*/
     51 /*--- BEGIN platform-dependent unwinder worker functions   ---*/
     52 /*---                                                      ---*/
     53 /*------------------------------------------------------------*/
     54 
     55 /* Take a snapshot of the client's stack, putting up to 'max_n_ips'
     56    IPs into 'ips'.  In order to be thread-safe, we pass in the
     57    thread's IP SP, FP if that's meaningful, and LR if that's
     58    meaningful.  Returns number of IPs put in 'ips'.
     59 
     60    If you know what the thread ID for this stack is, send that as the
     61    first parameter, else send zero.  This helps generate better stack
     62    traces on ppc64-linux and has no effect on other platforms.
     63 */
     64 
     65 /* Do frame merging in the _i frames in _ips array of recursive cycles
     66    of up to _nframes.  The merge is done during stack unwinding
     67    (i.e. in platform specific unwinders) to collect as many
     68    "interesting" stack traces as possible. */
     69 #define RECURSIVE_MERGE(_nframes,_ips,_i) if (UNLIKELY(_nframes > 0)) \
     70 do {                                                                  \
     71    Int dist;                                                          \
     72    for (dist = 1; dist <= _nframes && dist < (Int)_i; dist++) {       \
     73       if (_ips[_i-1] == _ips[_i-1-dist]) {                            \
     74          _i = _i - dist;                                              \
     75          break;                                                       \
     76       }                                                               \
     77    }                                                                  \
     78 } while (0)
     79 
     80 /* Note about calculation of fp_min : fp_min is the lowest address
     81    which can be accessed during unwinding. This is SP - VG_STACK_REDZONE_SZB.
     82    On most platforms, this will be equal to SP (as VG_STACK_REDZONE_SZB
     83    is 0). However, on some platforms (e.g. amd64), there is an accessible
     84    redzone below the SP. Some CFI unwind info are generated, taking this
     85    into account. As an example, the following is a CFI unwind info on
     86    amd64 found for a 'retq' instruction:
     87 [0x400f7e .. 0x400f7e]: let cfa=oldSP+8 in RA=*(cfa+-8) SP=cfa+0 BP=*(cfa+-16)
     88   0x400f7e: retq
     89   As you can see, the previous BP is found 16 bytes below the cfa, which
     90   is the oldSP+8. So, effectively, the BP is found 8 bytes below the SP.
     91   The fp_min must take this into account, otherwise, VG_(use_CF_info) will
     92   not unwind the BP. */
     93 
     94 /* ------------------------ x86 ------------------------- */
     95 
     96 #if defined(VGP_x86_linux) || defined(VGP_x86_darwin) \
     97     || defined(VGP_x86_solaris)
     98 
#define N_FP_CF_VERIF 1021
// prime number so that size of fp_CF_verif is just below 4K or 8K
// Note that this prime nr differs from the one chosen in
// m_debuginfo/debuginfo.c for the cfsi cache : in case we have
// a collision here between two IPs, we expect to not (often) have the
// same collision in the cfsi cache (and vice-versa).

// Verdicts stored (XOR-ed into the IP) in fp_CF_verif_cache:
// unwinding with fp chain is ok:
#define FPUNWIND 0
// there is no CFI info for this IP:
#define NOINFO   1
// Unwind with FP is not ok, must use CF unwind:
#define CFUNWIND 2

// Direct-mapped cache, indexed by IP % N_FP_CF_VERIF; each entry holds
// IP ^ verdict (see big comment below).  A zero entry means "empty":
// the whole array is memset to 0 whenever the debuginfo generation
// changes.
static Addr fp_CF_verif_cache [N_FP_CF_VERIF];

/* An unwind done by following the fp chain technique can be incorrect
   as not all frames are respecting the standard bp/sp ABI.
   The CF information is now generated by default by gcc
   (as part of the dwarf info). However, unwinding using CF information
   is significantly slower : a slowdown of 20% has been observed
   on an helgrind test case.
   So, by default, the unwinding will be done using the fp chain.
   But before accepting to unwind an IP with fp_chain, the result
   of the unwind will be checked with the CF information.
   This check can give 3 results:
     FPUNWIND (0): there is CF info, and it gives the same result as fp unwind.
       => it is assumed that future unwind for this IP can be done
          with the fast fp chain, without further CF checking
     NOINFO   (1): there is no CF info (so, fp unwind is the only do-able thing)
     CFUNWIND (2): there is CF info, but unwind result differs.
       => it is assumed that future unwind for this IP must be done
       with the CF info.
   Of course, if each fp unwind implies a check done with a CF unwind,
   it would just be slower => we cache the check result in an
   array of checked Addr.
   The check for an IP will be stored at
    fp_CF_verif_cache[IP % N_FP_CF_VERIF] as one of:
                     IP ^ FPUNWIND
                     IP ^ NOINFO
                     IP ^ CFUNWIND

   Note: we can re-use the last (ROUNDDOWN (log (N_FP_CF_VERIF))) bits
   to store the check result, as they are guaranteed to be non significant
   in the comparison between 2 IPs stored in fp_CF_verif_cache).
   In other words, if two IPs are only differing on the last 2 bits,
   then they will not land in the same cache bucket.
*/

/* cached result of VG_(FPO_info_present)(). Refreshed each time
   the fp_CF_verif_generation is different of the current debuginfo
   generation. */
static Bool FPO_info_present = False;

// Debuginfo generation for which the above caches are valid.
static UInt fp_CF_verif_generation = 0;
// Our cache has to be maintained in sync with the CFI cache.
// Each time the debuginfo is changed, its generation will be incremented.
// We will clear our cache when our saved generation differs from
// the debuginfo generation.
    158 
    159 UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
    160                                /*OUT*/Addr* ips, UInt max_n_ips,
    161                                /*OUT*/Addr* sps, /*OUT*/Addr* fps,
    162                                const UnwindStartRegs* startRegs,
    163                                Addr fp_max_orig )
    164 {
    165    const Bool do_stats = False; // compute and output some stats regularly.
    166    static struct {
    167       UInt nr; // nr of stacktraces computed
    168       UInt nf; // nr of frames computed
    169       UInt Ca; // unwind for which cache indicates CFUnwind must be used.
    170       UInt FF; // unwind for which cache indicates FPUnwind can be used.
    171       UInt Cf; // unwind at end of stack+store CFUNWIND (xip not end of stack).
    172       UInt Fw; // unwind at end of stack+store FPUNWIND
    173       UInt FO; // unwind + store FPUNWIND
    174       UInt CF; // unwind + store CFUNWIND. Details below.
    175       UInt xi; UInt xs; UInt xb; // register(s) which caused a 'store CFUNWIND'.
    176       UInt Ck; // unwind fp invalid+store FPUNWIND
    177       UInt MS; // microsoft unwind
    178    } stats;
    179 
    180    const Bool   debug = False;
    181    //                 = VG_(debugLog_getLevel) () > 3;
    182    //                 = True;
    183    //                 = stats.nr >= 123456;
    184    const HChar* unwind_case; // used when debug is True.
    185    // Debugging this function is not straightforward.
    186    // Here is the easiest way I have found:
    187    // 1. Change the above to True.
    188    // 2. Start your program under Valgrind with --tool=none --vgdb-error=0
    189    // 3. Use GDB/vgdb to put a breakpoint where you want to debug the stacktrace
    190    // 4. Continue till breakpoint is encountered
    191    // 5. From GDB, use 'monitor v.info scheduler' and examine the unwind traces.
    192    //    You might have to do twice 'monitor v.info scheduler' to see
    193    //    the effect of caching the results of the verification.
    194    //    You can also modify the debug dynamically using by using
    195    //    'monitor v.set debuglog 4.
    196 
    197    Int   i;
    198    Addr  fp_max;
    199    UInt  n_found = 0;
    200    const Int cmrf = VG_(clo_merge_recursive_frames);
    201 
    202    vg_assert(sizeof(Addr) == sizeof(UWord));
    203    vg_assert(sizeof(Addr) == sizeof(void*));
    204 
    205    D3UnwindRegs fpverif_uregs; // result of CF unwind for a check reason.
    206    Addr xip_verified = 0; // xip for which we have calculated fpverif_uregs
    207    // 0 assigned to silence false positive -Wuninitialized warning
    208    // This is a false positive as xip_verified is assigned when
    209    // xip_verif > CFUNWIND and only used if xip_verif > CFUNWIND.
    210 
    211    D3UnwindRegs uregs;
    212    uregs.xip = (Addr)startRegs->r_pc;
    213    uregs.xsp = (Addr)startRegs->r_sp;
    214    uregs.xbp = startRegs->misc.X86.r_ebp;
    215    Addr fp_min = uregs.xsp - VG_STACK_REDZONE_SZB;
    216 
    217    /* Snaffle IPs from the client's stack into ips[0 .. max_n_ips-1],
    218       stopping when the trail goes cold, which we guess to be
    219       when FP is not a reasonable stack location. */
    220 
    221    // JRS 2002-sep-17: hack, to round up fp_max to the end of the
    222    // current page, at least.  Dunno if it helps.
    223    // NJN 2002-sep-17: seems to -- stack traces look like 1.0.X again
    224    fp_max = VG_PGROUNDUP(fp_max_orig);
    225    if (fp_max >= sizeof(Addr))
    226       fp_max -= sizeof(Addr);
    227 
    228    if (debug)
    229       VG_(printf)("max_n_ips=%u fp_min=0x%08lx fp_max_orig=0x08%lx, "
    230                   "fp_max=0x%08lx ip=0x%08lx fp=0x%08lx\n",
    231                   max_n_ips, fp_min, fp_max_orig, fp_max,
    232                   uregs.xip, uregs.xbp);
    233 
    234    /* Assertion broken before main() is reached in pthreaded programs;  the
    235     * offending stack traces only have one item.  --njn, 2002-aug-16 */
    236    /* vg_assert(fp_min <= fp_max);*/
    237    // On Darwin, this kicks in for pthread-related stack traces, so they're
    238    // only 1 entry long which is wrong.
    239 #  if defined(VGO_linux)
    240    if (fp_min + 512 >= fp_max) {
    241       /* If the stack limits look bogus, don't poke around ... but
    242          don't bomb out either. */
    243 #  elif defined(VGO_solaris)
    244    if (fp_max == 0) {
    245       /* VG_(get_StackTrace)() can be called by tools very early when
    246          various tracing options are enabled. Don't proceed further
    247          if the stack limits look bogus.
    248        */
    249 #  endif
    250 #  if defined(VGO_linux) || defined(VGO_solaris)
    251       if (sps) sps[0] = uregs.xsp;
    252       if (fps) fps[0] = uregs.xbp;
    253       ips[0] = uregs.xip;
    254       return 1;
    255    }
    256 #  endif
    257 
    258    if (UNLIKELY (fp_CF_verif_generation != VG_(debuginfo_generation)())) {
    259       fp_CF_verif_generation = VG_(debuginfo_generation)();
    260       VG_(memset)(&fp_CF_verif_cache, 0, sizeof(fp_CF_verif_cache));
    261       FPO_info_present = VG_(FPO_info_present)();
    262    }
    263 
    264 
    265    /* Loop unwinding the stack. Note that the IP value we get on
    266     * each pass (whether from CFI info or a stack frame) is a
    267     * return address so is actually after the calling instruction
    268     * in the calling function.
    269     *
    270     * Because of this we subtract one from the IP after each pass
    271     * of the loop so that we find the right CFI block on the next
    272     * pass - otherwise we can find the wrong CFI info if it happens
    273     * to change after the calling instruction and that will mean
    274     * that we will fail to unwind the next step.
    275     *
    276     * This most frequently happens at the end of a function when
    277     * a tail call occurs and we wind up using the CFI info for the
    278     * next function which is completely wrong.
    279     */
    280    if (sps) sps[0] = uregs.xsp;
    281    if (fps) fps[0] = uregs.xbp;
    282    ips[0] = uregs.xip;
    283    i = 1;
    284    if (do_stats) stats.nr++;
    285 
    286    while (True) {
    287 
    288       if (i >= max_n_ips)
    289          break;
    290 
    291       UWord hash = uregs.xip % N_FP_CF_VERIF;
    292       Addr xip_verif = uregs.xip ^ fp_CF_verif_cache [hash];
    293       if (debug)
    294          VG_(printf)("     uregs.xip 0x%08lx xip_verif[0x%08lx]"
    295                      " xbp 0x%08lx xsp 0x%08lx\n",
    296                      uregs.xip, xip_verif,
    297                      uregs.xbp, uregs.xsp);
    298       // If xip is in cache, then xip_verif will be <= CFUNWIND.
    299       // Otherwise, if not in cache, xip_verif will be > CFUNWIND.
    300 
    301       /* Try to derive a new (ip,sp,fp) triple from the current set. */
    302 
    303       /* Do we have to do CFI unwinding ?
    304          We do CFI unwinding if one of the following condition holds:
    305          a. fp_CF_verif_cache contains xip but indicates CFUNWIND must
    306             be done (i.e. fp unwind check failed when we did the first
    307             unwind for this IP).
    308          b. fp_CF_verif_cache does not contain xip.
    309             We will try CFI unwinding in fpverif_uregs and compare with
    310             FP unwind result to insert xip in the cache with the correct
    311             indicator. */
    312       if (UNLIKELY(xip_verif >= CFUNWIND)) {
    313          if (xip_verif == CFUNWIND) {
    314             /* case a : do "real" cfi unwind */
    315             if ( VG_(use_CF_info)( &uregs, fp_min, fp_max ) ) {
    316                if (debug) unwind_case = "Ca";
    317                if (do_stats) stats.Ca++;
    318                goto unwind_done;
    319             }
    320             /* ??? cache indicates we have to do CFI unwind (so, we
    321              previously found CFI info, and failed the fp unwind
    322              check). Now, we just failed with CFI.  So, once we
    323              succeed, once we fail.  No idea what is going on =>
    324              cleanup the cache entry and fallover to fp unwind (this
    325              time). */
    326             fp_CF_verif_cache [hash] = 0;
    327             if (debug) VG_(printf)("     cache reset as CFI ok then nok\n");
    328             //??? stats
    329             xip_verif = NOINFO;
    330          } else {
    331             /* case b : do "verif" cfi unwind in fpverif_uregs */
    332             fpverif_uregs = uregs;
    333             xip_verified = uregs.xip;
    334             if ( !VG_(use_CF_info)( &fpverif_uregs, fp_min, fp_max ) ) {
    335                fp_CF_verif_cache [hash] = uregs.xip ^ NOINFO;
    336                if (debug) VG_(printf)("     cache NOINFO fpverif_uregs\n");
    337                xip_verif = NOINFO;
    338             }
    339          }
    340       }
    341 
    342       /* On x86, try the old-fashioned method of following the
    343          %ebp-chain.  This can be done if the fp_CF_verif_cache for xip
    344          indicate fp unwind is ok. This must be done if the cache indicates
    345          there is no info. This is also done to confirm what to put in the cache
    346          if xip was not in the cache. */
    347       /* This deals with frames resulting from functions which begin "pushl%
    348          ebp ; movl %esp, %ebp" which is the ABI-mandated preamble. */
    349       if (fp_min <= uregs.xbp &&
    350           uregs.xbp <= fp_max - 1 * sizeof(UWord)/*see comment below*/ &&
    351           VG_IS_4_ALIGNED(uregs.xbp))
    352       {
    353          /* fp looks sane, so use it. */
    354          uregs.xip = (((UWord*)uregs.xbp)[1]);
    355          // We stop if we hit a zero (the traditional end-of-stack
    356          // marker) or a one -- these correspond to recorded IPs of 0 or -1.
    357          // The latter because r8818 (in this file) changes the meaning of
    358          // entries [1] and above in a stack trace, by subtracting 1 from
    359          // them.  Hence stacks that used to end with a zero value now end in
    360          // -1 and so we must detect that too.
    361          if (0 == uregs.xip || 1 == uregs.xip) {
    362             if (xip_verif > CFUNWIND) {
    363                // Check if we obtain the same result with fp unwind.
    364                // If same result, then mark xip as fp unwindable
    365                if (uregs.xip == fpverif_uregs.xip) {
    366                   fp_CF_verif_cache [hash] = xip_verified ^ FPUNWIND;
    367                   if (debug) VG_(printf)("     cache FPUNWIND 0\n");
    368                   unwind_case = "Fw";
    369                   if (do_stats) stats.Fw++;
    370                   break;
    371                } else {
    372                   fp_CF_verif_cache [hash] = xip_verified ^ CFUNWIND;
    373                   uregs = fpverif_uregs;
    374                   if (debug) VG_(printf)("     cache CFUNWIND 0\n");
    375                   unwind_case = "Cf";
    376                   if (do_stats) stats.Cf++;
    377                   goto unwind_done;
    378                }
    379             } else {
    380                // end of stack => out of the loop.
    381                break;
    382             }
    383          }
    384 
    385          uregs.xsp = uregs.xbp + sizeof(Addr) /*saved %ebp*/
    386                                + sizeof(Addr) /*ra*/;
    387          uregs.xbp = (((UWord*)uregs.xbp)[0]);
    388          if (xip_verif > CFUNWIND) {
    389             if (uregs.xip == fpverif_uregs.xip
    390                 && uregs.xsp == fpverif_uregs.xsp
    391                 && uregs.xbp == fpverif_uregs.xbp) {
    392                fp_CF_verif_cache [hash] = xip_verified ^ FPUNWIND;
    393                if (debug) VG_(printf)("     cache FPUNWIND >2\n");
    394                if (debug) unwind_case = "FO";
    395                if (do_stats) stats.FO++;
    396             } else {
    397                fp_CF_verif_cache [hash] = xip_verified ^ CFUNWIND;
    398                if (debug) VG_(printf)("     cache CFUNWIND >2\n");
    399                if (do_stats && uregs.xip != fpverif_uregs.xip) stats.xi++;
    400                if (do_stats && uregs.xsp != fpverif_uregs.xsp) stats.xs++;
    401                if (do_stats && uregs.xbp != fpverif_uregs.xbp) stats.xb++;
    402                uregs = fpverif_uregs;
    403                if (debug) unwind_case = "CF";
    404                if (do_stats) stats.CF++;
    405             }
    406          } else {
    407             if (debug) unwind_case = "FF";
    408             if (do_stats) stats.FF++;
    409          }
    410          goto unwind_done;
    411       } else {
    412          // fp unwind has failed.
    413          // If we were checking the validity of the cfi unwinding,
    414          // we mark in the cache that the fp unwind cannot be done, and that
    415          // cfi unwind is desired.
    416          if (xip_verif > CFUNWIND) {
    417             // We know that fpverif_uregs contains valid information,
    418             // as a failed cf unwind would have put NOINFO in xip_verif.
    419             fp_CF_verif_cache [hash] = xip_verified ^ CFUNWIND;
    420             if (debug) VG_(printf)("     cache CFUNWIND as fp failed\n");
    421             uregs = fpverif_uregs;
    422             if (debug) unwind_case = "Ck";
    423             if (do_stats) stats.Ck++;
    424             goto unwind_done;
    425          }
    426          // xip_verif is FPUNWIND or NOINFO.
    427          // We failed the cfi unwind and/or the fp unwind.
    428          // => fallback to FPO info.
    429       }
    430 
    431       /* And, similarly, try for MSVC FPO unwind info. */
    432       if (FPO_info_present
    433           && VG_(use_FPO_info)( &uregs.xip, &uregs.xsp, &uregs.xbp,
    434                                 fp_min, fp_max ) ) {
    435          if (debug) unwind_case = "MS";
    436          if (do_stats) stats.MS++;
    437          goto unwind_done;
    438       }
    439 
    440       /* No luck.  We have to give up. */
    441       break;
    442 
    443    unwind_done:
    444       /* Add a frame in ips/sps/fps */
    445       /* fp is %ebp.  sp is %esp.  ip is %eip. */
    446       if (0 == uregs.xip || 1 == uregs.xip) break;
    447       if (sps) sps[i] = uregs.xsp;
    448       if (fps) fps[i] = uregs.xbp;
    449       ips[i++] = uregs.xip - 1;
    450       /* -1: refer to calling insn, not the RA */
    451       if (debug)
    452          VG_(printf)("     ips%s[%d]=0x%08lx\n", unwind_case, i-1, ips[i-1]);
    453       uregs.xip = uregs.xip - 1;
    454       /* as per comment at the head of this loop */
    455       RECURSIVE_MERGE(cmrf,ips,i);
    456    }
    457 
    458    if (do_stats) stats.nf += i;
    459    if (do_stats && stats.nr % 10000 == 0) {
    460      VG_(printf)("nr %u nf %u "
    461                  "Ca %u FF %u "
    462                  "Cf %u "
    463                  "Fw %u FO %u "
    464                  "CF %u (xi %u xs %u xb %u) "
    465                  "Ck %u MS %u\n",
    466                  stats.nr, stats.nf,
    467                  stats.Ca, stats.FF,
    468                  stats.Cf,
    469                  stats.Fw, stats.FO,
    470                  stats.CF, stats.xi, stats.xs, stats.xb,
    471                  stats.Ck, stats.MS);
    472    }
    473    n_found = i;
    474    return n_found;
    475 }
    476 
    477 #undef N_FP_CF_VERIF
    478 #undef FPUNWIND
    479 #undef NOINFO
    480 #undef CFUNWIND
    481 
    482 #endif
    483 
    484 /* ----------------------- amd64 ------------------------ */
    485 
    486 #if defined(VGP_amd64_linux) || defined(VGP_amd64_darwin) \
    487     || defined(VGP_amd64_solaris)
    488 
    489 UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
    490                                /*OUT*/Addr* ips, UInt max_n_ips,
    491                                /*OUT*/Addr* sps, /*OUT*/Addr* fps,
    492                                const UnwindStartRegs* startRegs,
    493                                Addr fp_max_orig )
    494 {
    495    const Bool  debug = False;
    496    Int   i;
    497    Addr  fp_max;
    498    UInt  n_found = 0;
    499    const Int cmrf = VG_(clo_merge_recursive_frames);
    500 
    501    vg_assert(sizeof(Addr) == sizeof(UWord));
    502    vg_assert(sizeof(Addr) == sizeof(void*));
    503 
    504    D3UnwindRegs uregs;
    505    uregs.xip = startRegs->r_pc;
    506    uregs.xsp = startRegs->r_sp;
    507    uregs.xbp = startRegs->misc.AMD64.r_rbp;
    508    Addr fp_min = uregs.xsp - VG_STACK_REDZONE_SZB;
    509 
    510    /* Snaffle IPs from the client's stack into ips[0 .. max_n_ips-1],
    511       stopping when the trail goes cold, which we guess to be
    512       when FP is not a reasonable stack location. */
    513 
    514    // JRS 2002-sep-17: hack, to round up fp_max to the end of the
    515    // current page, at least.  Dunno if it helps.
    516    // NJN 2002-sep-17: seems to -- stack traces look like 1.0.X again
    517    fp_max = VG_PGROUNDUP(fp_max_orig);
    518    if (fp_max >= sizeof(Addr))
    519       fp_max -= sizeof(Addr);
    520 
    521    if (debug)
    522       VG_(printf)("max_n_ips=%u fp_min=0x%lx fp_max_orig=0x%lx, "
    523                   "fp_max=0x%lx ip=0x%lx fp=0x%lx\n",
    524                   max_n_ips, fp_min, fp_max_orig, fp_max,
    525                   uregs.xip, uregs.xbp);
    526 
    527    /* Assertion broken before main() is reached in pthreaded programs;  the
    528     * offending stack traces only have one item.  --njn, 2002-aug-16 */
    529    /* vg_assert(fp_min <= fp_max);*/
    530    // On Darwin, this kicks in for pthread-related stack traces, so they're
    531    // only 1 entry long which is wrong.
    532 #  if defined(VGO_linux)
    533    if (fp_min + 256 >= fp_max) {
    534       /* If the stack limits look bogus, don't poke around ... but
    535          don't bomb out either. */
    536 #  elif defined(VGO_solaris)
    537    if (fp_max == 0) {
    538       /* VG_(get_StackTrace)() can be called by tools very early when
    539          various tracing options are enabled. Don't proceed further
    540          if the stack limits look bogus.
    541        */
    542 #  endif
    543 #  if defined(VGO_linux) || defined(VGO_solaris)
    544 
    545       if (sps) sps[0] = uregs.xsp;
    546       if (fps) fps[0] = uregs.xbp;
    547       ips[0] = uregs.xip;
    548       return 1;
    549    }
    550 #  endif
    551 
    552    /* fp is %rbp.  sp is %rsp.  ip is %rip. */
    553 
    554    ips[0] = uregs.xip;
    555    if (sps) sps[0] = uregs.xsp;
    556    if (fps) fps[0] = uregs.xbp;
    557    i = 1;
    558    if (debug)
    559       VG_(printf)("     ipsS[%d]=%#08lx rbp %#08lx rsp %#08lx\n",
    560                   i-1, ips[i-1], uregs.xbp, uregs.xsp);
    561 
    562 #  if defined(VGO_darwin)
    563    if (VG_(is_valid_tid)(tid_if_known) &&
    564       VG_(is_in_syscall)(tid_if_known) &&
    565       i < max_n_ips) {
    566       /* On Darwin, all the system call stubs have no function
    567        * prolog.  So instead of top of the stack being a new
    568        * frame comprising a saved BP and a return address, we
    569        * just have the return address in the caller's frame.
    570        * Adjust for this by recording the return address.
    571        */
    572       ips[i] = *(Addr *)uregs.xsp - 1;
    573       if (sps) sps[i] = uregs.xsp;
    574       if (fps) fps[i] = uregs.xbp;
    575       i++;
    576    }
    577 #  endif
    578 
    579    /* Loop unwinding the stack. Note that the IP value we get on
    580     * each pass (whether from CFI info or a stack frame) is a
    581     * return address so is actually after the calling instruction
    582     * in the calling function.
    583     *
    584     * Because of this we subtract one from the IP after each pass
    585     * of the loop so that we find the right CFI block on the next
    586     * pass - otherwise we can find the wrong CFI info if it happens
    587     * to change after the calling instruction and that will mean
    588     * that we will fail to unwind the next step.
    589     *
    590     * This most frequently happens at the end of a function when
    591     * a tail call occurs and we wind up using the CFI info for the
    592     * next function which is completely wrong.
    593     */
    594    while (True) {
    595 
    596       if (i >= max_n_ips)
    597          break;
    598 
    599       /* Try to derive a new (ip,sp,fp) triple from the current set. */
    600 
    601       /* First off, see if there is any CFI info to hand which can
    602          be used. */
    603       if ( VG_(use_CF_info)( &uregs, fp_min, fp_max ) ) {
    604          if (0 == uregs.xip || 1 == uregs.xip) break;
    605          if (sps) sps[i] = uregs.xsp;
    606          if (fps) fps[i] = uregs.xbp;
    607          ips[i++] = uregs.xip - 1; /* -1: refer to calling insn, not the RA */
    608          if (debug)
    609             VG_(printf)("     ipsC[%d]=%#08lx rbp %#08lx rsp %#08lx\n",
    610                         i-1, ips[i-1], uregs.xbp, uregs.xsp);
    611          uregs.xip = uregs.xip - 1; /* as per comment at the head of this loop */
    612          RECURSIVE_MERGE(cmrf,ips,i);
    613          continue;
    614       }
    615 
    616       /* If VG_(use_CF_info) fails, it won't modify ip/sp/fp, so
    617          we can safely try the old-fashioned method. */
    618       /* This bit is supposed to deal with frames resulting from
    619          functions which begin "pushq %rbp ; movq %rsp, %rbp".
    620          Unfortunately, since we can't (easily) look at the insns at
    621          the start of the fn, like GDB does, there's no reliable way
    622          to tell.  Hence the hack of first trying out CFI, and if that
    623          fails, then use this as a fallback. */
    624       /* Note: re "- 1 * sizeof(UWord)", need to take account of the
    625          fact that we are prodding at & ((UWord*)fp)[1] and so need to
    626          adjust the limit check accordingly.  Omitting this has been
    627          observed to cause segfaults on rare occasions. */
    628       if (fp_min <= uregs.xbp && uregs.xbp <= fp_max - 1 * sizeof(UWord)) {
    629          /* fp looks sane, so use it. */
    630          uregs.xip = (((UWord*)uregs.xbp)[1]);
    631          if (0 == uregs.xip || 1 == uregs.xip) break;
    632          uregs.xsp = uregs.xbp + sizeof(Addr) /*saved %rbp*/
    633                                + sizeof(Addr) /*ra*/;
    634          uregs.xbp = (((UWord*)uregs.xbp)[0]);
    635          if (sps) sps[i] = uregs.xsp;
    636          if (fps) fps[i] = uregs.xbp;
    637          ips[i++] = uregs.xip - 1; /* -1: refer to calling insn, not the RA */
    638          if (debug)
    639             VG_(printf)("     ipsF[%d]=%#08lx rbp %#08lx rsp %#08lx\n",
    640                         i-1, ips[i-1], uregs.xbp, uregs.xsp);
    641          uregs.xip = uregs.xip - 1; /* as per comment at the head of this loop */
    642          RECURSIVE_MERGE(cmrf,ips,i);
    643          continue;
    644       }
    645 
    646       /* Last-ditch hack (evidently GDB does something similar).  We
    647          are in the middle of nowhere and we have a nonsense value for
    648          the frame pointer.  If the stack pointer is still valid,
    649          assume that what it points at is a return address.  Yes,
    650          desperate measures.  Could do better here:
    651          - check that the supposed return address is in
    652            an executable page
    653          - check that the supposed return address is just after a call insn
    654          - given those two checks, don't just consider *sp as the return
    655            address; instead scan a likely section of stack (eg sp .. sp+256)
    656            and use suitable values found there.
    657       */
    658       if (fp_min <= uregs.xsp && uregs.xsp < fp_max) {
    659          uregs.xip = ((UWord*)uregs.xsp)[0];
    660          if (0 == uregs.xip || 1 == uregs.xip) break;
    661          if (sps) sps[i] = uregs.xsp;
    662          if (fps) fps[i] = uregs.xbp;
    663          ips[i++] = uregs.xip == 0
    664                     ? 0 /* sp[0] == 0 ==> stuck at the bottom of a
    665                            thread stack */
    666                     : uregs.xip - 1;
    667                         /* -1: refer to calling insn, not the RA */
    668          if (debug)
    669             VG_(printf)("     ipsH[%d]=%#08lx\n", i-1, ips[i-1]);
    670          uregs.xip = uregs.xip - 1; /* as per comment at the head of this loop */
    671          uregs.xsp += 8;
    672          RECURSIVE_MERGE(cmrf,ips,i);
    673          continue;
    674       }
    675 
    676       /* No luck at all.  We have to give up. */
    677       break;
    678    }
    679 
    680    n_found = i;
    681    return n_found;
    682 }
    683 
    684 #endif
    685 
    686 /* -----------------------ppc32/64 ---------------------- */
    687 
    688 #if defined(VGP_ppc32_linux) || defined(VGP_ppc64be_linux) \
    689     || defined(VGP_ppc64le_linux)
    690 
/* Unwind a ppc32/64-linux stack.  On ppc, r1 serves as both stack and
   frame pointer; frames are chained through the back-chain word at
   offset 0 of each frame, and the saved LR sits 'lr_offset' words up
   from the back chain (2 on TOC-using 64-bit platforms, 1 on ppc32).
   Fills ips[0 .. max_n_ips-1] (and optionally sps[]/fps[]) and
   returns the number of frames found. */
UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
                               /*OUT*/Addr* ips, UInt max_n_ips,
                               /*OUT*/Addr* sps, /*OUT*/Addr* fps,
                               const UnwindStartRegs* startRegs,
                               Addr fp_max_orig )
{
   Bool  lr_is_first_RA = False;
#  if defined(VG_PLAT_USES_PPCTOC) || defined(VGP_ppc64le_linux)
   /* Bookkeeping for undoing Valgrind's function redirection while
      unwinding; see the comments on the magic-return-stub hacks
      below. */
   Word redir_stack_size = 0;
   Word redirs_used      = 0;
#  endif
   /* Merge depth for recursive frames; > 0 enables RECURSIVE_MERGE. */
   const Int cmrf = VG_(clo_merge_recursive_frames);

   Bool  debug = False;
   Int   i;
   Addr  fp_max;
   UInt  n_found = 0;

   vg_assert(sizeof(Addr) == sizeof(UWord));
   vg_assert(sizeof(Addr) == sizeof(void*));

   Addr ip = (Addr)startRegs->r_pc;
   Addr sp = (Addr)startRegs->r_sp;
   Addr fp = sp;
#  if defined(VGP_ppc32_linux)
   Addr lr = startRegs->misc.PPC32.r_lr;
#  elif defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)
   Addr lr = startRegs->misc.PPC64.r_lr;
#  endif
   Addr fp_min = sp - VG_STACK_REDZONE_SZB;

   /* Snaffle IPs from the client's stack into ips[0 .. max_n_ips-1],
      stopping when the trail goes cold, which we guess to be
      when FP is not a reasonable stack location. */

   // JRS 2002-sep-17: hack, to round up fp_max to the end of the
   // current page, at least.  Dunno if it helps.
   // NJN 2002-sep-17: seems to -- stack traces look like 1.0.X again
   fp_max = VG_PGROUNDUP(fp_max_orig);
   if (fp_max >= sizeof(Addr))
      fp_max -= sizeof(Addr);

   if (debug)
      VG_(printf)("max_n_ips=%u fp_min=0x%lx fp_max_orig=0x%lx, "
                  "fp_max=0x%lx ip=0x%lx fp=0x%lx\n",
		  max_n_ips, fp_min, fp_max_orig, fp_max, ip, fp);

   /* Assertion broken before main() is reached in pthreaded programs;  the
    * offending stack traces only have one item.  --njn, 2002-aug-16 */
   /* vg_assert(fp_min <= fp_max);*/
   if (fp_min + 512 >= fp_max) {
      /* If the stack limits look bogus, don't poke around ... but
         don't bomb out either. */
      if (sps) sps[0] = sp;
      if (fps) fps[0] = fp;
      ips[0] = ip;
      return 1;
   }

   /* fp is %r1.  ip is %cia.  Note, ppc uses r1 as both the stack and
      frame pointers. */

#  if defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)
   redir_stack_size = VEX_GUEST_PPC64_REDIR_STACK_SIZE;
   redirs_used      = 0;
#  endif

#  if defined(VG_PLAT_USES_PPCTOC) || defined (VGP_ppc64le_linux)
   /* Deal with bogus LR values caused by function
      interception/wrapping on ppc-TOC platforms; see comment on
      similar code a few lines further down. */
   if (lr == (Addr)&VG_(ppctoc_magic_redirect_return_stub)
       && VG_(is_valid_tid)(tid_if_known)) {
      Word hsp = VG_(threads)[tid_if_known].arch.vex.guest_REDIR_SP;
      redirs_used++;
      if (hsp >= 1 && hsp < redir_stack_size)
         lr = VG_(threads)[tid_if_known]
                 .arch.vex.guest_REDIR_STACK[hsp-1];
   }
#  endif

   /* We have to determine whether or not LR currently holds this fn
      (call it F)'s return address.  It might not if F has previously
      called some other function, hence overwriting LR with a pointer
      to some part of F.  Hence if LR and IP point to the same
      function then we conclude LR does not hold this function's
      return address; instead the LR at entry must have been saved in
      the stack by F's prologue and so we must get it from there
      instead.  Note all this guff only applies to the innermost
      frame. */
   lr_is_first_RA = False;
   {
      const HChar *buf_lr, *buf_ip;
      /* The following conditional looks grossly inefficient and
         surely could be majorly improved, with not much effort. */
      if (VG_(get_fnname_raw) (lr, &buf_lr)) {
         /* Copy the first name out before the second lookup --
            presumably because both lookups return pointers into the
            same internal buffer.  TODO(review): confirm. */
         HChar buf_lr_copy[VG_(strlen)(buf_lr) + 1];
         VG_(strcpy)(buf_lr_copy, buf_lr);
         if (VG_(get_fnname_raw) (ip, &buf_ip))
            /* Nonzero strcmp: names differ, so LR does hold the
               innermost frame's return address. */
            if (VG_(strcmp)(buf_lr_copy, buf_ip))
               lr_is_first_RA = True;
      }
   }

   if (sps) sps[0] = fp; /* NB. not sp */
   if (fps) fps[0] = fp;
   ips[0] = ip;
   i = 1;

   if (fp_min <= fp && fp < fp_max-VG_WORDSIZE+1) {

      /* initial FP is sane; keep going */
      fp = (((UWord*)fp)[0]);

      while (True) {

        /* On ppc64-linux (ppc64-elf, really), the lr save
           slot is 2 words back from sp, whereas on ppc32-elf(?) it's
           only one word back. */
#        if defined(VG_PLAT_USES_PPCTOC) || defined(VGP_ppc64le_linux)
         const Int lr_offset = 2;
#        else
         const Int lr_offset = 1;
#        endif

         if (i >= max_n_ips)
            break;

         /* Try to derive a new (ip,fp) pair from the current set. */

         if (fp_min <= fp && fp <= fp_max - lr_offset * sizeof(UWord)) {
            /* fp looks sane, so use it. */

            if (i == 1 && lr_is_first_RA)
               ip = lr;
            else
               ip = (((UWord*)fp)[lr_offset]);

#           if defined(VG_PLAT_USES_PPCTOC) || defined(VGP_ppc64le_linux)
            /* Nasty hack to do with function replacement/wrapping on
               ppc64-linux.  If LR points to our magic return stub,
               then we are in a wrapped or intercepted function, in
               which LR has been messed with.  The original LR will
               have been pushed onto the thread's hidden REDIR stack
               one down from the top (top element is the saved R2) and
               so we should restore the value from there instead.
               Since nested redirections can and do happen, we keep
               track of the number of nested LRs used by the unwinding
               so far with 'redirs_used'. */
            if (ip == (Addr)&VG_(ppctoc_magic_redirect_return_stub)
                && VG_(is_valid_tid)(tid_if_known)) {
               Word hsp = VG_(threads)[tid_if_known]
                             .arch.vex.guest_REDIR_SP;
               hsp -= 2 * redirs_used;
               redirs_used ++;
               if (hsp >= 1 && hsp < redir_stack_size)
                  ip = VG_(threads)[tid_if_known]
                          .arch.vex.guest_REDIR_STACK[hsp-1];
            }
#           endif

            /* ip of 0 or 1 marks the end of the chain. */
            if (0 == ip || 1 == ip) break;
            if (sps) sps[i] = fp; /* NB. not sp */
            if (fps) fps[i] = fp;
            /* Follow the back chain to the caller's frame. */
            fp = (((UWord*)fp)[0]);
            ips[i++] = ip - 1; /* -1: refer to calling insn, not the RA */
            if (debug)
               VG_(printf)("     ipsF[%d]=%#08lx\n", i-1, ips[i-1]);
            ip = ip - 1; /* ip is probably dead at this point, but
                            play safe, a la x86/amd64 above.  See
                            extensive comments above. */
            RECURSIVE_MERGE(cmrf,ips,i);
            continue;
         }

         /* No luck there.  We have to give up. */
         break;
      }
   }

   n_found = i;
   return n_found;
}
    874 
    875 #endif
    876 
    877 /* ------------------------ arm ------------------------- */
    878 
    879 #if defined(VGP_arm_linux)
    880 
    881 static Bool in_same_fn ( Addr a1, Addr a2 )
    882 {
    883    const HChar *buf_a1, *buf_a2;
    884    /* The following conditional looks grossly inefficient and
    885       surely could be majorly improved, with not much effort. */
    886    if (VG_(get_fnname_raw) (a1, &buf_a1)) {
    887       HChar buf_a1_copy[VG_(strlen)(buf_a1) + 1];
    888       VG_(strcpy)(buf_a1_copy, buf_a1);
    889       if (VG_(get_fnname_raw) (a2, &buf_a2))
    890          if (VG_(strcmp)(buf_a1_copy, buf_a2))
    891             return True;
    892    }
    893    return False;
    894 }
    895 
    896 static Bool in_same_page ( Addr a1, Addr a2 ) {
    897    return (a1 & ~0xFFF) == (a2 & ~0xFFF);
    898 }
    899 
    900 static Addr abs_diff ( Addr a1, Addr a2 ) {
    901    return (Addr)(a1 > a2 ? a1 - a2 : a2 - a1);
    902 }
    903 
    904 static Bool has_XT_perms ( Addr a )
    905 {
    906    NSegment const* seg = VG_(am_find_nsegment)(a);
    907    return seg && seg->hasX && seg->hasT;
    908 }
    909 
    910 static Bool looks_like_Thumb_call32 ( UShort w0, UShort w1 )
    911 {
    912    if (0)
    913       VG_(printf)("isT32call %04x %04x\n", (UInt)w0, (UInt)w1);
    914    // BL  simm26
    915    if ((w0 & 0xF800) == 0xF000 && (w1 & 0xC000) == 0xC000) return True;
    916    // BLX simm26
    917    if ((w0 & 0xF800) == 0xF000 && (w1 & 0xC000) == 0xC000) return True;
    918    return False;
    919 }
    920 
static Bool looks_like_Thumb_call16 ( UShort w0 )
{
   /* Heuristic: does the single 16-bit Thumb insn w0 look like a
      call?  Currently a stub that recognises nothing; it exists to
      keep looks_like_RA symmetric with the 32-bit case.
      NOTE(review): a 16-bit BLX <reg> encoding could plausibly be
      recognised here -- confirm before adding. */
   return False;
}
    925 
    926 static Bool looks_like_ARM_call ( UInt a0 )
    927 {
    928    if (0)
    929       VG_(printf)("isA32call %08x\n", a0);
    930    // Leading E forces unconditional only -- fix
    931    if ((a0 & 0xFF000000) == 0xEB000000) return True;
    932    return False;
    933 }
    934 
    935 static Bool looks_like_RA ( Addr ra )
    936 {
    937    /* 'ra' is a plausible return address if it points to
    938        an instruction after a call insn. */
    939    Bool isT = (ra & 1);
    940    if (isT) {
    941       // returning to Thumb code
    942       ra &= ~1;
    943       ra -= 4;
    944       if (has_XT_perms(ra)) {
    945          UShort w0 = *(UShort*)ra;
    946          UShort w1 = in_same_page(ra, ra+2) ? *(UShort*)(ra+2) : 0;
    947          if (looks_like_Thumb_call16(w1) || looks_like_Thumb_call32(w0,w1))
    948             return True;
    949       }
    950    } else {
    951       // ARM
    952       ra &= ~3;
    953       ra -= 4;
    954       if (has_XT_perms(ra)) {
    955          UInt a0 = *(UInt*)ra;
    956          if (looks_like_ARM_call(a0))
    957             return True;
    958       }
    959    }
    960    return False;
    961 }
    962 
/* Unwind an arm-linux stack.  Strategy: first follow DWARF CFI as far
   as it goes; if that stalls and --unw-stack-scan-thresh permits,
   fall back to heuristic stack scanning (LR, then words in the
   current stack page that look like return addresses). */
UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
                               /*OUT*/Addr* ips, UInt max_n_ips,
                               /*OUT*/Addr* sps, /*OUT*/Addr* fps,
                               const UnwindStartRegs* startRegs,
                               Addr fp_max_orig )
{
   Bool  debug = False;
   Int   i;
   Addr  fp_max;
   UInt  n_found = 0;
   /* Merge depth for recursive frames; > 0 enables RECURSIVE_MERGE. */
   const Int cmrf = VG_(clo_merge_recursive_frames);

   vg_assert(sizeof(Addr) == sizeof(UWord));
   vg_assert(sizeof(Addr) == sizeof(void*));

   D3UnwindRegs uregs;
   /* Mask out the Thumb bit from the start pc. */
   uregs.r15 = startRegs->r_pc & 0xFFFFFFFE;
   uregs.r14 = startRegs->misc.ARM.r14;
   uregs.r13 = startRegs->r_sp;
   uregs.r12 = startRegs->misc.ARM.r12;
   uregs.r11 = startRegs->misc.ARM.r11;
   uregs.r7  = startRegs->misc.ARM.r7;
   Addr fp_min = uregs.r13 - VG_STACK_REDZONE_SZB;

   /* Snaffle IPs from the client's stack into ips[0 .. max_n_ips-1],
      stopping when the trail goes cold, which we guess to be
      when FP is not a reasonable stack location. */

   // JRS 2002-sep-17: hack, to round up fp_max to the end of the
   // current page, at least.  Dunno if it helps.
   // NJN 2002-sep-17: seems to -- stack traces look like 1.0.X again
   fp_max = VG_PGROUNDUP(fp_max_orig);
   if (fp_max >= sizeof(Addr))
      fp_max -= sizeof(Addr);

   if (debug)
      VG_(printf)("\nmax_n_ips=%u fp_min=0x%lx fp_max_orig=0x%lx, "
                  "fp_max=0x%lx r15=0x%lx r13=0x%lx\n",
                  max_n_ips, fp_min, fp_max_orig, fp_max,
                  uregs.r15, uregs.r13);

   /* Assertion broken before main() is reached in pthreaded programs;  the
    * offending stack traces only have one item.  --njn, 2002-aug-16 */
   /* vg_assert(fp_min <= fp_max);*/
   // On Darwin, this kicks in for pthread-related stack traces, so they're
   // only 1 entry long which is wrong.
   if (fp_min + 512 >= fp_max) {
      /* If the stack limits look bogus, don't poke around ... but
         don't bomb out either. */
      if (sps) sps[0] = uregs.r13;
      if (fps) fps[0] = 0;
      ips[0] = uregs.r15;
      return 1;
   }

   /* */

   if (sps) sps[0] = uregs.r13;
   if (fps) fps[0] = 0;
   ips[0] = uregs.r15;
   i = 1;

   /* Loop unwinding the stack. */
   Bool do_stack_scan = False;

   /* First try the Official Way, using Dwarf CFI. */
   while (True) {
      if (debug) {
         VG_(printf)("i: %d, r15: 0x%lx, r13: 0x%lx\n",
                     i, uregs.r15, uregs.r13);
      }

      if (i >= max_n_ips)
         break;

      if (VG_(use_CF_info)( &uregs, fp_min, fp_max )) {
         if (sps) sps[i] = uregs.r13;
         if (fps) fps[i] = 0;
         /* Strip the Thumb bit, then -1 to refer to the call insn
            rather than the return address. */
         ips[i++] = (uregs.r15 & 0xFFFFFFFE) - 1;
         if (debug)
            VG_(printf)("USING CFI: r15: 0x%lx, r13: 0x%lx\n",
                        uregs.r15, uregs.r13);
         uregs.r15 = (uregs.r15 & 0xFFFFFFFE) - 1;
         RECURSIVE_MERGE(cmrf,ips,i);
         continue;
      }

      /* No luck.  We have to give up. */
      do_stack_scan = True;
      break;
   }

   /* Now try Plan B (maybe) -- stack scanning.  This often gives
      pretty bad results, so this has to be enabled explicitly by the
      user. */
   if (do_stack_scan
       && i < max_n_ips && i < (Int)VG_(clo_unw_stack_scan_thresh)) {
      Int  nByStackScan = 0;
      Addr lr = uregs.r14;
      Addr sp = uregs.r13 & ~3;
      Addr pc = uregs.r15;
      // First see if LR contains
      // something that could be a valid return address.
      if (!in_same_fn(lr, pc) && looks_like_RA(lr)) {
         // take it only if 'cand' isn't obviously a duplicate
         // of the last found IP value
         Addr cand = (lr & 0xFFFFFFFE) - 1;
         if (abs_diff(cand, ips[i-1]) > 1) {
            if (sps) sps[i] = 0;
            if (fps) fps[i] = 0;
            ips[i++] = cand;
            RECURSIVE_MERGE(cmrf,ips,i);
            nByStackScan++;
         }
      }
      /* Scan word by word through the rest of the current stack
         page, taking anything that looks like a return address. */
      while (in_same_page(sp, uregs.r13)) {
         if (i >= max_n_ips)
            break;
         // we're in the same page; fairly safe to keep going
         UWord w = *(UWord*)(sp & ~0x3);
         if (looks_like_RA(w)) {
            Addr cand = (w & 0xFFFFFFFE) - 1;
            // take it only if 'cand' isn't obviously a duplicate
            // of the last found IP value
            if (abs_diff(cand, ips[i-1]) > 1) {
               if (sps) sps[i] = 0;
               if (fps) fps[i] = 0;
               ips[i++] = cand;
               RECURSIVE_MERGE(cmrf,ips,i);
               if (++nByStackScan >= VG_(clo_unw_stack_scan_frames)) break;
            }
         }
         sp += 4;
      }
   }

   n_found = i;
   return n_found;
}
   1102 
   1103 #endif
   1104 
   1105 /* ------------------------ arm64 ------------------------- */
   1106 
   1107 #if defined(VGP_arm64_linux)
   1108 
   1109 UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
   1110                                /*OUT*/Addr* ips, UInt max_n_ips,
   1111                                /*OUT*/Addr* sps, /*OUT*/Addr* fps,
   1112                                const UnwindStartRegs* startRegs,
   1113                                Addr fp_max_orig )
   1114 {
   1115    Bool  debug = False;
   1116    Int   i;
   1117    Addr  fp_max;
   1118    UInt  n_found = 0;
   1119    const Int cmrf = VG_(clo_merge_recursive_frames);
   1120 
   1121    vg_assert(sizeof(Addr) == sizeof(UWord));
   1122    vg_assert(sizeof(Addr) == sizeof(void*));
   1123 
   1124    D3UnwindRegs uregs;
   1125    uregs.pc = startRegs->r_pc;
   1126    uregs.sp = startRegs->r_sp;
   1127    uregs.x30 = startRegs->misc.ARM64.x30;
   1128    uregs.x29 = startRegs->misc.ARM64.x29;
   1129    Addr fp_min = uregs.sp - VG_STACK_REDZONE_SZB;
   1130 
   1131    /* Snaffle IPs from the client's stack into ips[0 .. max_n_ips-1],
   1132       stopping when the trail goes cold, which we guess to be
   1133       when FP is not a reasonable stack location. */
   1134 
   1135    // JRS 2002-sep-17: hack, to round up fp_max to the end of the
   1136    // current page, at least.  Dunno if it helps.
   1137    // NJN 2002-sep-17: seems to -- stack traces look like 1.0.X again
   1138    fp_max = VG_PGROUNDUP(fp_max_orig);
   1139    if (fp_max >= sizeof(Addr))
   1140       fp_max -= sizeof(Addr);
   1141 
   1142    if (debug)
   1143       VG_(printf)("\nmax_n_ips=%u fp_min=0x%lx fp_max_orig=0x%lx, "
   1144                   "fp_max=0x%lx PC=0x%lx SP=0x%lx\n",
   1145                   max_n_ips, fp_min, fp_max_orig, fp_max,
   1146                   uregs.pc, uregs.sp);
   1147 
   1148    /* Assertion broken before main() is reached in pthreaded programs;  the
   1149     * offending stack traces only have one item.  --njn, 2002-aug-16 */
   1150    /* vg_assert(fp_min <= fp_max);*/
   1151    // On Darwin, this kicks in for pthread-related stack traces, so they're
   1152    // only 1 entry long which is wrong.
   1153    if (fp_min + 512 >= fp_max) {
   1154       /* If the stack limits look bogus, don't poke around ... but
   1155          don't bomb out either. */
   1156       if (sps) sps[0] = uregs.sp;
   1157       if (fps) fps[0] = uregs.x29;
   1158       ips[0] = uregs.pc;
   1159       return 1;
   1160    }
   1161 
   1162    /* */
   1163 
   1164    if (sps) sps[0] = uregs.sp;
   1165    if (fps) fps[0] = uregs.x29;
   1166    ips[0] = uregs.pc;
   1167    i = 1;
   1168 
   1169    /* Loop unwinding the stack, using CFI. */
   1170    while (True) {
   1171       if (debug) {
   1172          VG_(printf)("i: %d, pc: 0x%lx, sp: 0x%lx\n",
   1173                      i, uregs.pc, uregs.sp);
   1174       }
   1175 
   1176       if (i >= max_n_ips)
   1177          break;
   1178 
   1179       if (VG_(use_CF_info)( &uregs, fp_min, fp_max )) {
   1180          if (sps) sps[i] = uregs.sp;
   1181          if (fps) fps[i] = uregs.x29;
   1182          ips[i++] = uregs.pc - 1;
   1183          if (debug)
   1184             VG_(printf)("USING CFI: pc: 0x%lx, sp: 0x%lx\n",
   1185                         uregs.pc, uregs.sp);
   1186          uregs.pc = uregs.pc - 1;
   1187          RECURSIVE_MERGE(cmrf,ips,i);
   1188          continue;
   1189       }
   1190 
   1191       /* No luck.  We have to give up. */
   1192       break;
   1193    }
   1194 
   1195    n_found = i;
   1196    return n_found;
   1197 }
   1198 
   1199 #endif
   1200 
   1201 /* ------------------------ s390x ------------------------- */
   1202 
   1203 #if defined(VGP_s390x_linux)
   1204 
/* Unwind an s390x-linux stack.  s390x relies entirely on .eh_frame
   CFI (gcc does not build a backchain by default); if the very first
   unwind step fails, retry once using the link register as the
   caller's pc. */
UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
                               /*OUT*/Addr* ips, UInt max_n_ips,
                               /*OUT*/Addr* sps, /*OUT*/Addr* fps,
                               const UnwindStartRegs* startRegs,
                               Addr fp_max_orig )
{
   Bool  debug = False;
   Int   i;
   Addr  fp_max;
   UInt  n_found = 0;
   /* Merge depth for recursive frames; > 0 enables RECURSIVE_MERGE. */
   const Int cmrf = VG_(clo_merge_recursive_frames);

   vg_assert(sizeof(Addr) == sizeof(UWord));
   vg_assert(sizeof(Addr) == sizeof(void*));

   D3UnwindRegs uregs;
   uregs.ia = startRegs->r_pc;
   uregs.sp = startRegs->r_sp;
   Addr fp_min = uregs.sp - VG_STACK_REDZONE_SZB;
   uregs.fp = startRegs->misc.S390X.r_fp;
   uregs.lr = startRegs->misc.S390X.r_lr;

   fp_max = VG_PGROUNDUP(fp_max_orig);
   if (fp_max >= sizeof(Addr))
      fp_max -= sizeof(Addr);

   if (debug)
      VG_(printf)("max_n_ips=%u fp_min=0x%lx fp_max_orig=0x%lx, "
                  "fp_max=0x%lx IA=0x%lx SP=0x%lx FP=0x%lx\n",
                  max_n_ips, fp_min, fp_max_orig, fp_max,
                  uregs.ia, uregs.sp,uregs.fp);

   /* The first frame is pretty obvious */
   ips[0] = uregs.ia;
   if (sps) sps[0] = uregs.sp;
   if (fps) fps[0] = uregs.fp;
   i = 1;

   /* for everything else we have to rely on the eh_frame. gcc defaults to
      not create a backchain and all the other  tools (like gdb) also have
      to use the CFI. */
   while (True) {
      if (i >= max_n_ips)
         break;

      if (VG_(use_CF_info)( &uregs, fp_min, fp_max )) {
         if (sps) sps[i] = uregs.sp;
         if (fps) fps[i] = uregs.fp;
         /* -1: refer to the calling insn, not the return address. */
         ips[i++] = uregs.ia - 1;
         uregs.ia = uregs.ia - 1;
         RECURSIVE_MERGE(cmrf,ips,i);
         continue;
      }
      /* A problem on the first frame? Lets assume it was a bad jump.
         We will use the link register and the current stack and frame
         pointers and see if we can use the CFI in the next round. */
      if (i == 1) {
         /* NOTE(review): the uregs.sp / uregs.fp restores below are
            guarded by the presence of the optional output arrays
            sps/fps, so with sps == NULL the register state is not
            reset -- looks unintentional; confirm before changing. */
         if (sps) {
            sps[i] = sps[0];
            uregs.sp = sps[0];
         }
         if (fps) {
            fps[i] = fps[0];
            uregs.fp = fps[0];
         }
         uregs.ia = uregs.lr - 1;
         ips[i++] = uregs.lr - 1;
         RECURSIVE_MERGE(cmrf,ips,i);
         continue;
      }

      /* No luck.  We have to give up. */
      break;
   }

   n_found = i;
   return n_found;
}
   1283 
   1284 #endif
   1285 
   1286 /* ------------------------ mips 32/64 ------------------------- */
   1287 #if defined(VGP_mips32_linux) || defined(VGP_mips64_linux)
/* Unwind a mips32/64-linux stack.  Strategy per frame: (1) DWARF CFI;
   (2) failing that, scan the current function's prologue for
   sp-decrement instructions to reconstruct the frame size and use RA;
   (3) for the innermost frame only, fall back to RA directly. */
UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
                               /*OUT*/Addr* ips, UInt max_n_ips,
                               /*OUT*/Addr* sps, /*OUT*/Addr* fps,
                               const UnwindStartRegs* startRegs,
                               Addr fp_max_orig )
{
   Bool  debug = False;
   Int   i;
   Addr  fp_max;
   UInt  n_found = 0;
   /* Merge depth for recursive frames; > 0 enables RECURSIVE_MERGE. */
   const Int cmrf = VG_(clo_merge_recursive_frames);

   vg_assert(sizeof(Addr) == sizeof(UWord));
   vg_assert(sizeof(Addr) == sizeof(void*));

   D3UnwindRegs uregs;
   uregs.pc = startRegs->r_pc;
   uregs.sp = startRegs->r_sp;
   Addr fp_min = uregs.sp - VG_STACK_REDZONE_SZB;

#if defined(VGP_mips32_linux)
   uregs.fp = startRegs->misc.MIPS32.r30;
   uregs.ra = startRegs->misc.MIPS32.r31;
#elif defined(VGP_mips64_linux)
   uregs.fp = startRegs->misc.MIPS64.r30;
   uregs.ra = startRegs->misc.MIPS64.r31;
#endif

   /* Snaffle IPs from the client's stack into ips[0 .. max_n_ips-1],
      stopping when the trail goes cold, which we guess to be
      when FP is not a reasonable stack location. */

   fp_max = VG_PGROUNDUP(fp_max_orig);
   if (fp_max >= sizeof(Addr))
      fp_max -= sizeof(Addr);

   if (debug)
      VG_(printf)("max_n_ips=%u fp_min=0x%lx fp_max_orig=0x%lx, "
                  "fp_max=0x%lx pc=0x%lx sp=0x%lx fp=0x%lx\n",
                  max_n_ips, fp_min, fp_max_orig, fp_max,
                  uregs.pc, uregs.sp, uregs.fp);

   if (sps) sps[0] = uregs.sp;
   if (fps) fps[0] = uregs.fp;
   ips[0] = uregs.pc;
   i = 1;

   /* Loop unwinding the stack. */

   while (True) {
      if (debug) {
         VG_(printf)("i: %d, pc: 0x%lx, sp: 0x%lx, ra: 0x%lx\n",
                     i, uregs.pc, uregs.sp, uregs.ra);
      }
      if (i >= max_n_ips)
         break;

      /* Keep a copy so a CFI step that yields a nonsense pc (0 or 1)
         can be rolled back before trying the heuristics below. */
      D3UnwindRegs uregs_copy = uregs;
      if (VG_(use_CF_info)( &uregs, fp_min, fp_max )) {
         if (debug)
            VG_(printf)("USING CFI: pc: 0x%lx, sp: 0x%lx, ra: 0x%lx\n",
                        uregs.pc, uregs.sp, uregs.ra);
         if (0 != uregs.pc && 1 != uregs.pc) {
            if (sps) sps[i] = uregs.sp;
            if (fps) fps[i] = uregs.fp;
            /* -4: refer to the calling insn, not the return addr. */
            ips[i++] = uregs.pc - 4;
            uregs.pc = uregs.pc - 4;
            RECURSIVE_MERGE(cmrf,ips,i);
            continue;
         } else
            uregs = uregs_copy;
      }

      /* Plan B: scan the function prologue for stack-pointer
         decrements to reconstruct the frame size. */
      int seen_sp_adjust = 0;
      long frame_offset = 0;
      PtrdiffT offset;
      if (VG_(get_inst_offset_in_function)(uregs.pc, &offset)) {
         Addr start_pc = uregs.pc - offset;
         Addr limit_pc = uregs.pc;
         Addr cur_pc;
         for (cur_pc = start_pc; cur_pc < limit_pc; cur_pc += 4) {
            unsigned long inst, high_word, low_word;
            unsigned long * cur_inst;
            /* Fetch the instruction.   */
            /* NOTE(review): reads client memory unchecked; assumes
               cur_pc is mapped -- presumably safe since it lies in a
               known function.  Confirm. */
            cur_inst = (unsigned long *)cur_pc;
            inst = *((UInt *) cur_inst);
            if(debug)
               VG_(printf)("cur_pc: 0x%lx, inst: 0x%lx\n", cur_pc, inst);

            /* Save some code by pre-extracting some useful fields.  */
            high_word = (inst >> 16) & 0xffff;
            low_word = inst & 0xffff;

            if (high_word == 0x27bd        /* addiu $sp,$sp,-i */
                || high_word == 0x23bd     /* addi $sp,$sp,-i */
                || high_word == 0x67bd) {  /* daddiu $sp,$sp,-i */
               /* NOTE(review): the indentation below is misleading
                  but the braces are correct: 'break' is the body of
                  the 'else', and seen_sp_adjust is set once per
                  negative (frame-growing) adjustment. */
               if (low_word & 0x8000)	/* negative stack adjustment? */
                  frame_offset += 0x10000 - low_word;
               else
                  /* Exit loop if a positive stack adjustment is found, which
                     usually means that the stack cleanup code in the function
                     epilogue is reached.  */
               break;
            seen_sp_adjust = 1;
            }
         }
         if(debug)
            VG_(printf)("offset: 0x%lx\n", frame_offset);
      }
      if (seen_sp_adjust) {
         if (0 == uregs.pc || 1 == uregs.pc) break;
         /* If pc already sits at ra-8, another step would loop. */
         if (uregs.pc == uregs.ra - 8) break;
         if (sps) {
            sps[i] = uregs.sp + frame_offset;
         }
         uregs.sp = uregs.sp + frame_offset;

         /* NOTE(review): uregs.fp reset is guarded by fps != NULL --
            looks unintentional; confirm before changing. */
         if (fps) {
            fps[i] = fps[0];
            uregs.fp = fps[0];
         }
         if (0 == uregs.ra || 1 == uregs.ra) break;
         /* ra-8: step back over the jal and its delay slot. */
         uregs.pc = uregs.ra - 8;
         ips[i++] = uregs.ra - 8;
         RECURSIVE_MERGE(cmrf,ips,i);
         continue;
      }

      /* Innermost frame only: trust RA directly. */
      if (i == 1) {
         if (sps) {
            sps[i] = sps[0];
            uregs.sp = sps[0];
         }
         if (fps) {
            fps[i] = fps[0];
            uregs.fp = fps[0];
         }
         if (0 == uregs.ra || 1 == uregs.ra) break;
         uregs.pc = uregs.ra - 8;
         ips[i++] = uregs.ra - 8;
         RECURSIVE_MERGE(cmrf,ips,i);
         continue;
      }
      /* No luck.  We have to give up. */
      break;
   }

   n_found = i;
   return n_found;
}
   1438 
   1439 #endif
   1440 
   1441 /* ------------------------ tilegx ------------------------- */
   1442 #if defined(VGP_tilegx_linux)
/* Unwind one client stack on tilegx-linux.  Strategy, per frame:
   (1) try CFI-based unwinding; (2) failing that, scan instruction
   bundles backwards through the current function looking for the
   prologue's sp-adjusting add/addi/addli, and undo that adjustment;
   (3) for the first frame only, fall back to trusting the link
   register.  Fills ips[] (and sps[]/fps[] when non-NULL) with up to
   max_n_ips entries and returns the number of frames recorded. */
UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
                               /*OUT*/Addr* ips, UInt max_n_ips,
                               /*OUT*/Addr* sps, /*OUT*/Addr* fps,
                               const UnwindStartRegs* startRegs,
                               Addr fp_max_orig )
{
   Bool  debug = False;   /* set True for verbose unwind tracing */
   Int   i;
   Addr  fp_max;
   UInt  n_found = 0;
   /* How aggressively to fold recursive cycles out of the trace. */
   const Int cmrf = VG_(clo_merge_recursive_frames);

   vg_assert(sizeof(Addr) == sizeof(UWord));
   vg_assert(sizeof(Addr) == sizeof(void*));

   D3UnwindRegs uregs;
   uregs.pc = startRegs->r_pc;
   uregs.sp = startRegs->r_sp;
   /* Stack grows down; permit reads slightly below sp (redzone). */
   Addr fp_min = uregs.sp - VG_STACK_REDZONE_SZB;

   /* On tilegx, r52 carries the frame pointer and r55 the link
      register (as delivered in UnwindStartRegs). */
   uregs.fp = startRegs->misc.TILEGX.r52;
   uregs.lr = startRegs->misc.TILEGX.r55;

   /* Round the stack top up to a page boundary, then pull it back by
      one word so fp_max itself is a readable address. */
   fp_max = VG_PGROUNDUP(fp_max_orig);
   if (fp_max >= sizeof(Addr))
      fp_max -= sizeof(Addr);

   if (debug)
      VG_(printf)("max_n_ips=%u fp_min=0x%lx fp_max_orig=0x%lx, "
                  "fp_max=0x%lx pc=0x%lx sp=0x%lx fp=0x%lx\n",
                  max_n_ips, fp_min, fp_max_orig, fp_max,
                  uregs.pc, uregs.sp, uregs.fp);

   /* Frame 0 is simply the starting register state. */
   if (sps) sps[0] = uregs.sp;
   if (fps) fps[0] = uregs.fp;
   ips[0] = uregs.pc;
   i = 1;

   /* Loop unwinding the stack. */
   while (True) {
      if (debug) {
         VG_(printf)("i: %d, pc: 0x%lx, sp: 0x%lx, lr: 0x%lx\n",
                     i, uregs.pc, uregs.sp, uregs.lr);
     }
     if (i >= max_n_ips)
        break;

     /* Snapshot the registers so a CFI step that leads somewhere
        implausible can be rolled back. */
     D3UnwindRegs uregs_copy = uregs;
     if (VG_(use_CF_info)( &uregs, fp_min, fp_max )) {
        if (debug)
           VG_(printf)("USING CFI: pc: 0x%lx, sp: 0x%lx, fp: 0x%lx, lr: 0x%lx\n",
                       uregs.pc, uregs.sp, uregs.fp, uregs.lr);
        /* Accept the CFI result only if pc is not 0/1 and does not
           point into the stack itself. */
        if (0 != uregs.pc && 1 != uregs.pc &&
            (uregs.pc < fp_min || uregs.pc > fp_max)) {
           if (sps) sps[i] = uregs.sp;
           if (fps) fps[i] = uregs.fp;
           /* Record a frame only when CFI actually moved both pc and
              sp; pc - 8 steps back one 8-byte instruction bundle to
              land on the call site. */
           if (uregs.pc != uregs_copy.pc && uregs.sp != uregs_copy.sp)
              ips[i++] = uregs.pc - 8;
           uregs.pc = uregs.pc - 8;
           RECURSIVE_MERGE(cmrf,ips,i);
           continue;
        } else
           uregs = uregs_copy;  /* implausible result: undo CFI step */
     }

     /* CFI failed: scan the current function's bundles backwards for
        the prologue's negative sp adjustment, accumulated here. */
     Long frame_offset = 0;
     PtrdiffT offset;
     if (VG_(get_inst_offset_in_function)(uregs.pc, &offset)) {
        Addr start_pc = uregs.pc;
        Addr limit_pc = uregs.pc - offset;  /* function entry */
        Addr cur_pc;
        /* Try to find any stack adjustment from current instruction
           bundles downward. */
        for (cur_pc = start_pc; cur_pc > limit_pc; cur_pc -= 8) {
           ULong inst;
           Long off = 0;
           ULong* cur_inst;
           /* Fetch the instruction.   */
           cur_inst = (ULong *)cur_pc;
           inst = *cur_inst;
           if(debug)
              /* NOTE(review): inst is ULong; %lx assumes
                 sizeof(long) == sizeof(ULong) on this platform. */
              VG_(printf)("cur_pc: 0x%lx, inst: 0x%lx\n", cur_pc, inst);

           /* Decode the bundle just far enough to recognise an
              "add immediate to sp" in any issue slot, extracting the
              (sign-extended) immediate into 'off'. */
           if ((inst & 0xC000000000000000ULL) == 0) {
              /* Bundle is X type. */
             if ((inst & 0xC000000070000fffULL) ==
                 (0x0000000010000db6ULL)) {
                /* addli at X0: 16-bit immediate */
                off = (short)(0xFFFF & (inst >> 12));
             } else if ((inst & 0xF80007ff80000000ULL) ==
                        (0x000006db00000000ULL)) {
                /* addli at X1: 16-bit immediate */
                off = (short)(0xFFFF & (inst >> 43));
             } else if ((inst & 0xC00000007FF00FFFULL) ==
                        (0x0000000040100db6ULL)) {
                /* addi at X0: 8-bit immediate */
                off = (char)(0xFF & (inst >> 12));
             } else if ((inst & 0xFFF807ff80000000ULL) ==
                        (0x180806db00000000ULL)) {
                /* addi at X1: 8-bit immediate */
                off = (char)(0xFF & (inst >> 43));
             }
           } else {
              /* Bundle is Y type. */
              if ((inst & 0x0000000078000FFFULL) ==
                  (0x0000000000000db6ULL)) {
                 /* addi at Y0 */
                 off = (char)(0xFF & (inst >> 12));
              } else if ((inst & 0x3C0007FF80000000ULL) ==
                         (0x040006db00000000ULL)) {
                 /* addi at Y1 */
                 off = (char)(0xFF & (inst >> 43));
              }
           }

           if(debug && off)
              VG_(printf)("offset: -0x%lx\n", -off);

           if (off < 0) {
              /* Frame offsets must be a multiple of 8. */
              vg_assert((off & 7) == 0);
              frame_offset += off;
           } else if (off > 0)
              /* Exit loop if a positive stack adjustment is found, which
                 usually means that the stack cleanup code in the function
                 epilogue is reached.  */
             break;
        }
     }

     if (frame_offset < 0) {
        /* pc values 0 and 1 mark the end of the stack. */
        if (0 == uregs.pc || 1 == uregs.pc) break;

        /* Subtract the offset from the current stack. */
        uregs.sp = uregs.sp + (ULong)(-frame_offset);

        if (debug)
           VG_(printf)("offset: i: %d, pc: 0x%lx, sp: 0x%lx, lr: 0x%lx\n",
                       i, uregs.pc, uregs.sp, uregs.lr);

        /* If lr looks stale (we are already at lr-8, i.e. apparent
           self-recursion) or lr-8 points into the stack region,
           reload lr from the just-popped frame.
           NOTE(review): parses as  a || (b && c)  by precedence;
           parentheses around the && term would make that explicit. */
        if (uregs.pc == uregs.lr - 8 ||
            uregs.lr - 8 >= fp_min && uregs.lr - 8 <= fp_max) {
           if (debug)
              VG_(printf)("new lr = 0x%lx\n", *(ULong*)uregs.sp);
           uregs.lr = *(ULong*)uregs.sp;
        }

        /* Step back one bundle from the return address. */
        uregs.pc = uregs.lr - 8;

        if (uregs.lr != 0) {
           /* Avoid the invalid pc = 0xffff...ff8 */
           if (sps)
              sps[i] = uregs.sp;

           if (fps)
              fps[i] = fps[0];

           ips[i++] = uregs.pc;

           RECURSIVE_MERGE(cmrf,ips,i);
        }
        continue;
     }

     /* A special case for the 1st frame. Assume it was a bad jump.
        Use the link register "lr" and current stack and frame to
        try again. */
     if (i == 1) {
        if (sps) {
           sps[1] = sps[0];
           uregs.sp = sps[0];
        }
        if (fps) {
           fps[1] = fps[0];
           uregs.fp = fps[0];
        }
        /* lr values 0 and 1 mark the end of the stack. */
        if (0 == uregs.lr || 1 == uregs.lr)
           break;

        uregs.pc = uregs.lr - 8;
        ips[i++] = uregs.lr - 8;
        RECURSIVE_MERGE(cmrf,ips,i);
        continue;
     }
     /* No luck.  We have to give up. */
     break;
   }

   if (debug) {
      /* Display the back trace. */
      Int ii ;
      for ( ii = 0; ii < i; ii++) {
         if (sps) {
            VG_(printf)("%d: pc=%lx  ", ii, ips[ii]);
            VG_(printf)("sp=%lx\n", sps[ii]);
         } else {
            VG_(printf)("%d: pc=%lx\n", ii, ips[ii]);
         }
      }
   }

   n_found = i;
   return n_found;
}
   1647 #endif
   1648 
   1649 /*------------------------------------------------------------*/
   1650 /*---                                                      ---*/
   1651 /*--- END platform-dependent unwinder worker functions     ---*/
   1652 /*---                                                      ---*/
   1653 /*------------------------------------------------------------*/
   1654 
   1655 /*------------------------------------------------------------*/
   1656 /*--- Exported functions.                                  ---*/
   1657 /*------------------------------------------------------------*/
   1658 
   1659 UInt VG_(get_StackTrace) ( ThreadId tid,
   1660                            /*OUT*/StackTrace ips, UInt max_n_ips,
   1661                            /*OUT*/StackTrace sps,
   1662                            /*OUT*/StackTrace fps,
   1663                            Word first_ip_delta )
   1664 {
   1665    /* Get the register values with which to start the unwind. */
   1666    UnwindStartRegs startRegs;
   1667    VG_(memset)( &startRegs, 0, sizeof(startRegs) );
   1668    VG_(get_UnwindStartRegs)( &startRegs, tid );
   1669 
   1670    Addr stack_highest_byte = VG_(threads)[tid].client_stack_highest_byte;
   1671    Addr stack_lowest_byte  = 0;
   1672 
   1673 #  if defined(VGP_x86_linux)
   1674    /* Nasty little hack to deal with syscalls - if libc is using its
   1675       _dl_sysinfo_int80 function for syscalls (the TLS version does),
   1676       then ip will always appear to be in that function when doing a
   1677       syscall, not the actual libc function doing the syscall.  This
   1678       check sees if IP is within that function, and pops the return
   1679       address off the stack so that ip is placed within the library
   1680       function calling the syscall.  This makes stack backtraces much
   1681       more useful.
   1682 
   1683       The function is assumed to look like this (from glibc-2.3.6 sources):
   1684          _dl_sysinfo_int80:
   1685             int $0x80
   1686             ret
   1687       That is 3 (2+1) bytes long.  We could be more thorough and check
   1688       the 3 bytes of the function are as expected, but I can't be
   1689       bothered.
   1690    */
   1691    if (VG_(client__dl_sysinfo_int80) != 0 /* we know its address */
   1692        && startRegs.r_pc >= VG_(client__dl_sysinfo_int80)
   1693        && startRegs.r_pc < VG_(client__dl_sysinfo_int80)+3
   1694        && VG_(am_is_valid_for_client)(startRegs.r_pc, sizeof(Addr),
   1695                                       VKI_PROT_READ)) {
   1696       startRegs.r_pc  = (ULong) *(Addr*)(UWord)startRegs.r_sp;
   1697       startRegs.r_sp += (ULong) sizeof(Addr);
   1698    }
   1699 #  endif
   1700 
   1701    /* See if we can get a better idea of the stack limits */
   1702    VG_(stack_limits)( (Addr)startRegs.r_sp,
   1703                       &stack_lowest_byte, &stack_highest_byte );
   1704 
   1705    /* Take into account the first_ip_delta. */
   1706    startRegs.r_pc += (Long)(Word)first_ip_delta;
   1707 
   1708    if (0)
   1709       VG_(printf)("tid %u: stack_highest=0x%08lx ip=0x%010llx "
   1710                   "sp=0x%010llx\n",
   1711                   tid, stack_highest_byte,
   1712                   startRegs.r_pc, startRegs.r_sp);
   1713 
   1714    return VG_(get_StackTrace_wrk)(tid, ips, max_n_ips,
   1715                                        sps, fps,
   1716                                        &startRegs,
   1717                                        stack_highest_byte);
   1718 }
   1719 
   1720 static void printIpDesc(UInt n, Addr ip, void* uu_opaque)
   1721 {
   1722    InlIPCursor *iipc = VG_(new_IIPC)(ip);
   1723 
   1724    do {
   1725       const HChar *buf = VG_(describe_IP)(ip, iipc);
   1726       if (VG_(clo_xml)) {
   1727          VG_(printf_xml)("    %s\n", buf);
   1728       } else {
   1729          VG_(message)(Vg_UserMsg, "   %s %s\n",
   1730                       ( n == 0 ? "at" : "by" ), buf);
   1731       }
   1732       n++;
   1733       // Increase n to show "at" for only one level.
   1734    } while (VG_(next_IIPC)(iipc));
   1735    VG_(delete_IIPC)(iipc);
   1736 }
   1737 
   1738 /* Print a StackTrace. */
   1739 void VG_(pp_StackTrace) ( StackTrace ips, UInt n_ips )
   1740 {
   1741    vg_assert( n_ips > 0 );
   1742 
   1743    if (VG_(clo_xml))
   1744       VG_(printf_xml)("  <stack>\n");
   1745 
   1746    VG_(apply_StackTrace)( printIpDesc, NULL, ips, n_ips );
   1747 
   1748    if (VG_(clo_xml))
   1749       VG_(printf_xml)("  </stack>\n");
   1750 }
   1751 
   1752 /* Get and immediately print a StackTrace. */
   1753 void VG_(get_and_pp_StackTrace) ( ThreadId tid, UInt max_n_ips )
   1754 {
   1755    Addr ips[max_n_ips];
   1756    UInt n_ips
   1757       = VG_(get_StackTrace)(tid, ips, max_n_ips,
   1758                             NULL/*array to dump SP values in*/,
   1759                             NULL/*array to dump FP values in*/,
   1760                             0/*first_ip_delta*/);
   1761    VG_(pp_StackTrace)(ips, n_ips);
   1762 }
   1763 
   1764 void VG_(apply_StackTrace)(
   1765         void(*action)(UInt n, Addr ip, void* opaque),
   1766         void* opaque,
   1767         StackTrace ips, UInt n_ips
   1768      )
   1769 {
   1770    Bool main_done = False;
   1771    Int i = 0;
   1772 
   1773    vg_assert(n_ips > 0);
   1774    do {
   1775       Addr ip = ips[i];
   1776 
   1777       // Stop after the first appearance of "main" or one of the other names
   1778       // (the appearance of which is a pretty good sign that we've gone past
   1779       // main without seeing it, for whatever reason)
   1780       if ( ! VG_(clo_show_below_main) ) {
   1781          Vg_FnNameKind kind = VG_(get_fnname_kind_from_IP)(ip);
   1782          if (Vg_FnNameMain == kind || Vg_FnNameBelowMain == kind) {
   1783             main_done = True;
   1784          }
   1785       }
   1786 
   1787       // Act on the ip
   1788       action(i, ip, opaque);
   1789 
   1790       i++;
   1791    } while (i < n_ips && !main_done);
   1792 }
   1793 
   1794 
   1795 /*--------------------------------------------------------------------*/
   1796 /*--- end                                                          ---*/
   1797 /*--------------------------------------------------------------------*/
   1798