      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- Take snapshots of client stacks.              m_stacktrace.c ---*/
      4 /*--------------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2000-2013 Julian Seward
     11       jseward@acm.org
     12 
     13    This program is free software; you can redistribute it and/or
     14    modify it under the terms of the GNU General Public License as
     15    published by the Free Software Foundation; either version 2 of the
     16    License, or (at your option) any later version.
     17 
     18    This program is distributed in the hope that it will be useful, but
     19    WITHOUT ANY WARRANTY; without even the implied warranty of
     20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21    General Public License for more details.
     22 
     23    You should have received a copy of the GNU General Public License
     24    along with this program; if not, write to the Free Software
     25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     26    02111-1307, USA.
     27 
     28    The GNU General Public License is contained in the file COPYING.
     29 */
     30 
     31 #include "pub_core_basics.h"
     32 #include "pub_core_vki.h"
     33 #include "pub_core_libcsetjmp.h"    // to keep _threadstate.h happy
     34 #include "pub_core_threadstate.h"
     35 #include "pub_core_debuginfo.h"     // XXX: circular dependency
     36 #include "pub_core_aspacemgr.h"     // For VG_(is_addressable)()
     37 #include "pub_core_libcbase.h"
     38 #include "pub_core_libcassert.h"
     39 #include "pub_core_libcprint.h"
     40 #include "pub_core_machine.h"
     41 #include "pub_core_options.h"
     42 #include "pub_core_stacks.h"        // VG_(stack_limits)
     43 #include "pub_core_stacktrace.h"
     44 #include "pub_core_xarray.h"
     45 #include "pub_core_clientstate.h"   // VG_(client__dl_sysinfo_int80)
     46 #include "pub_core_trampoline.h"
     47 
     48 
     49 /*------------------------------------------------------------*/
     50 /*---                                                      ---*/
     51 /*--- BEGIN platform-dependent unwinder worker functions   ---*/
     52 /*---                                                      ---*/
     53 /*------------------------------------------------------------*/
     54 
     55 /* Take a snapshot of the client's stack, putting up to 'max_n_ips'
     56    IPs into 'ips'.  In order to be thread-safe, we pass in the
     57    thread's IP, SP, FP if that's meaningful, and LR if that's
     58    meaningful.  Returns number of IPs put in 'ips'.
     59 
     60    If you know what the thread ID for this stack is, send that as the
     61    first parameter, else send zero.  This helps generate better stack
     62    traces on ppc64-linux and has no effect on other platforms.
     63 */
     64 
     65 /* Merge recursive cycles of up to _nframes frames within the first _i
     66    entries of the _ips array.  The merge is done during stack unwinding
     67    (i.e. in the platform-specific unwinders) so as to collect as many
     68    "interesting" stack traces as possible. */
     69 #define RECURSIVE_MERGE(_nframes,_ips,_i){                      \
     70    Int dist;                                                    \
     71    for (dist = 1; dist <= _nframes && dist < (Int)_i; dist++) { \
     72       if (_ips[_i-1] == _ips[_i-1-dist]) {                      \
     73          _i = _i - dist;                                        \
     74          break;                                                 \
     75       }                                                         \
     76    }                                                            \
     77 }
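/* A worked example of the macro above, for illustration: suppose the
   unwinder has collected ips = {A, B, C, B, C} and then invokes
   RECURSIVE_MERGE(_nframes, ips, i) with i == 5 and _nframes >= 2.
   Then ips[4] (C) equals ips[4-2] (C), so i is rewound to 3, and the
   next frame recorded overwrites the duplicated {B, C} pair, collapsing
   the recursive cycle to a single occurrence. */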
     78 
     79 
     80 /* ------------------------ x86 ------------------------- */
     81 
     82 #if defined(VGP_x86_linux) || defined(VGP_x86_darwin)
     83 
     84 #define N_FP_CF_VERIF 1021
     85 // A prime number, so that the size of fp_CF_verif_cache is just below
     86 // 4K or 8K.  Note that this prime differs from the one chosen in
     87 // m_debuginfo/debuginfo.c for the cfsi cache: in case we have a
     88 // collision here between two IPs, we expect not to (often) have the
     89 // same collision in the cfsi cache (and vice-versa).
     90 
     91 // unwinding with fp chain is ok:
     92 #define FPUNWIND 0
     93 // there is no CFI info for this IP:
     94 #define NOINFO   1
     95 // Unwind with FP is not ok, must use CF unwind:
     96 #define CFUNWIND 2
     97 
     98 static Addr fp_CF_verif_cache [N_FP_CF_VERIF];
     99 
    100 /* An unwind done by following the fp chain can be incorrect,
    101    as not all frames respect the standard bp/sp ABI.
    102    The CF information is now generated by default by gcc
    103    (as part of the dwarf info). However, unwinding using CF information
    104    is significantly slower: a slowdown of 20% has been observed
    105    on a helgrind test case.
    106    So, by default, the unwinding is done using the fp chain.
    107    But before accepting to unwind an IP with the fp chain, the result
    108    of the unwind is checked against the CF information.
    109    This check can give 3 results:
    110      FPUNWIND (0): there is CF info, and it gives the same result as fp unwind.
    111        => it is assumed that future unwinds for this IP can be done
    112           with the fast fp chain, without further CF checking
    113      NOINFO   (1): there is no CF info (so, fp unwind is the only doable thing)
    114      CFUNWIND (2): there is CF info, but the unwind result differs.
    115        => it is assumed that future unwinds for this IP must be done
    116           with the CF info.
    117    Of course, if each fp unwind implied a check done with a CF unwind,
    118    it would just be slower => we cache the check result in an
    119    array of checked Addrs.
    120    The check for an IP is stored at
    121     fp_CF_verif_cache[IP % N_FP_CF_VERIF] as one of:
    122                      IP ^ FPUNWIND
    123                      IP ^ NOINFO
    124                      IP ^ CFUNWIND
    125 
    126    Note: we can re-use the last (ROUNDDOWN (log (N_FP_CF_VERIF))) bits
    127    to store the check result, as they are guaranteed to be not significant
    128    in the comparison between 2 IPs stored in fp_CF_verif_cache.
    129    In other words, if two IPs differ only in the last 2 bits,
    130    they will not land in the same cache bucket.
    131 */
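/* A sketch of the lookup/update scheme used in the unwind loop below:

      UWord hash      = ip % N_FP_CF_VERIF;
      Addr  xip_verif = ip ^ fp_CF_verif_cache[hash];
      // xip_verif is FPUNWIND, NOINFO or CFUNWIND if this ip is cached;
      // any larger value means the entry belongs to another IP (or is
      // empty), i.e. a cache miss.
      ...
      fp_CF_verif_cache[hash] = ip ^ FPUNWIND;   // record a check result

   Since the three result codes fit in the low 2 bits, and two IPs that
   differ only in those bits can never share a bucket, the XOR encoding
   can be decoded unambiguously. */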
    132 
    133 static UInt fp_CF_verif_generation = 0;
    134 // Our cache has to be maintained in sync with the CFI cache.
    135 // Each time the CFI cache is changed, its generation will be incremented.
    136 // We will clear our cache when our saved generation differs from
    137 // the CFI cache generation.
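// (The generation check, and the cache reset when it fails, are done at
// the start of each call to VG_(get_StackTrace_wrk) below.)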
    138 
    139 UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
    140                                /*OUT*/Addr* ips, UInt max_n_ips,
    141                                /*OUT*/Addr* sps, /*OUT*/Addr* fps,
    142                                UnwindStartRegs* startRegs,
    143                                Addr fp_max_orig )
    144 {
    145    const Bool do_stats = False; // compute and output some stats regularly.
    146    static struct {
    147       UInt nr; // nr of stacktraces computed
    148       UInt nf; // nr of frames computed
    149       UInt Ca; // unwind for which cache indicates CFUnwind must be used.
    150       UInt FF; // unwind for which cache indicates FPUnwind can be used.
    151       UInt Cf; // unwind at end of stack+store CFUNWIND (xip not end of stack).
    152       UInt Fw; // unwind at end of stack+store FPUNWIND
    153       UInt FO; // unwind + store FPUNWIND
    154       UInt CF; // unwind + store CFUNWIND. Details below.
    155       UInt xi; UInt xs; UInt xb; // register(s) which caused a 'store CFUNWIND'.
    156       UInt Ck; // unwind fp invalid+store FPUNWIND
    157       UInt MS; // microsoft unwind
    158    } stats;
    159 
    160    const Bool   debug = False;
    161    //                 = VG_(debugLog_getLevel) () > 3;
    162    //                 = True;
    163    //                 = stats.nr >= 123456;
    164    const HChar* unwind_case; // used when debug is True.
    165    // Debugging this function is not straightforward.
    166    // Here is the easiest way I have found:
    167    // 1. Change the above to True.
    168    // 2. Start your program under Valgrind with --tool=none --vgdb-error=0
    169    // 3. Use GDB/vgdb to put a breakpoint where you want to debug the stacktrace
    170    // 4. Continue till breakpoint is encountered
    171    // 5. From GDB, use 'monitor v.info scheduler' and examine the unwind traces.
    172    //    You might have to issue 'monitor v.info scheduler' twice to see
    173    //    the effect of caching the results of the verification.
    174    //    You can also change the debug level dynamically by using
    175    //    'monitor v.set debuglog 4'.
    176 
    177    Int   i;
    178    Addr  fp_max;
    179    UInt  n_found = 0;
    180    const Int cmrf = VG_(clo_merge_recursive_frames);
    181 
    182    vg_assert(sizeof(Addr) == sizeof(UWord));
    183    vg_assert(sizeof(Addr) == sizeof(void*));
    184 
    185    D3UnwindRegs fpverif_uregs; // result of CF unwind, used to check the fp unwind.
    186    Addr xip_verified = 0; // xip for which we have calculated fpverif_uregs
    187    // 0 assigned to silence false positive -Wuninitialized warning
    188    // This is a false positive as xip_verified is assigned when
    189    // xip_verif > CFUNWIND and only used if xip_verif > CFUNWIND.
    190 
    191    D3UnwindRegs uregs;
    192    uregs.xip = (Addr)startRegs->r_pc;
    193    uregs.xsp = (Addr)startRegs->r_sp;
    194    uregs.xbp = startRegs->misc.X86.r_ebp;
    195    Addr fp_min = uregs.xsp;
    196 
    197    /* Snaffle IPs from the client's stack into ips[0 .. max_n_ips-1],
    198       stopping when the trail goes cold, which we guess to be
    199       when FP is not a reasonable stack location. */
    200 
    201    // JRS 2002-sep-17: hack, to round up fp_max to the end of the
    202    // current page, at least.  Dunno if it helps.
    203    // NJN 2002-sep-17: seems to -- stack traces look like 1.0.X again
    204    fp_max = VG_PGROUNDUP(fp_max_orig);
    205    if (fp_max >= sizeof(Addr))
    206       fp_max -= sizeof(Addr);
    207 
    208    if (debug)
    209       VG_(printf)("max_n_ips=%d fp_min=0x%lx fp_max_orig=0x%lx, "
    210                   "fp_max=0x%lx ip=0x%lx fp=0x%lx\n",
    211                   max_n_ips, fp_min, fp_max_orig, fp_max,
    212                   uregs.xip, uregs.xbp);
    213 
    214    /* Assertion broken before main() is reached in pthreaded programs;  the
    215     * offending stack traces only have one item.  --njn, 2002-aug-16 */
    216    /* vg_assert(fp_min <= fp_max);*/
    217    // On Darwin, this kicks in for pthread-related stack traces, so they're
    218    // only 1 entry long which is wrong.
    219 #  if !defined(VGO_darwin)
    220    if (fp_min + 512 >= fp_max) {
    221       /* If the stack limits look bogus, don't poke around ... but
    222          don't bomb out either. */
    223       if (sps) sps[0] = uregs.xsp;
    224       if (fps) fps[0] = uregs.xbp;
    225       ips[0] = uregs.xip;
    226       return 1;
    227    }
    228 #  endif
    229 
    230    if (UNLIKELY (fp_CF_verif_generation != VG_(CF_info_generation)())) {
    231       fp_CF_verif_generation = VG_(CF_info_generation)();
    232       VG_(memset)(&fp_CF_verif_cache, 0, sizeof(fp_CF_verif_cache));
    233    }
    234 
    235 
    236    /* Loop unwinding the stack. Note that the IP value we get on
    237     * each pass (whether from CFI info or a stack frame) is a
    238     * return address so is actually after the calling instruction
    239     * in the calling function.
    240     *
    241     * Because of this we subtract one from the IP after each pass
    242     * of the loop so that we find the right CFI block on the next
    243     * pass - otherwise we can find the wrong CFI info if it happens
    244     * to change after the calling instruction and that will mean
    245     * that we will fail to unwind the next step.
    246     *
    247     * This most frequently happens at the end of a function when
    248     * a tail call occurs and we wind up using the CFI info for the
    249     * next function which is completely wrong.
    250     */
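   /* For example: when the call is the last instruction of the calling
      function, the return address is the first byte after that call,
      which may well be covered by the CFI of whatever function follows
      in memory.  Querying (return address - 1) keeps us inside the
      caller's own CFI block. */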
    251    if (sps) sps[0] = uregs.xsp;
    252    if (fps) fps[0] = uregs.xbp;
    253    ips[0] = uregs.xip;
    254    i = 1;
    255    if (do_stats) stats.nr++;
    256 
    257    while (True) {
    258 
    259       if (i >= max_n_ips)
    260          break;
    261 
    262       UWord hash = uregs.xip % N_FP_CF_VERIF;
    263       Addr xip_verif = uregs.xip ^ fp_CF_verif_cache [hash];
    264       if (debug)
    265          VG_(printf)("     uregs.xip 0x%08lx xip_verif[0x%08lx]\n",
    266                      uregs.xip, xip_verif);
    267       // If xip is in cache, then xip_verif will be <= CFUNWIND.
    268       // Otherwise, if not in cache, xip_verif will be > CFUNWIND.
    269 
    270       /* Try to derive a new (ip,sp,fp) triple from the current set. */
    271 
    272       /* Do we have to do CFI unwinding?
    273          We do CFI unwinding if one of the following conditions holds:
    274          a. fp_CF_verif_cache contains xip but indicates CFUNWIND must
    275             be done (i.e. fp unwind check failed when we did the first
    276             unwind for this IP).
    277          b. fp_CF_verif_cache does not contain xip.
    278             We will try CFI unwinding in fpverif_uregs and compare with
    279             FP unwind result to insert xip in the cache with the correct
    280             indicator. */
    281       if (UNLIKELY(xip_verif >= CFUNWIND)) {
    282          if (xip_verif == CFUNWIND) {
    283             /* case a : do "real" cfi unwind */
    284             if ( VG_(use_CF_info)( &uregs, fp_min, fp_max ) ) {
    285                if (debug) unwind_case = "Ca";
    286                if (do_stats) stats.Ca++;
    287                goto unwind_done;
    288             }
    289             /* ??? The cache indicates we have to do CFI unwind (so, we
    290              previously found CFI info, and failed the fp unwind
    291              check). Now we have just failed with CFI.  So it succeeded
    292              once and failed once.  No idea what is going on =>
    293              clean up the cache entry and fall back to fp unwind (this
    294              time). */
    295             fp_CF_verif_cache [hash] = 0;
    296             if (debug) VG_(printf)("     cache reset as CFI ok then nok\n");
    297             //??? stats
    298             xip_verif = NOINFO;
    299          } else {
    300             /* case b : do "verif" cfi unwind in fpverif_uregs */
    301             fpverif_uregs = uregs;
    302             xip_verified = uregs.xip;
    303             if ( !VG_(use_CF_info)( &fpverif_uregs, fp_min, fp_max ) ) {
    304                fp_CF_verif_cache [hash] = uregs.xip ^ NOINFO;
    305                if (debug) VG_(printf)("     cache NOINFO fpverif_uregs\n");
    306                xip_verif = NOINFO;
    307             }
    308          }
    309       }
    310 
    311       /* On x86, try the old-fashioned method of following the
    312          %ebp-chain.  This can be done if the fp_CF_verif_cache for xip
    313          indicates fp unwind is ok.  It must be done if the cache indicates
    314          there is no info.  It is also done to determine what to put in the
    315          cache if xip was not in the cache. */
    316       /* This deals with frames resulting from functions which begin
    317          "pushl %ebp ; movl %esp, %ebp", which is the ABI-mandated preamble. */
    318       if (fp_min <= uregs.xbp &&
    319           uregs.xbp <= fp_max - 1 * sizeof(UWord)/*see comment below*/)
    320       {
    321          /* fp looks sane, so use it. */
    322          uregs.xip = (((UWord*)uregs.xbp)[1]);
    323          // We stop if we hit a zero (the traditional end-of-stack
    324          // marker) or a one -- these correspond to recorded IPs of 0 or -1.
    325          // The latter because r8818 (in this file) changes the meaning of
    326          // entries [1] and above in a stack trace, by subtracting 1 from
    327          // them.  Hence stacks that used to end with a zero value now end in
    328          // -1 and so we must detect that too.
    329          if (0 == uregs.xip || 1 == uregs.xip) {
    330             if (xip_verif > CFUNWIND) {
    331                // Check if we obtain the same result with fp unwind.
    332                // If same result, then mark xip as fp unwindable
    333                if (uregs.xip == fpverif_uregs.xip) {
    334                   fp_CF_verif_cache [hash] = xip_verified ^ FPUNWIND;
    335                   if (debug) VG_(printf)("     cache FPUNWIND 0\n");
    336                   unwind_case = "Fw";
    337                   if (do_stats) stats.Fw++;
    338                   break;
    339                } else {
    340                   fp_CF_verif_cache [hash] = xip_verified ^ CFUNWIND;
    341                   uregs = fpverif_uregs;
    342                   if (debug) VG_(printf)("     cache CFUNWIND 0\n");
    343                   unwind_case = "Cf";
    344                   if (do_stats) stats.Cf++;
    345                   goto unwind_done;
    346                }
    347             } else {
    348                // end of stack => out of the loop.
    349                break;
    350             }
    351          }
    352 
    353          uregs.xsp = uregs.xbp + sizeof(Addr) /*saved %ebp*/
    354                                + sizeof(Addr) /*ra*/;
    355          uregs.xbp = (((UWord*)uregs.xbp)[0]);
    356          if (xip_verif > CFUNWIND) {
    357             if (uregs.xip == fpverif_uregs.xip
    358                 && uregs.xsp == fpverif_uregs.xsp
    359                 && uregs.xbp == fpverif_uregs.xbp) {
    360                fp_CF_verif_cache [hash] = xip_verified ^ FPUNWIND;
    361                if (debug) VG_(printf)("     cache FPUNWIND >2\n");
    362                if (debug) unwind_case = "FO";
    363                if (do_stats) stats.FO++;
    364             } else {
    365                fp_CF_verif_cache [hash] = xip_verified ^ CFUNWIND;
    366                if (debug) VG_(printf)("     cache CFUNWIND >2\n");
    367                if (do_stats && uregs.xip != fpverif_uregs.xip) stats.xi++;
    368                if (do_stats && uregs.xsp != fpverif_uregs.xsp) stats.xs++;
    369                if (do_stats && uregs.xbp != fpverif_uregs.xbp) stats.xb++;
    370                uregs = fpverif_uregs;
    371                if (debug) unwind_case = "CF";
    372                if (do_stats) stats.CF++;
    373             }
    374          } else {
    375             if (debug) unwind_case = "FF";
    376             if (do_stats) stats.FF++;
    377          }
    378          goto unwind_done;
    379       } else {
    380          // fp unwind has failed.
    381          // If we were checking the validity of the cfi unwinding,
    382          // we mark in the cache that the fp unwind cannot be done, and that
    383          // cfi unwind is desired.
    384          if (xip_verif > CFUNWIND) {
    385             // We know that fpverif_uregs contains valid information,
    386             // as a failed cf unwind would have put NOINFO in xip_verif.
    387             fp_CF_verif_cache [hash] = xip_verified ^ CFUNWIND;
    388             if (debug) VG_(printf)("     cache CFUNWIND as fp failed\n");
    389             uregs = fpverif_uregs;
    390             if (debug) unwind_case = "Ck";
    391             if (do_stats) stats.Ck++;
    392             goto unwind_done;
    393          }
    394          // xip_verif is FPUNWIND or NOINFO.
    395          // We failed the cfi unwind and/or the fp unwind.
    396          // => fallback to FPO info.
    397       }
    398 
    399       /* And, similarly, try for MSVC FPO unwind info. */
    400       if ( VG_(use_FPO_info)( &uregs.xip, &uregs.xsp, &uregs.xbp,
    401                               fp_min, fp_max ) ) {
    402          if (debug) unwind_case = "MS";
    403          if (do_stats) stats.MS++;
    404          goto unwind_done;
    405       }
    406 
    407       /* No luck.  We have to give up. */
    408       break;
    409 
    410    unwind_done:
    411       /* Add a frame in ips/sps/fps */
    412       /* fp is %ebp.  sp is %esp.  ip is %eip. */
    413       if (0 == uregs.xip || 1 == uregs.xip) break;
    414       if (sps) sps[i] = uregs.xsp;
    415       if (fps) fps[i] = uregs.xbp;
    416       ips[i++] = uregs.xip - 1;
    417       /* -1: refer to calling insn, not the RA */
    418       if (debug)
    419          VG_(printf)("     ips%s[%d]=0x%08lx\n", unwind_case, i-1, ips[i-1]);
    420       uregs.xip = uregs.xip - 1;
    421       /* as per comment at the head of this loop */
    422       if (UNLIKELY(cmrf > 0)) {RECURSIVE_MERGE(cmrf,ips,i);};
    423    }
    424 
    425    if (do_stats) stats.nf += i;
    426    if (do_stats && stats.nr % 10000 == 0) {
    427      VG_(printf)("nr %u nf %u "
    428                  "Ca %u FF %u "
    429                  "Cf %u "
    430                  "Fw %u FO %u "
    431                  "CF %u (xi %u xs %u xb %u) "
    432                  "Ck %u MS %u\n",
    433                  stats.nr, stats.nf,
    434                  stats.Ca, stats.FF,
    435                  stats.Cf,
    436                  stats.Fw, stats.FO,
    437                  stats.CF, stats.xi, stats.xs, stats.xb,
    438                  stats.Ck, stats.MS);
    439    }
    440    n_found = i;
    441    return n_found;
    442 }
    443 
    444 #undef N_FP_CF_VERIF
    445 #undef FPUNWIND
    446 #undef NOINFO
    447 #undef CFUNWIND
    448 
    449 #endif
    450 
    451 /* ----------------------- amd64 ------------------------ */
    452 
    453 #if defined(VGP_amd64_linux) || defined(VGP_amd64_darwin)
    454 
    455 UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
    456                                /*OUT*/Addr* ips, UInt max_n_ips,
    457                                /*OUT*/Addr* sps, /*OUT*/Addr* fps,
    458                                UnwindStartRegs* startRegs,
    459                                Addr fp_max_orig )
    460 {
    461    Bool  debug = False;
    462    Int   i;
    463    Addr  fp_max;
    464    UInt  n_found = 0;
    465    const Int cmrf = VG_(clo_merge_recursive_frames);
    466 
    467    vg_assert(sizeof(Addr) == sizeof(UWord));
    468    vg_assert(sizeof(Addr) == sizeof(void*));
    469 
    470    D3UnwindRegs uregs;
    471    uregs.xip = startRegs->r_pc;
    472    uregs.xsp = startRegs->r_sp;
    473    uregs.xbp = startRegs->misc.AMD64.r_rbp;
    474    Addr fp_min = uregs.xsp;
    475 
    476    /* Snaffle IPs from the client's stack into ips[0 .. max_n_ips-1],
    477       stopping when the trail goes cold, which we guess to be
    478       when FP is not a reasonable stack location. */
    479 
    480    // JRS 2002-sep-17: hack, to round up fp_max to the end of the
    481    // current page, at least.  Dunno if it helps.
    482    // NJN 2002-sep-17: seems to -- stack traces look like 1.0.X again
    483    fp_max = VG_PGROUNDUP(fp_max_orig);
    484    if (fp_max >= sizeof(Addr))
    485       fp_max -= sizeof(Addr);
    486 
    487    if (debug)
    488       VG_(printf)("max_n_ips=%d fp_min=0x%lx fp_max_orig=0x%lx, "
    489                   "fp_max=0x%lx ip=0x%lx fp=0x%lx\n",
    490                   max_n_ips, fp_min, fp_max_orig, fp_max,
    491                   uregs.xip, uregs.xbp);
    492 
    493    /* Assertion broken before main() is reached in pthreaded programs;  the
    494     * offending stack traces only have one item.  --njn, 2002-aug-16 */
    495    /* vg_assert(fp_min <= fp_max);*/
    496    // On Darwin, this kicks in for pthread-related stack traces, so they're
    497    // only 1 entry long which is wrong.
    498 #  if !defined(VGO_darwin)
    499    if (fp_min + 256 >= fp_max) {
    500       /* If the stack limits look bogus, don't poke around ... but
    501          don't bomb out either. */
    502       if (sps) sps[0] = uregs.xsp;
    503       if (fps) fps[0] = uregs.xbp;
    504       ips[0] = uregs.xip;
    505       return 1;
    506    }
    507 #  endif
    508 
    509    /* fp is %rbp.  sp is %rsp.  ip is %rip. */
    510 
    511    ips[0] = uregs.xip;
    512    if (sps) sps[0] = uregs.xsp;
    513    if (fps) fps[0] = uregs.xbp;
    514    i = 1;
    515 
    516    /* Loop unwinding the stack. Note that the IP value we get on
    517     * each pass (whether from CFI info or a stack frame) is a
    518     * return address so is actually after the calling instruction
    519     * in the calling function.
    520     *
    521     * Because of this we subtract one from the IP after each pass
    522     * of the loop so that we find the right CFI block on the next
    523     * pass - otherwise we can find the wrong CFI info if it happens
    524     * to change after the calling instruction and that will mean
    525     * that we will fail to unwind the next step.
    526     *
    527     * This most frequently happens at the end of a function when
    528     * a tail call occurs and we wind up using the CFI info for the
    529     * next function which is completely wrong.
    530     */
    531    while (True) {
    532 
    533       if (i >= max_n_ips)
    534          break;
    535 
    536       /* Try to derive a new (ip,sp,fp) triple from the current set. */
    537 
    538       /* First off, see if there is any CFI info to hand which can
    539          be used. */
    540       if ( VG_(use_CF_info)( &uregs, fp_min, fp_max ) ) {
    541          if (0 == uregs.xip || 1 == uregs.xip) break;
    542          if (sps) sps[i] = uregs.xsp;
    543          if (fps) fps[i] = uregs.xbp;
    544          ips[i++] = uregs.xip - 1; /* -1: refer to calling insn, not the RA */
    545          if (debug)
    546             VG_(printf)("     ipsC[%d]=%#08lx\n", i-1, ips[i-1]);
    547          uregs.xip = uregs.xip - 1; /* as per comment at the head of this loop */
    548          if (UNLIKELY(cmrf > 0)) {RECURSIVE_MERGE(cmrf,ips,i);};
    549          continue;
    550       }
    551 
    552       /* If VG_(use_CF_info) fails, it won't modify ip/sp/fp, so
    553          we can safely try the old-fashioned method. */
    554       /* This bit is supposed to deal with frames resulting from
    555          functions which begin "pushq %rbp ; movq %rsp, %rbp".
    556          Unfortunately, since we can't (easily) look at the insns at
    557          the start of the fn, like GDB does, there's no reliable way
    558          to tell.  Hence the hack of first trying out CFI, and if that
    559          fails, then use this as a fallback. */
    560       /* Note: re "- 1 * sizeof(UWord)", need to take account of the
    561          fact that we are prodding at & ((UWord*)fp)[1] and so need to
    562          adjust the limit check accordingly.  Omitting this has been
    563          observed to cause segfaults on rare occasions. */
    564       if (fp_min <= uregs.xbp && uregs.xbp <= fp_max - 1 * sizeof(UWord)) {
    565          /* fp looks sane, so use it. */
    566          uregs.xip = (((UWord*)uregs.xbp)[1]);
    567          if (0 == uregs.xip || 1 == uregs.xip) break;
    568          uregs.xsp = uregs.xbp + sizeof(Addr) /*saved %rbp*/
    569                                + sizeof(Addr) /*ra*/;
    570          uregs.xbp = (((UWord*)uregs.xbp)[0]);
    571          if (sps) sps[i] = uregs.xsp;
    572          if (fps) fps[i] = uregs.xbp;
    573          ips[i++] = uregs.xip - 1; /* -1: refer to calling insn, not the RA */
    574          if (debug)
    575             VG_(printf)("     ipsF[%d]=%#08lx\n", i-1, ips[i-1]);
    576          uregs.xip = uregs.xip - 1; /* as per comment at the head of this loop */
    577          if (UNLIKELY(cmrf > 0)) {RECURSIVE_MERGE(cmrf,ips,i);};
    578          continue;
    579       }
    580 
    581       /* Last-ditch hack (evidently GDB does something similar).  We
    582          are in the middle of nowhere and we have a nonsense value for
    583          the frame pointer.  If the stack pointer is still valid,
    584          assume that what it points at is a return address.  Yes,
    585          desperate measures.  Could do better here:
    586          - check that the supposed return address is in
    587            an executable page
    588          - check that the supposed return address is just after a call insn
    589          - given those two checks, don't just consider *sp as the return
    590            address; instead scan a likely section of stack (eg sp .. sp+256)
    591            and use suitable values found there.
    592       */
    593       if (fp_min <= uregs.xsp && uregs.xsp < fp_max) {
    594          uregs.xip = ((UWord*)uregs.xsp)[0];
    595          if (0 == uregs.xip || 1 == uregs.xip) break;
    596          if (sps) sps[i] = uregs.xsp;
    597          if (fps) fps[i] = uregs.xbp;
    598          ips[i++] = uregs.xip == 0
    599                     ? 0 /* sp[0] == 0 ==> stuck at the bottom of a
    600                            thread stack */
    601                     : uregs.xip - 1;
    602                         /* -1: refer to calling insn, not the RA */
    603          if (debug)
    604             VG_(printf)("     ipsH[%d]=%#08lx\n", i-1, ips[i-1]);
    605          uregs.xip = uregs.xip - 1; /* as per comment at the head of this loop */
    606          uregs.xsp += 8;
    607          if (UNLIKELY(cmrf > 0)) {RECURSIVE_MERGE(cmrf,ips,i);};
    608          continue;
    609       }
    610 
    611       /* No luck at all.  We have to give up. */
    612       break;
    613    }
    614 
    615    n_found = i;
    616    return n_found;
    617 }
    618 
    619 #endif
    620 
    621 /* -----------------------ppc32/64 ---------------------- */
    622 
    623 #if defined(VGP_ppc32_linux) || defined(VGP_ppc64_linux)
    624 
    625 UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
    626                                /*OUT*/Addr* ips, UInt max_n_ips,
    627                                /*OUT*/Addr* sps, /*OUT*/Addr* fps,
    628                                UnwindStartRegs* startRegs,
    629                                Addr fp_max_orig )
    630 {
    631    Bool  lr_is_first_RA = False;
    632 #  if defined(VG_PLAT_USES_PPCTOC)
    633    Word redir_stack_size = 0;
    634    Word redirs_used      = 0;
    635 #  endif
    636    const Int cmrf = VG_(clo_merge_recursive_frames);
    637 
    638    Bool  debug = False;
    639    Int   i;
    640    Addr  fp_max;
    641    UInt  n_found = 0;
    642 
    643    vg_assert(sizeof(Addr) == sizeof(UWord));
    644    vg_assert(sizeof(Addr) == sizeof(void*));
    645 
    646    Addr ip = (Addr)startRegs->r_pc;
    647    Addr sp = (Addr)startRegs->r_sp;
    648    Addr fp = sp;
    649 #  if defined(VGP_ppc32_linux)
    650    Addr lr = startRegs->misc.PPC32.r_lr;
    651 #  elif defined(VGP_ppc64_linux)
    652    Addr lr = startRegs->misc.PPC64.r_lr;
    653 #  endif
    654    Addr fp_min = sp;
    655 
    656    /* Snaffle IPs from the client's stack into ips[0 .. max_n_ips-1],
    657       stopping when the trail goes cold, which we guess to be
    658       when FP is not a reasonable stack location. */
    659 
    660    // JRS 2002-sep-17: hack, to round up fp_max to the end of the
    661    // current page, at least.  Dunno if it helps.
    662    // NJN 2002-sep-17: seems to -- stack traces look like 1.0.X again
    663    fp_max = VG_PGROUNDUP(fp_max_orig);
    664    if (fp_max >= sizeof(Addr))
    665       fp_max -= sizeof(Addr);
    666 
    667    if (debug)
    668       VG_(printf)("max_n_ips=%d fp_min=0x%lx fp_max_orig=0x%lx, "
    669                   "fp_max=0x%lx ip=0x%lx fp=0x%lx\n",
    670 		  max_n_ips, fp_min, fp_max_orig, fp_max, ip, fp);
    671 
    672    /* Assertion broken before main() is reached in pthreaded programs;  the
    673     * offending stack traces only have one item.  --njn, 2002-aug-16 */
    674    /* vg_assert(fp_min <= fp_max);*/
    675    if (fp_min + 512 >= fp_max) {
    676       /* If the stack limits look bogus, don't poke around ... but
    677          don't bomb out either. */
    678       if (sps) sps[0] = sp;
    679       if (fps) fps[0] = fp;
    680       ips[0] = ip;
    681       return 1;
    682    }
    683 
    684    /* fp is %r1.  ip is %cia.  Note, ppc uses r1 as both the stack and
    685       frame pointers. */
    686 
    687 #  if defined(VGP_ppc64_linux)
    688    redir_stack_size = VEX_GUEST_PPC64_REDIR_STACK_SIZE;
    689    redirs_used      = 0;
    690 #  endif
    691 
    692 #  if defined(VG_PLAT_USES_PPCTOC)
    693    /* Deal with bogus LR values caused by function
    694       interception/wrapping on ppc-TOC platforms; see comment on
    695       similar code a few lines further down. */
    696    if (ULong_to_Ptr(lr) == (void*)&VG_(ppctoc_magic_redirect_return_stub)
    697        && VG_(is_valid_tid)(tid_if_known)) {
    698       Word hsp = VG_(threads)[tid_if_known].arch.vex.guest_REDIR_SP;
    699       redirs_used++;
    700       if (hsp >= 1 && hsp < redir_stack_size)
    701          lr = VG_(threads)[tid_if_known]
    702                  .arch.vex.guest_REDIR_STACK[hsp-1];
    703    }
    704 #  endif
    705 
    706    /* We have to determine whether or not LR currently holds this fn
    707       (call it F)'s return address.  It might not if F has previously
    708       called some other function, hence overwriting LR with a pointer
    709       to some part of F.  Hence if LR and IP point to the same
    710       function then we conclude LR does not hold this function's
    711       return address; instead the LR at entry must have been saved in
    712       the stack by F's prologue and so we must get it from there
    713       instead.  Note all this guff only applies to the innermost
    714       frame. */
    715    lr_is_first_RA = False;
    716    {
    717 #     define M_VG_ERRTXT 1000
    718       HChar buf_lr[M_VG_ERRTXT], buf_ip[M_VG_ERRTXT];
    719       /* The following conditional looks grossly inefficient and
    720          surely could be majorly improved, with not much effort. */
    721       if (VG_(get_fnname_raw) (lr, buf_lr, M_VG_ERRTXT))
    722          if (VG_(get_fnname_raw) (ip, buf_ip, M_VG_ERRTXT))
    723             if (VG_(strncmp)(buf_lr, buf_ip, M_VG_ERRTXT))
    724                lr_is_first_RA = True;
    725 #     undef M_VG_ERRTXT
    726    }
    727 
    728    if (sps) sps[0] = fp; /* NB. not sp */
    729    if (fps) fps[0] = fp;
    730    ips[0] = ip;
    731    i = 1;
    732 
    733    if (fp_min <= fp && fp < fp_max-VG_WORDSIZE+1) {
    734 
    735       /* initial FP is sane; keep going */
    736       fp = (((UWord*)fp)[0]);
    737 
    738       while (True) {
    739 
    740         /* On ppc64-linux (ppc64-elf, really), the lr save
    741            slot is 2 words back from sp, whereas on ppc32-elf(?) it's
    742            only one word back. */
    743 #        if defined(VG_PLAT_USES_PPCTOC)
    744          const Int lr_offset = 2;
    745 #        else
    746          const Int lr_offset = 1;
    747 #        endif
    748 
    749          if (i >= max_n_ips)
    750             break;
    751 
    752          /* Try to derive a new (ip,fp) pair from the current set. */
    753 
    754          if (fp_min <= fp && fp <= fp_max - lr_offset * sizeof(UWord)) {
    755             /* fp looks sane, so use it. */
    756 
    757             if (i == 1 && lr_is_first_RA)
    758                ip = lr;
    759             else
    760                ip = (((UWord*)fp)[lr_offset]);
    761 
    762 #           if defined(VG_PLAT_USES_PPCTOC)
    763             /* Nasty hack to do with function replacement/wrapping on
    764                ppc64-linux.  If LR points to our magic return stub,
    765                then we are in a wrapped or intercepted function, in
    766                which LR has been messed with.  The original LR will
    767                have been pushed onto the thread's hidden REDIR stack
    768                one down from the top (top element is the saved R2) and
    769                so we should restore the value from there instead.
    770                Since nested redirections can and do happen, we keep
    771                track of the number of nested LRs used by the unwinding
    772                so far with 'redirs_used'. */
    773             if (ip == (Addr)&VG_(ppctoc_magic_redirect_return_stub)
    774                 && VG_(is_valid_tid)(tid_if_known)) {
    775                Word hsp = VG_(threads)[tid_if_known]
    776                              .arch.vex.guest_REDIR_SP;
    777                hsp -= 2 * redirs_used;
    778                redirs_used ++;
    779                if (hsp >= 1 && hsp < redir_stack_size)
    780                   ip = VG_(threads)[tid_if_known]
    781                           .arch.vex.guest_REDIR_STACK[hsp-1];
    782             }
    783 #           endif
    784 
    785             if (0 == ip || 1 == ip) break;
    786             if (sps) sps[i] = fp; /* NB. not sp */
    787             if (fps) fps[i] = fp;
    788             fp = (((UWord*)fp)[0]);
    789             ips[i++] = ip - 1; /* -1: refer to calling insn, not the RA */
    790             if (debug)
    791                VG_(printf)("     ipsF[%d]=%#08lx\n", i-1, ips[i-1]);
    792             ip = ip - 1; /* ip is probably dead at this point, but
    793                             play safe, a la x86/amd64 above.  See
    794                             extensive comments above. */
    795             if (UNLIKELY(cmrf > 0)) {RECURSIVE_MERGE(cmrf,ips,i);};
    796             continue;
    797          }
    798 
    799          /* No luck there.  We have to give up. */
    800          break;
    801       }
    802    }
    803 
    804    n_found = i;
    805    return n_found;
    806 }
    807 
    808 #endif
    809 
    810 /* ------------------------ arm ------------------------- */
    811 
    812 #if defined(VGP_arm_linux)
    813 
    814 static Bool in_same_fn ( Addr a1, Addr a2 )
    815 {
    816 #  define M_VG_ERRTXT 500
    817    HChar buf_a1[M_VG_ERRTXT], buf_a2[M_VG_ERRTXT];
    818    /* The following conditional looks grossly inefficient and
    819       surely could be majorly improved, with not much effort. */
    820    if (VG_(get_fnname_raw) (a1, buf_a1, M_VG_ERRTXT))
    821       if (VG_(get_fnname_raw) (a2, buf_a2, M_VG_ERRTXT))
    822          if (VG_(strncmp)(buf_a1, buf_a2, M_VG_ERRTXT))
    823             return True;
    824 #  undef M_VG_ERRTXT
    825    return False;
    826 }
    827 
    828 static Bool in_same_page ( Addr a1, Addr a2 ) {
    829    return (a1 & ~0xFFF) == (a2 & ~0xFFF);
    830 }
    831 
    832 static Addr abs_diff ( Addr a1, Addr a2 ) {
    833    return (Addr)(a1 > a2 ? a1 - a2 : a2 - a1);
    834 }
    835 
    836 static Bool has_XT_perms ( Addr a )
    837 {
    838    NSegment const* seg = VG_(am_find_nsegment)(a);
    839    return seg && seg->hasX && seg->hasT;
    840 }
    841 
    842 static Bool looks_like_Thumb_call32 ( UShort w0, UShort w1 )
    843 {
    844    if (0)
    845       VG_(printf)("isT32call %04x %04x\n", (UInt)w0, (UInt)w1);
    846    // BL  simm26
    847    if ((w0 & 0xF800) == 0xF000 && (w1 & 0xC000) == 0xC000) return True;
    848    // BLX simm26
    849    if ((w0 & 0xF800) == 0xF000 && (w1 & 0xC000) == 0xC000) return True;
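   // (As written, the BL and BLX tests above are identical; the
   // (w1 & 0xC000) == 0xC000 check accepts both encodings.)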
    850    return False;
    851 }
    852 
    853 static Bool looks_like_Thumb_call16 ( UShort w0 )
    854 {
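   // No 16-bit Thumb call encodings are checked for here at present, so
   // this always answers "no"; Thumb calls are only recognised via
   // looks_like_Thumb_call32 above.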
    855    return False;
    856 }
    857 
    858 static Bool looks_like_ARM_call ( UInt a0 )
    859 {
    860    if (0)
    861       VG_(printf)("isA32call %08x\n", a0);
    862    // Leading E forces unconditional only -- fix
    863    if ((a0 & 0xFF000000) == 0xEB000000) return True;
    864    return False;
    865 }
    866 
    867 static Bool looks_like_RA ( Addr ra )
    868 {
    869    /* 'ra' is a plausible return address if it points to
    870        an instruction after a call insn. */
    871    Bool isT = (ra & 1);
    872    if (isT) {
    873       // returning to Thumb code
    874       ra &= ~1;
    875       ra -= 4;
    876       if (has_XT_perms(ra)) {
    877          UShort w0 = *(UShort*)ra;
    878          UShort w1 = in_same_page(ra, ra+2) ? *(UShort*)(ra+2) : 0;
    879          if (looks_like_Thumb_call16(w1) || looks_like_Thumb_call32(w0,w1))
    880             return True;
    881       }
    882    } else {
    883       // ARM
    884       ra &= ~3;
    885       ra -= 4;
    886       if (has_XT_perms(ra)) {
    887          UInt a0 = *(UInt*)ra;
    888          if (looks_like_ARM_call(a0))
    889             return True;
    890       }
    891    }
    892    return False;
    893 }
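/* looks_like_RA is used by the stack-scanning fallback below: a word
   taken from LR or popped off the stack is only accepted as a frame's
   return address if it points just after a recognisable call
   instruction. */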
    894 
    895 UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
    896                                /*OUT*/Addr* ips, UInt max_n_ips,
    897                                /*OUT*/Addr* sps, /*OUT*/Addr* fps,
    898                                UnwindStartRegs* startRegs,
    899                                Addr fp_max_orig )
    900 {
    901    Bool  debug = False;
    902    Int   i;
    903    Addr  fp_max;
    904    UInt  n_found = 0;
    905    const Int cmrf = VG_(clo_merge_recursive_frames);
    906 
    907    vg_assert(sizeof(Addr) == sizeof(UWord));
    908    vg_assert(sizeof(Addr) == sizeof(void*));
    909 
    910    D3UnwindRegs uregs;
    911    uregs.r15 = startRegs->r_pc & 0xFFFFFFFE;
    912    uregs.r14 = startRegs->misc.ARM.r14;
    913    uregs.r13 = startRegs->r_sp;
    914    uregs.r12 = startRegs->misc.ARM.r12;
    915    uregs.r11 = startRegs->misc.ARM.r11;
    916    uregs.r7  = startRegs->misc.ARM.r7;
    917    Addr fp_min = uregs.r13;
    918 
    919    /* Snaffle IPs from the client's stack into ips[0 .. max_n_ips-1],
    920       stopping when the trail goes cold, which we guess to be
    921       when FP is not a reasonable stack location. */
    922 
    923    // JRS 2002-sep-17: hack, to round up fp_max to the end of the
    924    // current page, at least.  Dunno if it helps.
    925    // NJN 2002-sep-17: seems to -- stack traces look like 1.0.X again
    926    fp_max = VG_PGROUNDUP(fp_max_orig);
    927    if (fp_max >= sizeof(Addr))
    928       fp_max -= sizeof(Addr);
    929 
    930    if (debug)
    931       VG_(printf)("\nmax_n_ips=%d fp_min=0x%lx fp_max_orig=0x%lx, "
    932                   "fp_max=0x%lx r15=0x%lx r13=0x%lx\n",
    933                   max_n_ips, fp_min, fp_max_orig, fp_max,
    934                   uregs.r15, uregs.r13);
    935 
    936    /* Assertion broken before main() is reached in pthreaded programs;  the
    937     * offending stack traces only have one item.  --njn, 2002-aug-16 */
    938    /* vg_assert(fp_min <= fp_max);*/
    939    // On Darwin, this kicks in for pthread-related stack traces, so they're
    940    // only 1 entry long which is wrong.
    941    if (fp_min + 512 >= fp_max) {
    942       /* If the stack limits look bogus, don't poke around ... but
    943          don't bomb out either. */
    944       if (sps) sps[0] = uregs.r13;
    945       if (fps) fps[0] = 0;
    946       ips[0] = uregs.r15;
    947       return 1;
    948    }
    949 
    950    /* */
    951 
    952    if (sps) sps[0] = uregs.r13;
    953    if (fps) fps[0] = 0;
    954    ips[0] = uregs.r15;
    955    i = 1;
    956 
    957    /* Loop unwinding the stack. */
    958    Bool do_stack_scan = False;
    959 
    960    /* First try the Official Way, using Dwarf CFI. */
    961    while (True) {
    962       if (debug) {
    963          VG_(printf)("i: %d, r15: 0x%lx, r13: 0x%lx\n",
    964                      i, uregs.r15, uregs.r13);
    965       }
    966 
    967       if (i >= max_n_ips)
    968          break;
    969 
    970       if (VG_(use_CF_info)( &uregs, fp_min, fp_max )) {
    971          if (sps) sps[i] = uregs.r13;
    972          if (fps) fps[i] = 0;
    973          ips[i++] = (uregs.r15 & 0xFFFFFFFE) - 1;
    974          if (debug)
    975             VG_(printf)("USING CFI: r15: 0x%lx, r13: 0x%lx\n",
    976                         uregs.r15, uregs.r13);
    977          uregs.r15 = (uregs.r15 & 0xFFFFFFFE) - 1;
    978          if (UNLIKELY(cmrf > 0)) {RECURSIVE_MERGE(cmrf,ips,i);};
    979          continue;
    980       }
    981 
    982       /* No luck.  We have to give up. */
    983       do_stack_scan = True;
    984       break;
    985    }
    986 
    987    /* Now try Plan B (maybe) -- stack scanning.  This often gives
    988       pretty bad results, so this has to be enabled explicitly by the
    989       user. */
    990    if (do_stack_scan
    991        && i < max_n_ips && i < (Int)VG_(clo_unw_stack_scan_thresh)) {
    992       Int  nByStackScan = 0;
    993       Addr lr = uregs.r14;
    994       Addr sp = uregs.r13 & ~3;
    995       Addr pc = uregs.r15;
    996       // First see if LR contains
    997       // something that could be a valid return address.
    998       if (!in_same_fn(lr, pc) && looks_like_RA(lr)) {
    999          // take it only if 'cand' isn't obviously a duplicate
   1000          // of the last found IP value
   1001          Addr cand = (lr & 0xFFFFFFFE) - 1;
   1002          if (abs_diff(cand, ips[i-1]) > 1) {
   1003             if (sps) sps[i] = 0;
   1004             if (fps) fps[i] = 0;
   1005             ips[i++] = cand;
   1006             if (UNLIKELY(cmrf > 0)) {RECURSIVE_MERGE(cmrf,ips,i);};
   1007             nByStackScan++;
   1008          }
   1009       }
   1010       while (in_same_page(sp, uregs.r13)) {
   1011          if (i >= max_n_ips)
   1012             break;
   1013          // we're in the same page; fairly safe to keep going
   1014          UWord w = *(UWord*)(sp & ~0x3);
   1015          if (looks_like_RA(w)) {
   1016             Addr cand = (w & 0xFFFFFFFE) - 1;
   1017             // take it only if 'cand' isn't obviously a duplicate
   1018             // of the last found IP value
   1019             if (abs_diff(cand, ips[i-1]) > 1) {
   1020                if (sps) sps[i] = 0;
   1021                if (fps) fps[i] = 0;
   1022                ips[i++] = cand;
   1023                if (UNLIKELY(cmrf > 0)) {RECURSIVE_MERGE(cmrf,ips,i);};
   1024                if (++nByStackScan >= VG_(clo_unw_stack_scan_frames)) break;
   1025             }
   1026          }
   1027          sp += 4;
   1028       }
   1029    }
   1030 
   1031    n_found = i;
   1032    return n_found;
   1033 }
   1034 
   1035 #endif
   1036 
   1037 /* ------------------------ arm64 ------------------------- */
   1038 
   1039 #if defined(VGP_arm64_linux)
   1040 
   1041 UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
   1042                                /*OUT*/Addr* ips, UInt max_n_ips,
   1043                                /*OUT*/Addr* sps, /*OUT*/Addr* fps,
   1044                                UnwindStartRegs* startRegs,
   1045                                Addr fp_max_orig )
   1046 {
   1047    Bool  debug = False;
   1048    Int   i;
   1049    Addr  fp_max;
   1050    UInt  n_found = 0;
   1051    const Int cmrf = VG_(clo_merge_recursive_frames);
   1052 
   1053    vg_assert(sizeof(Addr) == sizeof(UWord));
   1054    vg_assert(sizeof(Addr) == sizeof(void*));
   1055 
   1056    D3UnwindRegs uregs;
   1057    uregs.pc = startRegs->r_pc;
   1058    uregs.sp = startRegs->r_sp;
   1059    uregs.x30 = startRegs->misc.ARM64.x30;
   1060    uregs.x29 = startRegs->misc.ARM64.x29;
   1061    Addr fp_min = uregs.sp;
   1062 
   1063    /* Snaffle IPs from the client's stack into ips[0 .. max_n_ips-1],
   1064       stopping when the trail goes cold, which we guess to be
   1065       when FP is not a reasonable stack location. */
   1066 
   1067    // JRS 2002-sep-17: hack, to round up fp_max to the end of the
   1068    // current page, at least.  Dunno if it helps.
   1069    // NJN 2002-sep-17: seems to -- stack traces look like 1.0.X again
   1070    fp_max = VG_PGROUNDUP(fp_max_orig);
   1071    if (fp_max >= sizeof(Addr))
   1072       fp_max -= sizeof(Addr);
   1073 
   1074    if (debug)
   1075       VG_(printf)("\nmax_n_ips=%d fp_min=0x%lx fp_max_orig=0x%lx, "
   1076                   "fp_max=0x%lx PC=0x%lx SP=0x%lx\n",
   1077                   max_n_ips, fp_min, fp_max_orig, fp_max,
   1078                   uregs.pc, uregs.sp);
   1079 
   1080    /* Assertion broken before main() is reached in pthreaded programs;  the
   1081     * offending stack traces only have one item.  --njn, 2002-aug-16 */
   1082    /* vg_assert(fp_min <= fp_max);*/
   1083    // On Darwin, this kicks in for pthread-related stack traces, so they're
   1084    // only 1 entry long which is wrong.
   1085    if (fp_min + 512 >= fp_max) {
   1086       /* If the stack limits look bogus, don't poke around ... but
   1087          don't bomb out either. */
   1088       if (sps) sps[0] = uregs.sp;
   1089       if (fps) fps[0] = uregs.x29;
   1090       ips[0] = uregs.pc;
   1091       return 1;
   1092    }
   1093 
   1094    /* */
   1095 
   1096    if (sps) sps[0] = uregs.sp;
   1097    if (fps) fps[0] = uregs.x29;
   1098    ips[0] = uregs.pc;
   1099    i = 1;
   1100 
   1101    /* Loop unwinding the stack, using CFI. */
   1102    while (True) {
   1103       if (debug) {
   1104          VG_(printf)("i: %d, pc: 0x%lx, sp: 0x%lx\n",
   1105                      i, uregs.pc, uregs.sp);
   1106       }
   1107 
   1108       if (i >= max_n_ips)
   1109          break;
   1110 
   1111       if (VG_(use_CF_info)( &uregs, fp_min, fp_max )) {
   1112          if (sps) sps[i] = uregs.sp;
   1113          if (fps) fps[i] = uregs.x29;
   1114          ips[i++] = uregs.pc - 1;
   1115          if (debug)
   1116             VG_(printf)("USING CFI: pc: 0x%lx, sp: 0x%lx\n",
   1117                         uregs.pc, uregs.sp);
   1118          uregs.pc = uregs.pc - 1;
   1119          if (UNLIKELY(cmrf > 0)) {RECURSIVE_MERGE(cmrf,ips,i);};
   1120          continue;
   1121       }
   1122 
   1123       /* No luck.  We have to give up. */
   1124       break;
   1125    }
   1126 
   1127    n_found = i;
   1128    return n_found;
   1129 }
   1130 
   1131 #endif
   1132 
   1133 /* ------------------------ s390x ------------------------- */
   1134 
   1135 #if defined(VGP_s390x_linux)
   1136 
   1137 UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
   1138                                /*OUT*/Addr* ips, UInt max_n_ips,
   1139                                /*OUT*/Addr* sps, /*OUT*/Addr* fps,
   1140                                UnwindStartRegs* startRegs,
   1141                                Addr fp_max_orig )
   1142 {
   1143    Bool  debug = False;
   1144    Int   i;
   1145    Addr  fp_max;
   1146    UInt  n_found = 0;
   1147    const Int cmrf = VG_(clo_merge_recursive_frames);
   1148 
   1149    vg_assert(sizeof(Addr) == sizeof(UWord));
   1150    vg_assert(sizeof(Addr) == sizeof(void*));
   1151 
   1152    D3UnwindRegs uregs;
   1153    uregs.ia = startRegs->r_pc;
   1154    uregs.sp = startRegs->r_sp;
   1155    Addr fp_min = uregs.sp;
   1156    uregs.fp = startRegs->misc.S390X.r_fp;
   1157    uregs.lr = startRegs->misc.S390X.r_lr;
   1158 
   1159    fp_max = VG_PGROUNDUP(fp_max_orig);
   1160    if (fp_max >= sizeof(Addr))
   1161       fp_max -= sizeof(Addr);
   1162 
   1163    if (debug)
   1164       VG_(printf)("max_n_ips=%d fp_min=0x%lx fp_max_orig=0x%lx, "
   1165                   "fp_max=0x%lx IA=0x%lx SP=0x%lx FP=0x%lx\n",
   1166                   max_n_ips, fp_min, fp_max_orig, fp_max,
   1167                   uregs.ia, uregs.sp,uregs.fp);
   1168 
   1169    /* The first frame is pretty obvious */
   1170    ips[0] = uregs.ia;
   1171    if (sps) sps[0] = uregs.sp;
   1172    if (fps) fps[0] = uregs.fp;
   1173    i = 1;
   1174 
   1175    /* For everything else we have to rely on the eh_frame: gcc by default
   1176       does not create a backchain, and all the other tools (like gdb) also
   1177       have to use the CFI. */
   1178    while (True) {
   1179       if (i >= max_n_ips)
   1180          break;
   1181 
   1182       if (VG_(use_CF_info)( &uregs, fp_min, fp_max )) {
   1183          if (sps) sps[i] = uregs.sp;
   1184          if (fps) fps[i] = uregs.fp;
   1185          ips[i++] = uregs.ia - 1;
   1186          uregs.ia = uregs.ia - 1;
   1187          if (UNLIKELY(cmrf > 0)) {RECURSIVE_MERGE(cmrf,ips,i);};
   1188          continue;
   1189       }
   1190       /* A problem on the first frame? Let's assume it was a bad jump.
   1191          We will use the link register and the current stack and frame
   1192          pointers and see if we can use the CFI in the next round. */
   1193       if (i == 1) {
   1194          if (sps) {
   1195             sps[i] = sps[0];
   1196             uregs.sp = sps[0];
   1197          }
   1198          if (fps) {
   1199             fps[i] = fps[0];
   1200             uregs.fp = fps[0];
   1201          }
   1202          uregs.ia = uregs.lr - 1;
   1203          ips[i++] = uregs.lr - 1;
   1204          if (UNLIKELY(cmrf > 0)) {RECURSIVE_MERGE(cmrf,ips,i);};
   1205          continue;
   1206       }
   1207 
   1208       /* No luck.  We have to give up. */
   1209       break;
   1210    }
   1211 
   1212    n_found = i;
   1213    return n_found;
   1214 }
   1215 
   1216 #endif
   1217 
   1218 /* ------------------------ mips 32/64 ------------------------- */
   1219 #if defined(VGP_mips32_linux) || defined(VGP_mips64_linux)
   1220 UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
   1221                                /*OUT*/Addr* ips, UInt max_n_ips,
   1222                                /*OUT*/Addr* sps, /*OUT*/Addr* fps,
   1223                                UnwindStartRegs* startRegs,
   1224                                Addr fp_max_orig )
   1225 {
   1226    Bool  debug = False;
   1227    Int   i;
   1228    Addr  fp_max;
   1229    UInt  n_found = 0;
   1230    const Int cmrf = VG_(clo_merge_recursive_frames);
   1231 
   1232    vg_assert(sizeof(Addr) == sizeof(UWord));
   1233    vg_assert(sizeof(Addr) == sizeof(void*));
   1234 
   1235    D3UnwindRegs uregs;
   1236    uregs.pc = startRegs->r_pc;
   1237    uregs.sp = startRegs->r_sp;
   1238    Addr fp_min = uregs.sp;
   1239 
   1240 #if defined(VGP_mips32_linux)
   1241    uregs.fp = startRegs->misc.MIPS32.r30;
   1242    uregs.ra = startRegs->misc.MIPS32.r31;
   1243 #elif defined(VGP_mips64_linux)
   1244    uregs.fp = startRegs->misc.MIPS64.r30;
   1245    uregs.ra = startRegs->misc.MIPS64.r31;
   1246 #endif
   1247 
   1248    /* Snaffle IPs from the client's stack into ips[0 .. max_n_ips-1],
   1249       stopping when the trail goes cold, which we guess to be
   1250       when FP is not a reasonable stack location. */
   1251 
   1252    fp_max = VG_PGROUNDUP(fp_max_orig);
   1253    if (fp_max >= sizeof(Addr))
   1254       fp_max -= sizeof(Addr);
   1255 
   1256    if (debug)
   1257       VG_(printf)("max_n_ips=%d fp_min=0x%lx fp_max_orig=0x%lx, "
   1258                   "fp_max=0x%lx pc=0x%lx sp=0x%lx fp=0x%lx\n",
   1259                   max_n_ips, fp_min, fp_max_orig, fp_max,
   1260                   uregs.pc, uregs.sp, uregs.fp);
   1261 
   1262    if (sps) sps[0] = uregs.sp;
   1263    if (fps) fps[0] = uregs.fp;
   1264    ips[0] = uregs.pc;
   1265    i = 1;
   1266 
   1267    /* Loop unwinding the stack. */
   1268 
   1269    while (True) {
   1270       if (debug) {
   1271          VG_(printf)("i: %d, pc: 0x%lx, sp: 0x%lx, ra: 0x%lx\n",
   1272                      i, uregs.pc, uregs.sp, uregs.ra);
   1273       }
   1274       if (i >= max_n_ips)
   1275          break;
   1276 
   1277       D3UnwindRegs uregs_copy = uregs;
   1278       if (VG_(use_CF_info)( &uregs, fp_min, fp_max )) {
   1279          if (debug)
   1280             VG_(printf)("USING CFI: pc: 0x%lx, sp: 0x%lx, ra: 0x%lx\n",
   1281                         uregs.pc, uregs.sp, uregs.ra);
   1282          if (0 != uregs.pc && 1 != uregs.pc) {
   1283             if (sps) sps[i] = uregs.sp;
   1284             if (fps) fps[i] = uregs.fp;
   1285             ips[i++] = uregs.pc - 4;
   1286             uregs.pc = uregs.pc - 4;
   1287             if (UNLIKELY(cmrf > 0)) {RECURSIVE_MERGE(cmrf,ips,i);};
   1288             continue;
   1289          } else
   1290             uregs = uregs_copy;
   1291       }
   1292 
   1293       int seen_sp_adjust = 0;
   1294       long frame_offset = 0;
   1295       PtrdiffT offset;
   1296       if (VG_(get_inst_offset_in_function)(uregs.pc, &offset)) {
   1297          Addr start_pc = uregs.pc - offset;
   1298          Addr limit_pc = uregs.pc;
   1299          Addr cur_pc;
   1300          for (cur_pc = start_pc; cur_pc < limit_pc; cur_pc += 4) {
   1301             unsigned long inst, high_word, low_word;
   1302             unsigned long * cur_inst;
   1303             /* Fetch the instruction.   */
   1304             cur_inst = (unsigned long *)cur_pc;
   1305             inst = *((UInt *) cur_inst);
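                    /* Instructions here are assumed to use the fixed 4-byte
                       MIPS encoding, so only a UInt's worth is read even
                       though cur_inst is declared as unsigned long *. */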
   1306             if (debug)
   1307                VG_(printf)("cur_pc: 0x%lx, inst: 0x%lx\n", cur_pc, inst);
   1308 
   1309             /* Save some code by pre-extracting some useful fields.  */
   1310             high_word = (inst >> 16) & 0xffff;
   1311             low_word = inst & 0xffff;
   1312 
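                    /* Worked example: "addiu $sp,$sp,-32" encodes as
                       0x27bdffe0, giving high_word == 0x27bd and low_word
                       == 0xffe0, so the test below adds
                       0x10000 - 0xffe0 == 32 to frame_offset, i.e. the
                       size of this stack adjustment. */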
   1313             if (high_word == 0x27bd        /* addiu $sp,$sp,-i */
   1314                 || high_word == 0x23bd     /* addi $sp,$sp,-i */
   1315                 || high_word == 0x67bd) {  /* daddiu $sp,$sp,-i */
   1316                if (low_word & 0x8000)      /* negative stack adjustment? */
   1317                   frame_offset += 0x10000 - low_word;
   1318                else
   1319                   /* Exit loop if a positive stack adjustment is found, which
   1320                      usually means that the stack cleanup code in the function
   1321                      epilogue is reached.  */
   1322                   break;
   1323                seen_sp_adjust = 1;
   1324             }
   1325          }
   1326          if (debug)
   1327             VG_(printf)("frame_offset: 0x%lx\n", frame_offset);
   1328       }
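              /* If a prologue-style stack adjustment was found, step to the
                 caller: undoing the adjustment recovers the caller's SP, and
                 RA - 8 is taken as the call site because on MIPS the return
                 address points past both the jal/jalr and its branch delay
                 slot. */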
   1329       if (seen_sp_adjust) {
   1330          if (0 == uregs.pc || 1 == uregs.pc) break;
   1331          if (uregs.pc == uregs.ra - 8) break;
   1332          if (sps) {
   1333             sps[i] = uregs.sp + frame_offset;
   1334          }
   1335          uregs.sp = uregs.sp + frame_offset;
   1336 
   1337          if (fps) {
   1338             fps[i] = fps[0];
   1339             uregs.fp = fps[0];
   1340          }
   1341          if (0 == uregs.ra || 1 == uregs.ra) break;
   1342          uregs.pc = uregs.ra - 8;
   1343          ips[i++] = uregs.ra - 8;
   1344          if (UNLIKELY(cmrf > 0)) {RECURSIVE_MERGE(cmrf,ips,i);};
   1345          continue;
   1346       }
   1347 
   1348       if (i == 1) {
   1349          if (sps) {
   1350             sps[i] = sps[0];
   1351             uregs.sp = sps[0];
   1352          }
   1353          if (fps) {
   1354             fps[i] = fps[0];
   1355             uregs.fp = fps[0];
   1356          }
   1357          if (0 == uregs.ra || 1 == uregs.ra) break;
   1358          uregs.pc = uregs.ra - 8;
   1359          ips[i++] = uregs.ra - 8;
   1360          if (UNLIKELY(cmrf > 0)) {RECURSIVE_MERGE(cmrf,ips,i);};
   1361          continue;
   1362       }
   1363       /* No luck.  We have to give up. */
   1364       break;
   1365    }
   1366 
   1367    n_found = i;
   1368    return n_found;
   1369 }
   1370 
   1371 #endif
   1372 
   1373 
   1374 /*------------------------------------------------------------*/
   1375 /*---                                                      ---*/
   1376 /*--- END platform-dependent unwinder worker functions     ---*/
   1377 /*---                                                      ---*/
   1378 /*------------------------------------------------------------*/
   1379 
   1380 /*------------------------------------------------------------*/
   1381 /*--- Exported functions.                                  ---*/
   1382 /*------------------------------------------------------------*/
   1383 
   1384 UInt VG_(get_StackTrace) ( ThreadId tid,
   1385                            /*OUT*/StackTrace ips, UInt max_n_ips,
   1386                            /*OUT*/StackTrace sps,
   1387                            /*OUT*/StackTrace fps,
   1388                            Word first_ip_delta )
   1389 {
   1390    /* Get the register values with which to start the unwind. */
   1391    UnwindStartRegs startRegs;
   1392    VG_(memset)( &startRegs, 0, sizeof(startRegs) );
   1393    VG_(get_UnwindStartRegs)( &startRegs, tid );
   1394 
   1395    Addr stack_highest_word = VG_(threads)[tid].client_stack_highest_word;
   1396    Addr stack_lowest_word  = 0;
   1397 
   1398 #  if defined(VGP_x86_linux)
   1399    /* Nasty little hack to deal with syscalls - if libc is using its
   1400       _dl_sysinfo_int80 function for syscalls (the TLS version does),
   1401       then ip will always appear to be in that function when doing a
   1402       syscall, not the actual libc function doing the syscall.  This
   1403       check sees if IP is within that function, and pops the return
   1404       address off the stack so that ip is placed within the library
   1405       function calling the syscall.  This makes stack backtraces much
   1406       more useful.
   1407 
   1408       The function is assumed to look like this (from glibc-2.3.6 sources):
   1409          _dl_sysinfo_int80:
   1410             int $0x80
   1411             ret
   1412       That is 3 (2+1) bytes long.  We could be more thorough and check
   1413       that the 3 bytes of the function are as expected, but I can't be
   1414       bothered.
   1415    */
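           /* Illustrative example (made-up addresses): if r_pc is 2 bytes
              into _dl_sysinfo_int80 and the word at r_sp is 0xb7eab123 (the
              return address pushed by the caller's "call"), then afterwards
              r_pc is 0xb7eab123 and r_sp has moved up by sizeof(Addr). */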
   1416    if (VG_(client__dl_sysinfo_int80) != 0 /* we know its address */
   1417        && startRegs.r_pc >= VG_(client__dl_sysinfo_int80)
   1418        && startRegs.r_pc < VG_(client__dl_sysinfo_int80)+3
   1419        && VG_(am_is_valid_for_client)(startRegs.r_pc, sizeof(Addr),
   1420                                       VKI_PROT_READ)) {
   1421       startRegs.r_pc  = (ULong) *(Addr*)(UWord)startRegs.r_sp;
   1422       startRegs.r_sp += (ULong) sizeof(Addr);
   1423    }
   1424 #  endif
   1425 
   1426    /* See if we can get a better idea of the stack limits */
   1427    VG_(stack_limits)( (Addr)startRegs.r_sp,
   1428                       &stack_lowest_word, &stack_highest_word );
   1429 
   1430    /* Take into account the first_ip_delta. */
   1431    startRegs.r_pc += (Long)(Word)first_ip_delta;
   1432 
   1433    if (0)
   1434       VG_(printf)("tid %d: stack_highest=0x%08lx ip=0x%010llx "
   1435                   "sp=0x%010llx\n",
   1436                   tid, stack_highest_word,
   1437                   startRegs.r_pc, startRegs.r_sp);
   1438 
   1439    return VG_(get_StackTrace_wrk)(tid, ips, max_n_ips,
   1440                                        sps, fps,
   1441                                        &startRegs,
   1442                                        stack_highest_word);
   1443 }
   1444 
   1445 static void printIpDesc(UInt n, Addr ip, void* uu_opaque)
   1446 {
   1447    #define BUF_LEN   4096
   1448 
   1449    static HChar buf[BUF_LEN];
   1450 
   1451    VG_(describe_IP)(ip, buf, BUF_LEN);
   1452 
   1453    if (VG_(clo_xml)) {
   1454       VG_(printf_xml)("    %s\n", buf);
   1455    } else {
   1456       VG_(message)(Vg_UserMsg, "   %s %s\n", ( n == 0 ? "at" : "by" ), buf);
   1457    }
   1458 }
   1459 
   1460 /* Print a StackTrace. */
   1461 void VG_(pp_StackTrace) ( StackTrace ips, UInt n_ips )
   1462 {
   1463    vg_assert( n_ips > 0 );
   1464 
   1465    if (VG_(clo_xml))
   1466       VG_(printf_xml)("  <stack>\n");
   1467 
   1468    VG_(apply_StackTrace)( printIpDesc, NULL, ips, n_ips );
   1469 
   1470    if (VG_(clo_xml))
   1471       VG_(printf_xml)("  </stack>\n");
   1472 }
   1473 
   1474 /* Get and immediately print a StackTrace. */
   1475 void VG_(get_and_pp_StackTrace) ( ThreadId tid, UInt max_n_ips )
   1476 {
   1477    Addr ips[max_n_ips];
   1478    UInt n_ips
   1479       = VG_(get_StackTrace)(tid, ips, max_n_ips,
   1480                             NULL/*array to dump SP values in*/,
   1481                             NULL/*array to dump FP values in*/,
   1482                             0/*first_ip_delta*/);
   1483    VG_(pp_StackTrace)(ips, n_ips);
   1484 }
   1485 
   1486 void VG_(apply_StackTrace)(
   1487         void(*action)(UInt n, Addr ip, void* opaque),
   1488         void* opaque,
   1489         StackTrace ips, UInt n_ips
   1490      )
   1491 {
   1492    Bool main_done = False;
   1493    Int i = 0;
   1494 
   1495    vg_assert(n_ips > 0);
   1496    do {
   1497       Addr ip = ips[i];
   1498 
   1499       // Stop after the first appearance of "main" or one of the other names
   1500       // (the appearance of which is a pretty good sign that we've gone past
   1501       // main without seeing it, for whatever reason)
   1502       if ( ! VG_(clo_show_below_main) ) {
   1503          Vg_FnNameKind kind = VG_(get_fnname_kind_from_IP)(ip);
   1504          if (Vg_FnNameMain == kind || Vg_FnNameBelowMain == kind) {
   1505             main_done = True;
   1506          }
   1507       }
   1508 
   1509       // Act on the ip
   1510       action(i, ip, opaque);
   1511 
   1512       i++;
   1513    } while (i < n_ips && !main_done);
   1514 }
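        /* A minimal usage sketch (illustrative only, not part of this file):
           a hypothetical tool callback matching the 'action' signature above,
           which simply counts the frames VG_(apply_StackTrace) visits.

              static void count_frame ( UInt n, Addr ip, void* opaque )
              {
                 UInt* n_seen = (UInt*)opaque;   // a real tool might also
                 (*n_seen)++;                    // call VG_(describe_IP) here
              }

              // ... given ips[] and n_ips from VG_(get_StackTrace)(...):
              UInt n_seen = 0;
              VG_(apply_StackTrace)( count_frame, &n_seen, ips, n_ips );
        */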
   1515 
   1516 
   1517 /*--------------------------------------------------------------------*/
   1518 /*--- end                                                          ---*/
   1519 /*--------------------------------------------------------------------*/
   1520