Home | History | Annotate | Download | only in coregrind
      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- An xtree, tree of stacktraces with data            m_xtree.c ---*/
      4 /*--------------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2016-2017 Philippe Waroquiers
     11 
     12    This code generalises the XTree idea that was implemented in
     13    the massif tool in Valgrind versions <= 3.12, which is
     14       Copyright (C) 2005-2017 Nicholas Nethercote
     15        njn (at) valgrind.org
     16 
     17    The XTree implementation in this file is however implemented completely
     18    differently. Some code has been re-used for the production of the
     19    massif file header (e.g. FP_cmd function).
     20 
     21    This program is free software; you can redistribute it and/or
     22    modify it under the terms of the GNU General Public License as
     23    published by the Free Software Foundation; either version 2 of the
     24    License, or (at your option) any later version.
     25 
     26    This program is distributed in the hope that it will be useful, but
     27    WITHOUT ANY WARRANTY; without even the implied warranty of
     28    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     29    General Public License for more details.
     30 
     31    You should have received a copy of the GNU General Public License
     32    along with this program; if not, write to the Free Software
     33    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     34    02111-1307, USA.
     35 
     36    The GNU General Public License is contained in the file COPYING.
     37 */
     38 
     39 #include "pub_core_basics.h"
     40 #include "pub_core_debuglog.h"
     41 #include "pub_core_clientstate.h"
     42 #include "pub_core_stacktrace.h"
     43 #include "pub_core_execontext.h"
     44 #include "pub_core_libcbase.h"
     45 #include "pub_core_libcassert.h"
     46 #include "pub_core_libcfile.h"
     47 #include "pub_core_libcprint.h"
     48 #include "pub_core_libcproc.h"
     49 #include "pub_core_hashtable.h"
     50 #include "pub_core_mallocfree.h"
     51 #include "pub_core_options.h"
     52 #include "pub_core_debuginfo.h"
     53 #include "pub_core_deduppoolalloc.h"
     54 #include "pub_core_xtree.h"    /* self */
     55 
     56 #define DMSG(level, ...) (level <= VG_(debugLog_getLevel)() ?         \
     57                           VG_(dmsg)(__VA_ARGS__)                      \
     58                           : 0)
     59 
     60 /* Defines the relevant part of an ec. This is shared between an xt
     61    and its snapshots (see XT_shared XArray of xec). */
     62 typedef
     63    struct _xec {
     64      ExeContext* ec;
     65      UShort top;        // The first ips of ec to take into account.
     66      UShort n_ips_sel;  // The nr of ips from top to take into account.
     67    }
     68    xec;
     69 
     70 /* XT_shared maintains the information shared between an XT and all
     71    its snapshots. */
     72 typedef
     73    struct _XT_shared {
     74       UWord nrRef; /* nr of XTrees referencing this shared memory. */
     75 
     76       Alloc_Fn_t alloc_fn;                /* alloc fn (nofail) */
     77       const HChar* cc;                    /* cost centre for alloc */
     78       Free_Fn_t free_fn;                  /* free fn */
     79 
     80       /* The data associated to each ec is stored in 2 arrays:
     81            an xec array, shared between an xt and all its snapshots.
     82            a  data array, private to each XTree.
     83          For an ec with an ECU ecu, d4ecu2xecu[ecu/4] gives the offset in
     84          xec and data arrays where the ec information is located (this
     85          indirection is used to avoid huge xec and data arrays, in
     86          case an XTree contains data only for a small number of ec.
     87          The offset in the xec and data array is used as xtree ec unique
     88          id i.e. an xecu. */
     89 
     90       UInt  d4ecu2xecu_sz; /* size of d4ecu2xecu (in nr of elements). */
     91       UInt* d4ecu2xecu;
     92 
     93       /* ec information common to an xt and its snapshots. */
     94       XArray* xec; /* XArray of xec, indexed by xecu (== d4ecu2xecu[ecu/4]). */
     95 
     96       /* XArray of xecu, sorted by StackTrace ips[top..top+n_ips_sel-1].
     97          See ips_order_cmp. */
     98       XArray* ips_order_xecu;
     99    } XT_shared;
    100 
    101 /* NO_OFFSET indicates in d4ecu2xecu  there is no data (yet) for this ec
    102    (with the index ecu/4). */
    103 #define NO_OFFSET 0xffffffff
    104 
    105 static XT_shared* new_XT_shared (Alloc_Fn_t alloc_fn,
    106                                  const  HChar* cc,
    107                                  void   (*free_fn)(void*))
    108 {
    109    XT_shared* shared;
    110 
    111    vg_assert(alloc_fn);
    112    vg_assert(cc);
    113    vg_assert(free_fn);
    114    shared = alloc_fn(cc, sizeof(*shared));
    115    shared->nrRef = 0;
    116    shared->alloc_fn = alloc_fn;
    117    shared->cc = cc;
    118    shared->free_fn = free_fn;
    119 
    120    shared->d4ecu2xecu_sz = 0;
    121    shared->d4ecu2xecu = NULL;
    122    shared->xec = VG_(newXA)(alloc_fn, cc, free_fn, sizeof(xec));
    123    shared->ips_order_xecu = NULL; // Allocated when needed.
    124 
    125    return shared;
    126 }
    127 
    128 static void delete_XT_shared (XT_shared* shared)
    129 {
    130    vg_assert(shared->nrRef == 0);
    131    shared->free_fn(shared->d4ecu2xecu);
    132    VG_(deleteXA)(shared->xec);
    133    if (shared->ips_order_xecu != NULL)
    134       VG_(deleteXA)(shared->ips_order_xecu);
    135    shared->free_fn(shared);
    136 }
    137 
    138 /* Compare 2 entries in ips_order_xecu by StackTrace elements.
    139    In case stack traces are of different length, an 'absent' ips is
    140    considered smaller than any other address. */
    141 static XArray* xec_data_for_sort; // Needed to translate an xecu into an xec
    142 static Int ips_order_cmp(const void* vleft, const void* vright)
    143 {
    144    const Xecu left_xecu  = *(const Xecu*)vleft;
    145    const Xecu right_xecu = *(const Xecu*)vright;
    146    const xec* left  = VG_(indexXA)(xec_data_for_sort, left_xecu);
    147    const xec* right = VG_(indexXA)(xec_data_for_sort, right_xecu);
    148    const StackTrace left_ips  = VG_(get_ExeContext_StackTrace)(left->ec)
    149       + left->top;
    150    const StackTrace right_ips = VG_(get_ExeContext_StackTrace)(right->ec)
    151       + right->top;
    152    UInt i;
    153 
    154    const UInt c_n_ips_sel = left->n_ips_sel < right->n_ips_sel
    155       ? left->n_ips_sel : right->n_ips_sel;
    156 
    157    // First see if we have a difference on the common nr of ips selected
    158    for (i = 0; i < c_n_ips_sel; i++) {
    159       if (left_ips[i] == right_ips[i]) continue;
    160       if (left_ips[i] < right_ips[i]) return -1;
    161       return  1;
    162    }
    163    // Common part is equal => compare lengths.
    164    if (left->n_ips_sel < right->n_ips_sel) return -1;
    165    if (left->n_ips_sel > right->n_ips_sel) return  1;
    166    return 0;
    167 }
    168 
    169 // If needed, build or refresh shared->ips_order_xecu
    170 static void ensure_ips_order_xecu_valid(XT_shared* shared)
    171 {
    172    UInt i;
    173    UInt n_xecu;
    174 
    175    if (shared->ips_order_xecu == NULL) {
    176       shared->ips_order_xecu = VG_(newXA)(shared->alloc_fn, shared->cc,
    177                                           shared->free_fn, sizeof(Xecu));
    178       VG_(hintSizeXA)(shared->ips_order_xecu, VG_(sizeXA)(shared->xec));
    179       VG_(setCmpFnXA)(shared->ips_order_xecu, ips_order_cmp);
    180    }
    181 
    182    if (VG_(sizeXA)(shared->xec) == VG_(sizeXA)(shared->ips_order_xecu))
    183       return;
    184 
    185    n_xecu = VG_(sizeXA)(shared->xec);
    186    for (i = VG_(sizeXA)(shared->ips_order_xecu); i < n_xecu; i++)
    187       VG_(addToXA)(shared->ips_order_xecu, &i);
    188 
    189    xec_data_for_sort = shared->xec;
    190    VG_(sortXA)(shared->ips_order_xecu);
    191 }
    192 
    193 static void addRef_XT_shared (XT_shared* shared)
    194 {
    195    shared->nrRef++;
    196 }
    197 
    198 static UWord release_XT_shared(XT_shared* shared)
    199 {
    200    UWord nrRef;
    201 
    202    vg_assert(shared->nrRef > 0);
    203    nrRef = --shared->nrRef;
    204    if (nrRef == 0)
    205       delete_XT_shared(shared);
    206    return nrRef;
    207 }
    208 
    209 
    210 struct _XTree {
    211    Alloc_Fn_t alloc_fn;                /* alloc fn (nofail) */
    212    const HChar* cc;                    /* cost centre for alloc */
    213    Free_Fn_t free_fn;                  /* free fn */
    214    Word  dataSzB;   /* data size in bytes */
    215    XT_init_data_t init_data_fn;
    216    XT_add_data_t add_data_fn;
    217    XT_sub_data_t sub_data_fn;
    218    XT_filter_IPs_t filter_IPs_fn;
    219 
    220    XT_shared* shared;
    221 
    222    HChar* tmp_data; /* temporary buffer, to insert new elements. */
    223    XArray* data; /* of elements of size dataSzB */
    224 };
    225 
    226 
    227 XTree* VG_(XT_create) ( Alloc_Fn_t alloc_fn,
    228                         const HChar* cc,
    229                         Free_Fn_t free_fn,
    230                         Word dataSzB,
    231                         XT_init_data_t init_data_fn,
    232                         XT_add_data_t add_data_fn,
    233                         XT_sub_data_t sub_data_fn,
    234                         XT_filter_IPs_t filter_IPs_fn)
    235 {
    236    XTree* xt;
    237 
    238    /* check user-supplied info .. */
    239    vg_assert(alloc_fn);
    240    vg_assert(free_fn);
    241    vg_assert(dataSzB >= 0);
    242    vg_assert(init_data_fn);
    243    vg_assert(add_data_fn);
    244    vg_assert(sub_data_fn);
    245 
    246    xt = alloc_fn(cc, sizeof(struct _XTree) );
    247    xt->alloc_fn  = alloc_fn;
    248    xt->cc        = cc;
    249    xt->free_fn   = free_fn;
    250    xt->dataSzB   = dataSzB;
    251    xt->init_data_fn = init_data_fn;
    252    xt->add_data_fn = add_data_fn;
    253    xt->sub_data_fn = sub_data_fn;
    254    xt->filter_IPs_fn = filter_IPs_fn;
    255 
    256    xt->shared = new_XT_shared(alloc_fn, cc, free_fn);
    257    addRef_XT_shared(xt->shared);
    258    xt->tmp_data = alloc_fn(cc, xt->dataSzB);
    259    xt->data =  VG_(newXA)(alloc_fn, cc, free_fn, dataSzB);
    260 
    261    return xt;
    262 }
    263 
    264 XTree* VG_(XT_snapshot)(XTree* xt)
    265 {
    266    XTree* nxt;
    267 
    268    vg_assert(xt);
    269 
    270    nxt = xt->alloc_fn(xt->cc, sizeof(struct _XTree) );
    271 
    272    *nxt = *xt;
    273    addRef_XT_shared(nxt->shared);
    274    nxt->tmp_data = nxt->alloc_fn(nxt->cc, nxt->dataSzB);
    275    nxt->data = VG_(cloneXA)(nxt->cc, xt->data);
    276 
    277    return nxt;
    278 }
    279 
    280 void VG_(XT_delete) ( XTree* xt )
    281 {
    282    vg_assert(xt);
    283 
    284    release_XT_shared(xt->shared);
    285    xt->free_fn(xt->tmp_data);
    286    VG_(deleteXA)(xt->data);
    287    xt->free_fn(xt);
    288 }
    289 
    290 static Xecu find_or_insert (XTree* xt, ExeContext* ec)
    291 {
    292 
    293    const UInt d4ecu = VG_(get_ECU_from_ExeContext)(ec) / 4;
    294    XT_shared* shared = xt->shared;
    295 
    296    /* First grow the d4ecu2xecu array if needed. */
    297    if (d4ecu >= shared->d4ecu2xecu_sz) {
    298       UInt old_sz = shared->d4ecu2xecu_sz;
    299       UInt new_sz = (3 * d4ecu) / 2;
    300 
    301       if (new_sz < 1000)
    302          new_sz = 1000;
    303       shared->d4ecu2xecu = VG_(realloc)(xt->cc, shared->d4ecu2xecu,
    304                                         new_sz * sizeof(UInt));
    305       shared->d4ecu2xecu_sz = new_sz;
    306       for (UInt i = old_sz; i < new_sz; i++)
    307          shared->d4ecu2xecu[i] = NO_OFFSET;
    308    }
    309 
    310    if (shared->d4ecu2xecu[d4ecu] == NO_OFFSET) {
    311       xec xe;
    312 
    313       xe.ec = ec;
    314       if (xt->filter_IPs_fn == NULL) {
    315          xe.top = 0;
    316          xe.n_ips_sel = (UShort)VG_(get_ExeContext_n_ips)(xe.ec);
    317       } else {
    318          UInt top;
    319          UInt n_ips_sel = VG_(get_ExeContext_n_ips)(xe.ec);
    320          xt->filter_IPs_fn(VG_(get_ExeContext_StackTrace)(xe.ec), n_ips_sel,
    321                            &top, &n_ips_sel);
    322          xe.top = (UShort)top;
    323          xe.n_ips_sel = (UShort)n_ips_sel;
    324       }
    325       xt->init_data_fn(xt->tmp_data);
    326       VG_(addToXA)(shared->xec, &xe);
    327       shared->d4ecu2xecu[d4ecu] = (UInt)VG_(addToXA)(xt->data, xt->tmp_data);
    328    }
    329 
    330    return shared->d4ecu2xecu[d4ecu];
    331 }
    332 
    333 Xecu VG_(XT_add_to_ec) (XTree* xt, ExeContext* ec, const void* value)
    334 {
    335    Xecu xecu = find_or_insert(xt, ec);
    336    void* data = VG_(indexXA)(xt->data, xecu);
    337 
    338    xt->add_data_fn(data, value);
    339    return xecu;
    340 }
    341 
    342 Xecu VG_(XT_sub_from_ec) (XTree* xt, ExeContext* ec, const void* value)
    343 {
    344    Xecu xecu = find_or_insert(xt, ec);
    345    void* data = VG_(indexXA)(xt->data, xecu);
    346 
    347    xt->sub_data_fn(data, value);
    348    return xecu;
    349 }
    350 
    351 void VG_(XT_add_to_xecu) (XTree* xt, Xecu xecu, const void* value)
    352 {
    353    void* data = VG_(indexXA)(xt->data, xecu);
    354    xt->add_data_fn(data, value);
    355 }
    356 
    357 void VG_(XT_sub_from_xecu) (XTree* xt, Xecu xecu, const void* value)
    358 {
    359    void* data = VG_(indexXA)(xt->data, xecu);
    360    xt->sub_data_fn(data, value);
    361 }
    362 
    363 UInt VG_(XT_n_ips_sel) (XTree* xt, Xecu xecu)
    364 {
    365    xec* xe = (xec*)VG_(indexXA)(xt->shared->xec, xecu);
    366    return (UInt)xe->n_ips_sel;
    367 }
    368 
    369 ExeContext* VG_(XT_get_ec_from_xecu) (XTree* xt, Xecu xecu)
    370 {
    371    xec* xe = (xec*)VG_(indexXA)(xt->shared->xec, xecu);
    372    return xe->ec;
    373 }
    374 
    375 static VgFile* xt_open (const HChar* outfilename)
    376 {
    377    VgFile* fp;
    378 
    379    fp = VG_(fopen)(outfilename, VKI_O_CREAT|VKI_O_WRONLY|VKI_O_TRUNC,
    380                    VKI_S_IRUSR|VKI_S_IWUSR);
    381    if (fp == NULL) {
    382       VG_(message)(Vg_UserMsg,
    383                    "Error: can not open xtree output file `%s'\n",
    384                    outfilename);
    385    }
    386    return fp;
    387 }
    388 
    389 #define FP(format, args...) ({ VG_(fprintf)(fp, format, ##args); })
    390 
    391 // Print "cmd:" line.
    392 static void FP_cmd(VgFile* fp)
    393 {
    394    UInt i;
    395 
    396    FP("cmd: ");
    397    FP("%s", VG_(args_the_exename));
    398    for (i = 0; i < VG_(sizeXA)(VG_(args_for_client)); i++) {
    399       HChar* arg = * (HChar**) VG_(indexXA)(VG_(args_for_client), i);
    400       FP(" %s", arg);
    401    }
    402    FP("\n");
    403 }
    404 
    405 /* ----------- Callgrind output ------------------------------------------- */
    406 
    407 /* Output a callgrind format element in compressed format:
    408      "name=(pos)" or "name=(pos) value" (if value_new)
    409    or not compressed format: "name=value"
    410    VG_(clo_xtree_compress_strings) indicates if the compressed format is used.
    411    name is the format element (e.g. fl, fn, cfi, cfn, ...).
    412    pos is the value dictionary position, used for compressed format.
    413    value_new is True if this is the first usage of value. */
    414 static void FP_pos_str(VgFile* fp, const HChar* name, UInt pos,
    415                        const HChar* value, Bool value_new)
    416 {
    417    if (!VG_(clo_xtree_compress_strings))
    418       FP("%s=%s\n", name, value);
    419    else if (value_new)
    420       FP("%s=(%d) %s\n", name, pos, value);
    421    else
    422       FP("%s=(%d)\n", name, pos);
    423 }
    424 
    425 void VG_(XT_callgrind_print)
    426      (XTree* xt,
    427       const HChar* outfilename,
    428       const HChar* events,
    429       const HChar* (*img_value)(const void* value))
    430 {
    431    UInt n_xecu;
    432    XT_shared* shared = xt->shared;
    433    VgFile* fp = xt_open(outfilename);
    434    DedupPoolAlloc* fnname_ddpa;
    435    DedupPoolAlloc* filename_ddpa;
    436    HChar* filename_buf = NULL;
    437    UInt filename_buf_size = 0;
    438    const HChar* filename_dir;
    439    const HChar* filename_name;
    440 
    441    if (fp == NULL)
    442       return;
    443 
    444    fnname_ddpa = VG_(newDedupPA)(16000, 1, xt->alloc_fn,
    445                                  "XT_callgrind_print.fn", xt->free_fn);
    446    filename_ddpa = VG_(newDedupPA)(16000, 1, xt->alloc_fn,
    447                                    "XT_callgrind_print.fl", xt->free_fn);
    448 
    449    FP("# callgrind format\n");
    450    FP("version: 1\n");
    451    FP("creator: xtree-1\n");
    452    FP("pid: %d\n", VG_(getpid)());
    453    FP_cmd(fp);
    454 
    455    /* Currently, we only need/support line positions. */
    456    FP("\npositions:%s\n", " line");
    457 
    458    /* Produce one "event:" line for each event, and the "events:" line. */
    459    {
    460       HChar strtok_events[VG_(strlen)(events)+1];
    461       HChar* e;
    462       HChar* ssaveptr;
    463       HChar* p;
    464 
    465       VG_(strcpy)(strtok_events, events);
    466       for (e = VG_(strtok_r)(strtok_events, ",", &ssaveptr);
    467            e != NULL;
    468            e = VG_(strtok_r)(NULL, ",", &ssaveptr))
    469          FP("event: %s\n", e);
    470       FP("events:");
    471       VG_(strcpy)(strtok_events, events);
    472       for (e = VG_(strtok_r)(strtok_events, ",", &ssaveptr);
    473            e != NULL;
    474            e = VG_(strtok_r)(NULL, ",", &ssaveptr)) {
    475          p = e;
    476          while (*p) {
    477             if (*p == ':')
    478                *p = 0;
    479             p++;
    480          }
    481          FP(" %s", e);
    482       }
    483       FP("\n");
    484    }
    485    xt->init_data_fn(xt->tmp_data); // to compute totals
    486 
    487    n_xecu = VG_(sizeXA)(xt->data);
    488    vg_assert (n_xecu <= VG_(sizeXA)(shared->xec));
    489    for (Xecu xecu = 0; xecu < n_xecu; xecu++) {
    490       xec* xe = (xec*)VG_(indexXA)(shared->xec, xecu);
    491       if (xe->n_ips_sel == 0)
    492          continue;
    493 
    494       const HChar* img = img_value(VG_(indexXA)(xt->data, xecu));
    495 
    496       // CALLED_FLF gets the Dir+Filename/Line number/Function name for ips[n]
    497       // in the variables called_filename/called_linenum/called_fnname.
    498       // The booleans called_filename_new/called_fnname_new are set to True
    499       // the first time the called_filename/called_fnname are encountered.
    500       // The called_filename_nr/called_fnname_nr are numbers identifying
    501       // the strings  called_filename/called_fnname.
    502 #define CALLED_FLF(n)                                                   \
    503       if ((n) < 0                                                       \
    504           || !VG_(get_filename_linenum)(ips[(n)],                       \
    505                                         &filename_name,                 \
    506                                         &filename_dir,                  \
    507                                         &called_linenum)) {             \
    508          filename_name = "UnknownFile???";                              \
    509          called_linenum = 0;                                            \
    510       }                                                                 \
    511       if ((n) < 0                                                       \
    512           || !VG_(get_fnname)(ips[(n)], &called_fnname)) {              \
    513          called_fnname = "UnknownFn???";                                \
    514       }                                                                 \
    515       {                                                                 \
    516          UInt needed_size = VG_(strlen)(filename_dir) + 1               \
    517             + VG_(strlen)(filename_name) + 1;                           \
    518          if (filename_buf_size < needed_size) {                         \
    519             filename_buf_size = needed_size;                            \
    520             filename_buf = VG_(realloc)(xt->cc, filename_buf,           \
    521                                         filename_buf_size);             \
    522          }                                                              \
    523       }                                                                 \
    524       VG_(strcpy)(filename_buf, filename_dir);                          \
    525       if (filename_buf[0] != '\0') {                                    \
    526          VG_(strcat)(filename_buf, "/");                                \
    527       }                                                                 \
    528       VG_(strcat)(filename_buf, filename_name);                         \
    529       called_filename_nr = VG_(allocStrDedupPA)(filename_ddpa,          \
    530                                                 filename_buf,           \
    531                                                 &called_filename_new);  \
    532       called_filename = filename_buf;                                   \
    533       called_fnname_nr = VG_(allocStrDedupPA)(fnname_ddpa,              \
    534                                               called_fnname,            \
    535                                               &called_fnname_new);
    536 
    537       /* Instead of unknown fnname ???, CALLED_FLF could use instead:
    538          VG_(sprintf)(unknown_fn, "%p", (void*)ips[(n)]);
    539          but that creates a lot of (useless) nodes at least for
    540          valgrind self-hosting. */
    541 
    542       if (img) {
    543          const HChar* called_filename;
    544          UInt called_filename_nr;
    545          Bool called_filename_new; // True the first time we see this filename.
    546          const HChar* called_fnname;
    547          UInt called_fnname_nr;
    548          Bool called_fnname_new; // True the first time we see this fnname.
    549          UInt called_linenum;
    550          UInt prev_linenum;
    551 
    552          const Addr* ips = VG_(get_ExeContext_StackTrace)(xe->ec) + xe->top;
    553          Int ips_idx = xe->n_ips_sel - 1;
    554 
    555          if (0) {
    556             VG_(printf)("entry img %s\n", img);
    557             VG_(pp_ExeContext)(xe->ec);
    558             VG_(printf)("\n");
    559          }
    560          xt->add_data_fn(xt->tmp_data, VG_(indexXA)(xt->data, xecu));
    561          CALLED_FLF(ips_idx);
    562          for (;
    563               ips_idx >= 0;
    564               ips_idx--) {
    565             FP_pos_str(fp, "fl", called_filename_nr,
    566                        called_filename, called_filename_new);
    567             FP_pos_str(fp, "fn", called_fnname_nr,
    568                        called_fnname, called_fnname_new);
    569             if (ips_idx == 0)
    570                FP("%d %s\n", called_linenum, img);
    571             else
    572                FP("%d\n", called_linenum); //no self cost.
    573             prev_linenum = called_linenum;
    574             if (ips_idx >= 1) {
    575                CALLED_FLF(ips_idx-1);
    576                FP_pos_str(fp, "cfi", called_filename_nr,
    577                           called_filename, called_filename_new);
    578                FP_pos_str(fp, "cfn", called_fnname_nr,
    579                           called_fnname, called_fnname_new);
    580                called_filename_new = False;
    581                called_fnname_new = False;
    582                /* Giving a call count of 0 allows kcachegrind to hide the calls
    583                   column. A call count of 1 means kcachegrind would show in the
    584                   calls column the nr of stacktrace containing this arc, which
    585                   is very confusing. So, the less bad is to give a 0 call
    586                   count. */
    587                FP("calls=0 %d\n", called_linenum);
    588                FP("%d %s\n", prev_linenum, img);
    589             }
    590          }
    591          FP("\n");
    592       }
    593    }
    594    /* callgrind format is not really fully supporting (yet?) execution trees:
    595       in an execution tree, self and inclusive costs are identical, and
    596       cannot be added together.
    597       If no totals: line is given, callgrind_annotate calculates the addition
    598       of all costs, and so gives a wrong totals.
    599       Giving a totals: line solves this, but the user must give the option
    600       --inclusive=yes (kind of hack) to have all the functions given
    601       in the output file. */
    602    FP("totals: %s\n", img_value(xt->tmp_data));
    603    VG_(fclose)(fp);
    604    VG_(deleteDedupPA)(fnname_ddpa);
    605    VG_(deleteDedupPA)(filename_ddpa);
    606    VG_(free)(filename_buf);
    607 }
    608 
    609 
    610 /* ----------- Massif output ---------------------------------------------- */
    611 
    612 /* For Massif output, some functions from the execontext are not output, a.o.
    613    the allocation functions at the top of the stack and the functions below
    614    main. So, the StackTrace of the execontexts in the xtree must be filtered.
    615    Ms_Ec defines the subset of the stacktrace relevant for the report. */
    616 typedef
    617    struct {
    618       StackTrace ips; // ips and n_ips provides the subset of the xtree ec
    619       UInt n_ips;     // to use for a massif report.
    620 
    621       SizeT report_value; // The value to report for this stack trace.
    622    } Ms_Ec;
    623 
    624 /* Ms_Group defines (at a certain depth) a group of ec context that
    625    have the same IPs at the given depth, and have the same 'parent'.
    626    total is the sum of the values of all group elements.
    627    A Ms_Group can also represent a set of ec contexts that do not
    628    have the same IP, but that have each a total which is below the
    629    significant size. Such a group has a NULL ms_ec, a zero group_io.
    630    n_ec is the nr of insignificant ec that have been collected inside this
    631    insignificant group, and total is the sum of all non significant ec
    632    at the given depth. */
    633 typedef
    634    struct {
    635       Ms_Ec* ms_ec; // The group consists in ms_ec[0 .. n_ec-1]
    636       Addr group_ip;
    637       UInt n_ec;
    638       SizeT total;
    639    } Ms_Group;
    640 
    641 /* Compare 2 groups by total, to have bigger total first. */
    642 static Int ms_group_revcmp_total(const void* vleft, const void* vright)
    643 {
    644    const Ms_Group* left = (const Ms_Group*)vleft;
    645    const Ms_Group* right = (const Ms_Group*)vright;
    646 
    647    // First reverse compare total
    648    if (left->total > right->total) return -1;
    649    if (left->total < right->total) return  1;
    650 
    651    /* Equal size => compare IPs.
    652       This (somewhat?) helps to have deterministic test results.
    653       If this would change between platforms, then we should compare
    654       function names/filename/linenr */
    655    if (left->group_ip < right->group_ip) return -1;
    656    if (left->group_ip > right->group_ip) return  1;
    657    return 0;
    658 }
    659 
    660 /* Scan the addresses in ms_ec at the given depth.
    661    On return,
    662       *groups points to an array of Ms_Group sorted by total.
    663       *n_groups is the nr of groups
    664    The caller is responsible to free the allocated group array. */
    665 static void ms_make_groups (UInt depth, Ms_Ec* ms_ec, UInt n_ec, SizeT sig_sz,
    666                             UInt* n_groups, Ms_Group** groups)
    667 {
    668    UInt i, g;
    669    Addr cur_group_ip = 0;
    670 
    671    *n_groups = 0;
    672 
    673    /* Handle special case somewhat more efficiently */
    674    if (n_ec == 0) {
    675       *groups = NULL;
    676       return;
    677    }
    678 
    679    /* Compute how many groups we have. */
    680    for (i = 0; i < n_ec; i++) {
    681       if (ms_ec[i].n_ips > depth
    682           && (*n_groups == 0 || cur_group_ip != ms_ec[i].ips[depth])) {
    683          (*n_groups)++;
    684          cur_group_ip = ms_ec[i].ips[depth];
    685       }
    686    }
    687 
    688    /* make the group array. */
    689    *groups = VG_(malloc)("ms_make_groups", *n_groups * sizeof(Ms_Group));
    690    i = 0;
    691    for (g = 0; g < *n_groups; g++) {
    692       while (ms_ec[i].n_ips <= depth)
    693          i++;
    694       cur_group_ip = ms_ec[i].ips[depth];
    695       (*groups)[g].group_ip = cur_group_ip;
    696       (*groups)[g].ms_ec = &ms_ec[i];
    697       (*groups)[g].n_ec = 1;
    698       (*groups)[g].total = ms_ec[i].report_value;
    699       i++;
    700       while (i < n_ec
    701              && ms_ec[i].n_ips > depth
    702              && cur_group_ip == ms_ec[i].ips[depth]) {
    703          (*groups)[g].total += ms_ec[i].report_value;
    704          i++;
    705          (*groups)[g].n_ec++;
    706       }
    707    }
    708 
    709    /* Search for insignificant groups, collect them all together
    710       in the first insignificant group, and compact the group array. */
    711    {
    712       UInt insig1; // Position of first insignificant group.
    713       UInt n_insig = 0; // Nr of insignificant groups found.
    714 
    715       for (g = 0; g < *n_groups; g++) {
    716          if ((*groups)[g].total < sig_sz) {
    717             if (n_insig == 0) {
    718                // First insig group => transform it into the special group
    719                (*groups)[g].ms_ec = NULL;
    720                (*groups)[g].group_ip = 0;
    721                (*groups)[g].n_ec = 0;
    722                // start the sum of insig total as total
    723                insig1 = g;
    724             } else {
    725                // Add this insig group total into insig1 first group
    726                (*groups)[insig1].total += (*groups)[g].total;
    727             }
    728             n_insig++;
    729          } else {
    730             if (n_insig > 1)
    731                (*groups)[g - n_insig + 1] = (*groups)[g];
    732          }
    733       }
    734       if (n_insig > 0) {
    735          (*groups)[insig1].n_ec = n_insig;
    736          *n_groups -= n_insig - 1;
    737       }
    738       DMSG(1, "depth %u n_groups %u n_insig %u\n", depth, *n_groups, n_insig);
    739    }
    740 
    741    /* Sort on total size, bigger size first. */
    742    VG_(ssort)(*groups, *n_groups, sizeof(Ms_Group), ms_group_revcmp_total);
    743 }
    744 
    745 static void ms_output_group (VgFile* fp, UInt depth, Ms_Group* group,
    746                              SizeT sig_sz, double sig_pct_threshold)
    747 {
    748    UInt i;
    749    Ms_Group* groups;
    750    UInt n_groups;
    751 
    752    // If this is an insignificant group, handle it specially
    753    if (group->ms_ec == NULL) {
    754       const HChar* s = ( 1 ==  group->n_ec? "," : "s, all" );
    755       vg_assert(group->group_ip == 0);
    756       FP("%*sn0: %lu in %d place%s below massif's threshold (%.2f%%)\n",
    757          depth+1, "", group->total, group->n_ec, s, sig_pct_threshold);
    758       return;
    759    }
    760 
    761    // Normal group => output the group and its subgroups.
    762    ms_make_groups(depth+1, group->ms_ec, group->n_ec, sig_sz,
    763                   &n_groups, &groups);
    764 
    765    FP("%*s" "n%u: %ld %s\n",
    766       depth + 1, "",
    767       n_groups,
    768       group->total,
    769       VG_(describe_IP)(group->ms_ec->ips[depth] - 1, NULL));
    770    /* XTREE??? Massif original code removes 1 to get the IP description. I am
    771       wondering if this is not something that predates revision r8818,
    772       which introduced a -1 in the stack unwind (see m_stacktrace.c)
    773       Kept for the moment to allow exact comparison with massif output, but
    774       probably we should remove this, as we very probably end up 2 bytes before
    775       the RA Return Address. */
    776 
    777    /* Output sub groups of this group. */
    778    for (i = 0; i < n_groups; i++)
    779       ms_output_group(fp, depth+1, &groups[i], sig_sz, sig_pct_threshold);
    780 
    781    VG_(free)(groups);
    782 }
    783 
    784 /* Allocate and build an array of Ms_Ec sorted by addresses in the
    785    Ms_Ec StackTrace. */
    786 static void prepare_ms_ec (XTree* xt,
    787                            ULong (*report_value)(const void* value),
    788                            ULong* top_total, Ms_Ec** vms_ec, UInt* vn_ec)
    789 {
    790    XT_shared* shared = xt->shared;
    791    const UInt n_xecu = VG_(sizeXA)(shared->xec);
    792    const UInt n_data_xecu = VG_(sizeXA)(xt->data);
    793    Ms_Ec* ms_ec = VG_(malloc)("XT_massif_print.ms_ec", n_xecu * sizeof(Ms_Ec));
    794    UInt n_xecu_sel = 0; // Nr of xecu that are selected for output.
    795 
    796    vg_assert(n_data_xecu <= n_xecu);
    797 
    798    // Ensure we have in shared->ips_order_xecu our xecu sorted by StackTrace.
    799    ensure_ips_order_xecu_valid(shared);
    800 
    801    *top_total = 0;
    802    DMSG(1, "iteration %u\n", n_xecu);
    803    for (UInt i = 0; i < n_xecu; i++) {
    804       Xecu xecu = *(Xecu*)VG_(indexXA)(shared->ips_order_xecu, i);
    805       xec* xe = (xec*)VG_(indexXA)(shared->xec, xecu);
    806 
    807       if (xecu >= n_data_xecu)
    808          continue; // No data for this xecu in xt->data.
    809       ms_ec[n_xecu_sel].n_ips = xe->n_ips_sel;
    810       if (ms_ec[n_xecu_sel].n_ips == 0)
    811          continue;
    812 
    813       ms_ec[n_xecu_sel].ips = VG_(get_ExeContext_StackTrace)(xe->ec) + xe->top;
    814       ms_ec[n_xecu_sel].report_value
    815          = (*report_value)(VG_(indexXA)(xt->data, xecu));
    816       *top_total += ms_ec[n_xecu_sel].report_value;
    817 
    818       n_xecu_sel++;
    819    }
    820    vg_assert(n_xecu_sel <= n_xecu);
    821 
    822    *vms_ec = ms_ec;
    823    *vn_ec = n_xecu_sel;
    824 }
    825 
    826 MsFile* VG_(XT_massif_open)
    827      (const HChar* outfilename,
    828       const HChar* desc,
    829       const XArray* desc_args,
    830       const HChar* time_unit)
    831 {
    832    UInt i;
    833    VgFile* fp = xt_open(outfilename);
    834 
    835    if (fp == NULL)
    836       return NULL; // xt_open reported the error.
    837 
    838    /* ------ file header ------------------------------- */
    839    FP("desc:");
    840    if (desc)
    841       FP(" %s", desc);
    842    i = 0;
    843    if (desc_args) {
    844       for (i = 0; i < VG_(sizeXA)(desc_args); i++) {
    845          HChar* arg = *(HChar**)VG_(indexXA)(desc_args, i);
    846          FP(" %s", arg);
    847       }
    848    }
    849    if (0 == i && desc == NULL) FP(" (none)");
    850    FP("\n");
    851 
    852    FP_cmd(fp);
    853 
    854    FP("time_unit: %s\n", time_unit);
    855 
    856    return fp;
    857 }
    858 
    859 void VG_(XT_massif_close)(MsFile* fp)
    860 {
    861    if (fp == NULL)
    862       return; // Error should have been reported by  VG_(XT_massif_open)
    863 
    864    VG_(fclose)(fp);
    865 }
    866 
    867 void VG_(XT_massif_print)
    868      (MsFile* fp,
    869       XTree* xt,
    870       const Massif_Header* header,
    871       ULong (*report_value)(const void* value))
    872 {
    873    UInt i;
    874 
    875    if (fp == NULL)
    876       return; // Normally  VG_(XT_massif_open) already reported an error.
    877 
    878    /* Compute/prepare Snapshot totals/data/... */
    879    ULong top_total;
    880 
    881    /* Following variables only used for detailed snapshot. */
    882    UInt n_ec = 0;
    883    Ms_Ec* ms_ec = NULL;
    884    const HChar* kind =
    885       header->detailed ? (header->peak ? "peak" : "detailed") : "empty";
    886 
    887    DMSG(1, "XT_massif_print %s\n", kind);
    888    if (header->detailed) {
    889       /* Prepare the Ms_Ec sorted array of stacktraces and the groups
    890          at level 0. */
    891       prepare_ms_ec(xt, report_value, &top_total, &ms_ec, &n_ec);
    892       DMSG(1, "XT_print_massif ms_ec n_ec %u\n", n_ec);
    893    } else if (xt == NULL) {
    894       /* Non detailed, no xt => use the sz provided in the header. */
    895       top_total = header->sz_B;
    896    } else {
    897       /* For non detailed snapshot, compute total directly from the xec. */
    898       const XT_shared* shared = xt->shared;
    899       const UInt n_xecu = VG_(sizeXA)(xt->data);
    900       top_total = 0;
    901 
    902       for (UInt xecu = 0; xecu < n_xecu; xecu++) {
    903          xec* xe = (xec*)VG_(indexXA)(shared->xec, xecu);
    904          if (xe->n_ips_sel == 0)
    905             continue;
    906          top_total += (*report_value)(VG_(indexXA)(xt->data, xecu));
    907       }
    908    }
    909 
    910    /* ------ snapshot header --------------------------- */
    911    FP("#-----------\n");
    912    FP("snapshot=%d\n", header->snapshot_n);
    913    FP("#-----------\n");
    914    FP("time=%lld\n", header->time);
    915 
    916    FP("mem_heap_B=%llu\n", top_total); // without extra_B and without stacks_B
    917    FP("mem_heap_extra_B=%llu\n", header->extra_B);
    918    FP("mem_stacks_B=%llu\n", header->stacks_B);
    919    FP("heap_tree=%s\n", kind);
    920 
    921    /* ------ detailed snapshot data ----------------------------- */
    922    if (header->detailed) {
    923       UInt n_groups;
    924       Ms_Group* groups;
    925 
    926       ULong sig_sz;
    927       // Work out how big a child must be to be significant.  If the current
    928       // top_total is zero, then we set it to 1, which means everything will be
    929       // judged insignificant -- this is sensible, as there's no point showing
    930       // any detail for this case.  Unless they used threshold=0, in which
    931       // case we show them everything because that's what they asked for.
    932       //
    933       // Nb: We do this once now, rather than once per child, because if we do
    934       // that the cost of all the divisions adds up to something significant.
    935       if (0 == top_total && 0 != header->sig_threshold)
    936          sig_sz = 1;
    937       else
    938          sig_sz = ((top_total + header->extra_B + header->stacks_B)
    939                    * header->sig_threshold) / 100;
    940 
    941       /* Produce the groups at depth 0 */
    942       DMSG(1, "XT_massif_print producing depth 0 groups\n");
    943       ms_make_groups(0, ms_ec, n_ec, sig_sz, &n_groups, &groups);
    944 
    945       /* Output the top node. */
    946       FP("n%u: %llu %s\n", n_groups, top_total, header->top_node_desc);
    947 
    948       /* Output depth 0 groups. */
    949       DMSG(1, "XT_massif_print outputing %u depth 0 groups\n", n_groups);
    950       for (i = 0; i < n_groups; i++)
    951          ms_output_group(fp, 0, &groups[i], sig_sz, header->sig_threshold);
    952 
    953       VG_(free)(groups);
    954       VG_(free)(ms_ec);
    955    }
    956 }
    957 
    958 Int VG_(XT_offset_main_or_below_main)(Addr* ips, Int n_ips)
    959 {
    960    /* Search for main or below main function.
    961       To limit the nr of ips to examine, we maintain the deepest
    962       offset where main was found, and we first search main
    963       from there.
    964       If no main is found, we will then do a search for main or
    965       below main function till the top. */
    966    static Int deepest_main = 0;
    967    Vg_FnNameKind kind = Vg_FnNameNormal;
    968    Int mbm = n_ips - 1; // Position of deepest main or below main.
    969    Vg_FnNameKind mbmkind = Vg_FnNameNormal;
    970    Int i;
    971 
    972    for (i = n_ips - 1 - deepest_main;
    973         i < n_ips;
    974         i++) {
    975       mbmkind = VG_(get_fnname_kind_from_IP)(ips[i]);
    976       if (mbmkind != Vg_FnNameNormal) {
    977          mbm = i;
    978          break;
    979       }
    980    }
    981 
    982    /* Search for main or below main function till top. */
    983    for (i = mbm - 1;
    984         i >= 0 && mbmkind != Vg_FnNameMain;
    985         i--) {
    986       kind = VG_(get_fnname_kind_from_IP)(ips[i]);
    987       if (kind != Vg_FnNameNormal) {
    988          mbm = i;
    989          mbmkind = kind;
    990       }
    991    }
    992    if (Vg_FnNameMain == mbmkind || Vg_FnNameBelowMain == mbmkind) {
    993       if (mbmkind == Vg_FnNameMain && (n_ips - 1 - mbm) > deepest_main)
    994          deepest_main = n_ips - 1 - mbm;
    995       return mbm;
    996    } else
    997       return n_ips-1;
    998 }
    999 
   1000 void VG_(XT_filter_1top_and_maybe_below_main)
   1001      (Addr* ips, Int n_ips,
   1002       UInt* top, UInt* n_ips_sel)
   1003 {
   1004    Int mbm;
   1005 
   1006    *n_ips_sel = n_ips;
   1007    if (n_ips == 0) {
   1008       *top = 0;
   1009       return;
   1010    }
   1011 
   1012    /* Filter top function. */
   1013    *top = 1;
   1014 
   1015    if (VG_(clo_show_below_main))
   1016       mbm = n_ips - 1;
   1017    else
   1018       mbm = VG_(XT_offset_main_or_below_main)(ips, n_ips);
   1019 
   1020    *n_ips_sel = mbm - *top + 1;
   1021 }
   1022 
   1023 void VG_(XT_filter_maybe_below_main)
   1024      (Addr* ips, Int n_ips,
   1025       UInt* top, UInt* n_ips_sel)
   1026 {
   1027    Int mbm;
   1028 
   1029    *n_ips_sel = n_ips;
   1030    *top = 0;
   1031    if (n_ips == 0)
   1032       return;
   1033 
   1034    if (VG_(clo_show_below_main))
   1035       mbm = n_ips - 1;
   1036    else
   1037       mbm = VG_(XT_offset_main_or_below_main)(ips, n_ips);
   1038 
   1039    *n_ips_sel = mbm - *top + 1;
   1040 }
   1041 
   1042 /*--------------------------------------------------------------------*/
   1043 /*--- end                                                m_xtree.c ---*/
   1044 /*--------------------------------------------------------------------*/
   1045