Home | History | Annotate | Download | only in exp-dhat
      1 
      2 //--------------------------------------------------------------------*/
      3 //--- DHAT: a Dynamic Heap Analysis Tool                 dh_main.c ---*/
      4 //--------------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of DHAT, a Valgrind tool for profiling the
      8    heap usage of programs.
      9 
     10    Copyright (C) 2010-2013 Mozilla Inc
     11 
     12    This program is free software; you can redistribute it and/or
     13    modify it under the terms of the GNU General Public License as
     14    published by the Free Software Foundation; either version 2 of the
     15    License, or (at your option) any later version.
     16 
     17    This program is distributed in the hope that it will be useful, but
     18    WITHOUT ANY WARRANTY; without even the implied warranty of
     19    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     20    General Public License for more details.
     21 
     22    You should have received a copy of the GNU General Public License
     23    along with this program; if not, write to the Free Software
     24    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     25    02111-1307, USA.
     26 
     27    The GNU General Public License is contained in the file COPYING.
     28 */
     29 
     30 /* Contributed by Julian Seward <jseward (at) acm.org> */
     31 
     32 
     33 #include "pub_tool_basics.h"
     34 #include "pub_tool_libcbase.h"
     35 #include "pub_tool_libcassert.h"
     36 #include "pub_tool_libcprint.h"
     37 #include "pub_tool_machine.h"      // VG_(fnptr_to_fnentry)
     38 #include "pub_tool_mallocfree.h"
     39 #include "pub_tool_options.h"
     40 #include "pub_tool_replacemalloc.h"
     41 #include "pub_tool_tooliface.h"
     42 #include "pub_tool_wordfm.h"
     43 
     44 #define HISTOGRAM_SIZE_LIMIT 1024
     45 
     46 
     47 //------------------------------------------------------------//
     48 //--- Globals                                              ---//
     49 //------------------------------------------------------------//
     50 
     51 // Number of guest instructions executed so far.  This is
     52 // incremented directly from the generated code.
     53 static ULong g_guest_instrs_executed = 0;
     54 
     55 // Summary statistics for the entire run.
     56 static ULong g_tot_blocks = 0;   // total blocks allocated
     57 static ULong g_tot_bytes  = 0;   // total bytes allocated
     58 
     59 static ULong g_cur_blocks_live = 0; // curr # blocks live
     60 static ULong g_cur_bytes_live  = 0; // curr # bytes live
     61 
     62 static ULong g_max_blocks_live = 0; // bytes and blocks at
     63 static ULong g_max_bytes_live  = 0; // the max residency point
     64 
     65 
     66 //------------------------------------------------------------//
     67 //--- an Interval Tree of live blocks                      ---//
     68 //------------------------------------------------------------//
     69 
     70 /* Tracks information about live blocks. */
     71 typedef
     72    struct {
     73       Addr        payload;
     74       SizeT       req_szB;
     75       ExeContext* ap;  /* allocation ec */
     76       ULong       allocd_at; /* instruction number */
     77       ULong       n_reads;
     78       ULong       n_writes;
     79       /* Approx histogram, one byte per payload byte.  Counts latch up
     80          therefore at 0xFFFF.  Can be NULL if the block is resized or if
     81          the block is larger than HISTOGRAM_SIZE_LIMIT. */
     82       UShort*     histoW; /* [0 .. req_szB-1] */
     83    }
     84    Block;
     85 
     86 /* May not contain zero-sized blocks.  May not contain
     87    overlapping blocks. */
     88 static WordFM* interval_tree = NULL;  /* WordFM* Block* void */
     89 
     90 /* Here's the comparison function.  Since the tree is required
     91 to contain non-zero sized, non-overlapping blocks, it's good
     92 enough to consider any overlap as a match. */
     93 static Word interval_tree_Cmp ( UWord k1, UWord k2 )
     94 {
     95    Block* b1 = (Block*)k1;
     96    Block* b2 = (Block*)k2;
     97    tl_assert(b1->req_szB > 0);
     98    tl_assert(b2->req_szB > 0);
     99    if (b1->payload + b1->req_szB <= b2->payload) return -1;
    100    if (b2->payload + b2->req_szB <= b1->payload) return  1;
    101    return 0;
    102 }
    103 
    104 // 2-entry cache for find_Block_containing
    105 static Block* fbc_cache0 = NULL;
    106 static Block* fbc_cache1 = NULL;
    107 
    108 static UWord stats__n_fBc_cached = 0;
    109 static UWord stats__n_fBc_uncached = 0;
    110 static UWord stats__n_fBc_notfound = 0;
    111 
    112 static Block* find_Block_containing ( Addr a )
    113 {
    114    if (LIKELY(fbc_cache0
    115               && fbc_cache0->payload <= a
    116               && a < fbc_cache0->payload + fbc_cache0->req_szB)) {
    117       // found at 0
    118       stats__n_fBc_cached++;
    119       return fbc_cache0;
    120    }
    121    if (LIKELY(fbc_cache1
    122               && fbc_cache1->payload <= a
    123               && a < fbc_cache1->payload + fbc_cache1->req_szB)) {
    124       // found at 1; swap 0 and 1
    125       Block* tmp = fbc_cache0;
    126       fbc_cache0 = fbc_cache1;
    127       fbc_cache1 = tmp;
    128       stats__n_fBc_cached++;
    129       return fbc_cache0;
    130    }
    131    Block fake;
    132    fake.payload = a;
    133    fake.req_szB = 1;
    134    UWord foundkey = 1;
    135    UWord foundval = 1;
    136    Bool found = VG_(lookupFM)( interval_tree,
    137                                &foundkey, &foundval, (UWord)&fake );
    138    if (!found) {
    139       stats__n_fBc_notfound++;
    140       return NULL;
    141    }
    142    tl_assert(foundval == 0); // we don't store vals in the interval tree
    143    tl_assert(foundkey != 1);
    144    Block* res = (Block*)foundkey;
    145    tl_assert(res != &fake);
    146    // put at the top position
    147    fbc_cache1 = fbc_cache0;
    148    fbc_cache0 = res;
    149    stats__n_fBc_uncached++;
    150    return res;
    151 }
    152 
    153 // delete a block; asserts if not found.  (viz, 'a' must be
    154 // known to be present.)
    155 static void delete_Block_starting_at ( Addr a )
    156 {
    157    Block fake;
    158    fake.payload = a;
    159    fake.req_szB = 1;
    160    Bool found = VG_(delFromFM)( interval_tree,
    161                                 NULL, NULL, (Addr)&fake );
    162    tl_assert(found);
    163    fbc_cache0 = fbc_cache1 = NULL;
    164 }
    165 
    166 
    167 //------------------------------------------------------------//
    168 //--- a FM of allocation points (APs)                      ---//
    169 //------------------------------------------------------------//
    170 
    171 typedef
    172    struct {
    173       // the allocation point that we're summarising stats for
    174       ExeContext* ap;
    175       // used when printing results
    176       Bool shown;
    177       // The current number of blocks and bytes live for this AP
    178       ULong cur_blocks_live;
    179       ULong cur_bytes_live;
    180       // The number of blocks and bytes live at the max-liveness
    181       // point.  Note this is a bit subtle.  max_blocks_live is not
    182       // the maximum number of live blocks, but rather the number of
    183       // blocks live at the point of maximum byte liveness.  These are
    184       // not necessarily the same thing.
    185       ULong max_blocks_live;
    186       ULong max_bytes_live;
    187       // Total number of blocks and bytes allocated by this AP.
    188       ULong tot_blocks;
    189       ULong tot_bytes;
    190       // Sum of death ages for all blocks allocated by this AP,
    191       // that have subsequently been freed.
    192       ULong death_ages_sum;
    193       ULong deaths;
    194       // Total number of reads and writes in all blocks allocated
    195       // by this AP.
    196       ULong n_reads;
    197       ULong n_writes;
    198       /* Histogram information.  We maintain a histogram aggregated for
    199          all retiring Blocks allocated by this AP, but only if:
    200          - this AP has only ever allocated objects of one size
    201          - that size is <= HISTOGRAM_SIZE_LIMIT
    202          What we need therefore is a mechanism to see if this AP
    203          has only ever allocated blocks of one size.
    204 
    205          3 states:
    206             Unknown          because no retirement yet
    207             Exactly xsize    all retiring blocks are of this size
    208             Mixed            multiple different sizes seen
    209       */
    210       enum { Unknown=999, Exactly, Mixed } xsize_tag;
    211       SizeT xsize;
    212       UInt* histo; /* [0 .. xsize-1] */
    213    }
    214    APInfo;
    215 
    216 /* maps ExeContext*'s to APInfo*'s.  Note that the keys must match the
    217    .ap field in the values. */
    218 static WordFM* apinfo = NULL;  /* WordFM* ExeContext* APInfo* */
    219 
    220 
    221 /* 'bk' is being introduced (has just been allocated).  Find the
    222    relevant APInfo entry for it, or create one, based on the block's
    223    allocation EC.  Then, update the APInfo to the extent that we
    224    actually can, to reflect the allocation. */
    225 static void intro_Block ( Block* bk )
    226 {
    227    tl_assert(bk);
    228    tl_assert(bk->ap);
    229 
    230    APInfo* api   = NULL;
    231    UWord   keyW  = 0;
    232    UWord   valW  = 0;
    233    Bool    found = VG_(lookupFM)( apinfo,
    234                                   &keyW, &valW, (UWord)bk->ap );
    235    if (found) {
    236       api = (APInfo*)valW;
    237       tl_assert(keyW == (UWord)bk->ap);
    238    } else {
    239       api = VG_(malloc)( "dh.main.intro_Block.1", sizeof(APInfo) );
    240       VG_(memset)(api, 0, sizeof(*api));
    241       api->ap = bk->ap;
    242       Bool present = VG_(addToFM)( apinfo,
    243                                    (UWord)bk->ap, (UWord)api );
    244       tl_assert(!present);
    245       // histo stuff
    246       tl_assert(api->deaths == 0);
    247       api->xsize_tag = Unknown;
    248       api->xsize = 0;
    249       if (0) VG_(printf)("api %p   -->  Unknown\n", api);
    250    }
    251 
    252    tl_assert(api->ap == bk->ap);
    253 
    254    /* So: update stats to reflect an allocation */
    255 
    256    // # live blocks
    257    api->cur_blocks_live++;
    258 
    259    // # live bytes
    260    api->cur_bytes_live += bk->req_szB;
    261    if (api->cur_bytes_live > api->max_bytes_live) {
    262       api->max_bytes_live  = api->cur_bytes_live;
    263       api->max_blocks_live = api->cur_blocks_live;
    264    }
    265 
    266    // total blocks and bytes allocated here
    267    api->tot_blocks++;
    268    api->tot_bytes += bk->req_szB;
    269 
    270    // update summary globals
    271    g_tot_blocks++;
    272    g_tot_bytes += bk->req_szB;
    273 
    274    g_cur_blocks_live++;
    275    g_cur_bytes_live += bk->req_szB;
    276    if (g_cur_bytes_live > g_max_bytes_live) {
    277       g_max_bytes_live = g_cur_bytes_live;
    278       g_max_blocks_live = g_cur_blocks_live;
    279    }
    280 }
    281 
    282 
    283 /* 'bk' is retiring (being freed).  Find the relevant APInfo entry for
    284    it, which must already exist.  Then, fold info from 'bk' into that
    285    entry.  'because_freed' is True if the block is retiring because
    286    the client has freed it.  If it is False then the block is retiring
    287    because the program has finished, in which case we want to skip the
    288    updates of the total blocks live etc for this AP, but still fold in
    289    the access counts and histo data that have so far accumulated for
    290    the block. */
    291 static void retire_Block ( Block* bk, Bool because_freed )
    292 {
    293    tl_assert(bk);
    294    tl_assert(bk->ap);
    295 
    296    APInfo* api   = NULL;
    297    UWord   keyW  = 0;
    298    UWord   valW  = 0;
    299    Bool    found = VG_(lookupFM)( apinfo,
    300                                   &keyW, &valW, (UWord)bk->ap );
    301 
    302    tl_assert(found);
    303    api = (APInfo*)valW;
    304    tl_assert(api->ap == bk->ap);
    305 
    306    // update stats following this free.
    307    if (0)
    308    VG_(printf)("ec %p  api->c_by_l %llu  bk->rszB %llu\n",
    309                bk->ap, api->cur_bytes_live, (ULong)bk->req_szB);
    310 
    311    // update total blocks live etc for this AP
    312    if (because_freed) {
    313       tl_assert(api->cur_blocks_live >= 1);
    314       tl_assert(api->cur_bytes_live >= bk->req_szB);
    315       api->cur_blocks_live--;
    316       api->cur_bytes_live -= bk->req_szB;
    317 
    318       api->deaths++;
    319 
    320       tl_assert(bk->allocd_at <= g_guest_instrs_executed);
    321       api->death_ages_sum += (g_guest_instrs_executed - bk->allocd_at);
    322 
    323       // update global summary stats
    324       tl_assert(g_cur_blocks_live > 0);
    325       g_cur_blocks_live--;
    326       tl_assert(g_cur_bytes_live >= bk->req_szB);
    327       g_cur_bytes_live -= bk->req_szB;
    328    }
    329 
    330    // access counts
    331    api->n_reads  += bk->n_reads;
    332    api->n_writes += bk->n_writes;
    333 
    334    // histo stuff.  First, do state transitions for xsize/xsize_tag.
    335    switch (api->xsize_tag) {
    336 
    337       case Unknown:
    338          tl_assert(api->xsize == 0);
    339          tl_assert(api->deaths == 1 || api->deaths == 0);
    340          tl_assert(!api->histo);
    341          api->xsize_tag = Exactly;
    342          api->xsize = bk->req_szB;
    343          if (0) VG_(printf)("api %p   -->  Exactly(%lu)\n", api, api->xsize);
    344          // and allocate the histo
    345          if (bk->histoW) {
    346             api->histo = VG_(malloc)("dh.main.retire_Block.1",
    347                                      api->xsize * sizeof(UInt));
    348             VG_(memset)(api->histo, 0, api->xsize * sizeof(UInt));
    349          }
    350          break;
    351 
    352       case Exactly:
    353          //tl_assert(api->deaths > 1);
    354          if (bk->req_szB != api->xsize) {
    355             if (0) VG_(printf)("api %p   -->  Mixed(%lu -> %lu)\n",
    356                                api, api->xsize, bk->req_szB);
    357             api->xsize_tag = Mixed;
    358             api->xsize = 0;
    359             // deallocate the histo, if any
    360             if (api->histo) {
    361                VG_(free)(api->histo);
    362                api->histo = NULL;
    363             }
    364          }
    365          break;
    366 
    367       case Mixed:
    368          //tl_assert(api->deaths > 1);
    369          break;
    370 
    371       default:
    372         tl_assert(0);
    373    }
    374 
    375    // See if we can fold the histo data from this block into
    376    // the data for the AP
    377    if (api->xsize_tag == Exactly && api->histo && bk->histoW) {
    378       tl_assert(api->xsize == bk->req_szB);
    379       UWord i;
    380       for (i = 0; i < api->xsize; i++) {
    381          // FIXME: do something better in case of overflow of api->histo[..]
    382          // Right now, at least don't let it overflow/wrap around
    383          if (api->histo[i] <= 0xFFFE0000)
    384             api->histo[i] += (UInt)bk->histoW[i];
    385       }
    386       if (0) VG_(printf)("fold in, AP = %p\n", api);
    387    }
    388 
    389 
    390 
    391 #if 0
    392    if (bk->histoB) {
    393       VG_(printf)("block retiring, histo %lu: ", bk->req_szB);
    394       UWord i;
    395       for (i = 0; i < bk->req_szB; i++)
    396         VG_(printf)("%u ", (UInt)bk->histoB[i]);
    397       VG_(printf)("\n");
    398    } else {
    399       VG_(printf)("block retiring, no histo %lu\n", bk->req_szB);
    400    }
    401 #endif
    402 }
    403 
    404 /* This handles block resizing.  When a block with AP 'ec' has a
    405    size change of 'delta', call here to update the APInfo. */
    406 static void apinfo_change_cur_bytes_live( ExeContext* ec, Long delta )
    407 {
    408    APInfo* api   = NULL;
    409    UWord   keyW  = 0;
    410    UWord   valW  = 0;
    411    Bool    found = VG_(lookupFM)( apinfo,
    412                                   &keyW, &valW, (UWord)ec );
    413 
    414    tl_assert(found);
    415    api = (APInfo*)valW;
    416    tl_assert(api->ap == ec);
    417 
    418    if (delta < 0) {
    419       tl_assert(api->cur_bytes_live >= -delta);
    420       tl_assert(g_cur_bytes_live >= -delta);
    421    }
    422 
    423    // adjust current live size
    424    api->cur_bytes_live += delta;
    425    g_cur_bytes_live += delta;
    426 
    427    if (delta > 0 && api->cur_bytes_live > api->max_bytes_live) {
    428       api->max_bytes_live  = api->cur_bytes_live;
    429       api->max_blocks_live = api->cur_blocks_live;
    430    }
    431 
    432    // update global summary stats
    433    if (delta > 0 && g_cur_bytes_live > g_max_bytes_live) {
    434       g_max_bytes_live = g_cur_bytes_live;
    435       g_max_blocks_live = g_cur_blocks_live;
    436    }
    437    if (delta > 0)
    438       g_tot_bytes += delta;
    439 
    440    // adjust total allocation size
    441    if (delta > 0)
    442       api->tot_bytes += delta;
    443 }
    444 
    445 
    446 //------------------------------------------------------------//
    447 //--- update both Block and APInfos after {m,re}alloc/free ---//
    448 //------------------------------------------------------------//
    449 
    450 static
    451 void* new_block ( ThreadId tid, void* p, SizeT req_szB, SizeT req_alignB,
    452                   Bool is_zeroed )
    453 {
    454    tl_assert(p == NULL); // don't handle custom allocators right now
    455    SizeT actual_szB /*, slop_szB*/;
    456 
    457    if ((SSizeT)req_szB < 0) return NULL;
    458 
    459    if (req_szB == 0)
    460       req_szB = 1;  /* can't allow zero-sized blocks in the interval tree */
    461 
    462    // Allocate and zero if necessary
    463    if (!p) {
    464       p = VG_(cli_malloc)( req_alignB, req_szB );
    465       if (!p) {
    466          return NULL;
    467       }
    468       if (is_zeroed) VG_(memset)(p, 0, req_szB);
    469       actual_szB = VG_(malloc_usable_size)(p);
    470       tl_assert(actual_szB >= req_szB);
    471       /* slop_szB = actual_szB - req_szB; */
    472    } else {
    473       /* slop_szB = 0; */
    474    }
    475 
    476    // Make new HP_Chunk node, add to malloc_list
    477    Block* bk = VG_(malloc)("dh.new_block.1", sizeof(Block));
    478    bk->payload   = (Addr)p;
    479    bk->req_szB   = req_szB;
    480    bk->ap        = VG_(record_ExeContext)(tid, 0/*first word delta*/);
    481    bk->allocd_at = g_guest_instrs_executed;
    482    bk->n_reads   = 0;
    483    bk->n_writes  = 0;
    484    // set up histogram array, if the block isn't too large
    485    bk->histoW = NULL;
    486    if (req_szB <= HISTOGRAM_SIZE_LIMIT) {
    487       bk->histoW = VG_(malloc)("dh.new_block.2", req_szB * sizeof(UShort));
    488       VG_(memset)(bk->histoW, 0, req_szB * sizeof(UShort));
    489    }
    490 
    491    Bool present = VG_(addToFM)( interval_tree, (UWord)bk, (UWord)0/*no val*/);
    492    tl_assert(!present);
    493    fbc_cache0 = fbc_cache1 = NULL;
    494 
    495    intro_Block(bk);
    496 
    497    if (0) VG_(printf)("ALLOC %ld -> %p\n", req_szB, p);
    498 
    499    return p;
    500 }
    501 
    502 static
    503 void die_block ( void* p, Bool custom_free )
    504 {
    505    tl_assert(!custom_free);  // at least for now
    506 
    507    Block* bk = find_Block_containing( (Addr)p );
    508 
    509    if (!bk) {
    510      return; // bogus free
    511    }
    512 
    513    tl_assert(bk->req_szB > 0);
    514    // assert the block finder is behaving sanely
    515    tl_assert(bk->payload <= (Addr)p);
    516    tl_assert( (Addr)p < bk->payload + bk->req_szB );
    517 
    518    if (bk->payload != (Addr)p) {
    519       return; // bogus free
    520    }
    521 
    522    if (0) VG_(printf)(" FREE %p %llu\n",
    523                       p, g_guest_instrs_executed - bk->allocd_at);
    524 
    525    retire_Block(bk, True/*because_freed*/);
    526 
    527    VG_(cli_free)( (void*)bk->payload );
    528    delete_Block_starting_at( bk->payload );
    529    if (bk->histoW) {
    530       VG_(free)( bk->histoW );
    531       bk->histoW = NULL;
    532    }
    533    VG_(free)( bk );
    534 }
    535 
    536 
    537 static
    538 void* renew_block ( ThreadId tid, void* p_old, SizeT new_req_szB )
    539 {
    540    if (0) VG_(printf)("REALL %p %ld\n", p_old, new_req_szB);
    541    void* p_new = NULL;
    542 
    543    tl_assert(new_req_szB > 0); // map 0 to 1
    544 
    545    // Find the old block.
    546    Block* bk = find_Block_containing( (Addr)p_old );
    547    if (!bk) {
    548       return NULL;   // bogus realloc
    549    }
    550 
    551    tl_assert(bk->req_szB > 0);
    552    // assert the block finder is behaving sanely
    553    tl_assert(bk->payload <= (Addr)p_old);
    554    tl_assert( (Addr)p_old < bk->payload + bk->req_szB );
    555 
    556    if (bk->payload != (Addr)p_old) {
    557       return NULL; // bogus realloc
    558    }
    559 
    560    // Keeping the histogram alive in any meaningful way across
    561    // block resizing is too darn complicated.  Just throw it away.
    562    if (bk->histoW) {
    563       VG_(free)(bk->histoW);
    564       bk->histoW = NULL;
    565    }
    566 
    567    // Actually do the allocation, if necessary.
    568    if (new_req_szB <= bk->req_szB) {
    569 
    570       // New size is smaller or same; block not moved.
    571       apinfo_change_cur_bytes_live(bk->ap,
    572                                    (Long)new_req_szB - (Long)bk->req_szB);
    573       bk->req_szB = new_req_szB;
    574       return p_old;
    575 
    576    } else {
    577 
    578       // New size is bigger;  make new block, copy shared contents, free old.
    579       p_new = VG_(cli_malloc)(VG_(clo_alignment), new_req_szB);
    580       if (!p_new) {
    581          // Nb: if realloc fails, NULL is returned but the old block is not
    582          // touched.  What an awful function.
    583          return NULL;
    584       }
    585       tl_assert(p_new != p_old);
    586 
    587       VG_(memcpy)(p_new, p_old, bk->req_szB);
    588       VG_(cli_free)(p_old);
    589 
    590       // Since the block has moved, we need to re-insert it into the
    591       // interval tree at the new place.  Do this by removing
    592       // and re-adding it.
    593       delete_Block_starting_at( (Addr)p_old );
    594       // now 'bk' is no longer in the tree, but the Block itself
    595       // is still alive
    596 
    597       // Update the metadata.
    598       apinfo_change_cur_bytes_live(bk->ap,
    599                                    (Long)new_req_szB - (Long)bk->req_szB);
    600       bk->payload = (Addr)p_new;
    601       bk->req_szB = new_req_szB;
    602 
    603       // and re-add
    604       Bool present
    605          = VG_(addToFM)( interval_tree, (UWord)bk, (UWord)0/*no val*/);
    606       tl_assert(!present);
    607       fbc_cache0 = fbc_cache1 = NULL;
    608 
    609       return p_new;
    610    }
    611    /*NOTREACHED*/
    612    tl_assert(0);
    613 }
    614 
    615 
    616 //------------------------------------------------------------//
    617 //--- malloc() et al replacement wrappers                  ---//
    618 //------------------------------------------------------------//
    619 
    620 static void* dh_malloc ( ThreadId tid, SizeT szB )
    621 {
    622    return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
    623 }
    624 
    625 static void* dh___builtin_new ( ThreadId tid, SizeT szB )
    626 {
    627    return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
    628 }
    629 
    630 static void* dh___builtin_vec_new ( ThreadId tid, SizeT szB )
    631 {
    632    return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
    633 }
    634 
    635 static void* dh_calloc ( ThreadId tid, SizeT m, SizeT szB )
    636 {
    637    return new_block( tid, NULL, m*szB, VG_(clo_alignment), /*is_zeroed*/True );
    638 }
    639 
    640 static void *dh_memalign ( ThreadId tid, SizeT alignB, SizeT szB )
    641 {
    642    return new_block( tid, NULL, szB, alignB, False );
    643 }
    644 
    645 static void dh_free ( ThreadId tid __attribute__((unused)), void* p )
    646 {
    647    die_block( p, /*custom_free*/False );
    648 }
    649 
    650 static void dh___builtin_delete ( ThreadId tid, void* p )
    651 {
    652    die_block( p, /*custom_free*/False);
    653 }
    654 
    655 static void dh___builtin_vec_delete ( ThreadId tid, void* p )
    656 {
    657    die_block( p, /*custom_free*/False );
    658 }
    659 
    660 static void* dh_realloc ( ThreadId tid, void* p_old, SizeT new_szB )
    661 {
    662    if (p_old == NULL) {
    663       return dh_malloc(tid, new_szB);
    664    }
    665    if (new_szB == 0) {
    666       dh_free(tid, p_old);
    667       return NULL;
    668    }
    669    return renew_block(tid, p_old, new_szB);
    670 }
    671 
    672 static SizeT dh_malloc_usable_size ( ThreadId tid, void* p )
    673 {
    674    tl_assert(0);
    675 //zz   HP_Chunk* hc = VG_(HT_lookup)( malloc_list, (UWord)p );
    676 //zz
    677 //zz   return ( hc ? hc->req_szB + hc->slop_szB : 0 );
    678 }
    679 
    680 //------------------------------------------------------------//
    681 //--- memory references                                    ---//
    682 //------------------------------------------------------------//
    683 
    684 static
    685 void inc_histo_for_block ( Block* bk, Addr addr, UWord szB )
    686 {
    687    UWord i, offMin, offMax1;
    688    offMin = addr - bk->payload;
    689    tl_assert(offMin < bk->req_szB);
    690    offMax1 = offMin + szB;
    691    if (offMax1 > bk->req_szB)
    692       offMax1 = bk->req_szB;
    693    //VG_(printf)("%lu %lu   (size of block %lu)\n", offMin, offMax1, bk->req_szB);
    694    for (i = offMin; i < offMax1; i++) {
    695       UShort n = bk->histoW[i];
    696       if (n < 0xFFFF) n++;
    697       bk->histoW[i] = n;
    698    }
    699 }
    700 
    701 static VG_REGPARM(2)
    702 void dh_handle_write ( Addr addr, UWord szB )
    703 {
    704    Block* bk = find_Block_containing(addr);
    705    if (bk) {
    706       bk->n_writes += szB;
    707       if (bk->histoW)
    708          inc_histo_for_block(bk, addr, szB);
    709    }
    710 }
    711 
    712 static VG_REGPARM(2)
    713 void dh_handle_read ( Addr addr, UWord szB )
    714 {
    715    Block* bk = find_Block_containing(addr);
    716    if (bk) {
    717       bk->n_reads += szB;
    718       if (bk->histoW)
    719          inc_histo_for_block(bk, addr, szB);
    720    }
    721 }
    722 
    723 
    724 // Handle reads and writes by syscalls (read == kernel
    725 // reads user space, write == kernel writes user space).
    726 // Assumes no such read or write spans a heap block
    727 // boundary and so we can treat it just as one giant
    728 // read or write.
    729 static
    730 void dh_handle_noninsn_read ( CorePart part, ThreadId tid, const HChar* s,
    731                               Addr base, SizeT size )
    732 {
    733    switch (part) {
    734       case Vg_CoreSysCall:
    735          dh_handle_read(base, size);
    736          break;
    737       case Vg_CoreSysCallArgInMem:
    738          break;
    739       case Vg_CoreTranslate:
    740          break;
    741       default:
    742          tl_assert(0);
    743    }
    744 }
    745 
    746 static
    747 void dh_handle_noninsn_write ( CorePart part, ThreadId tid,
    748                                Addr base, SizeT size )
    749 {
    750    switch (part) {
    751       case Vg_CoreSysCall:
    752          dh_handle_write(base, size);
    753          break;
    754       case Vg_CoreSignal:
    755          break;
    756       default:
    757          tl_assert(0);
    758    }
    759 }
    760 
    761 
    762 //------------------------------------------------------------//
    763 //--- Instrumentation                                      ---//
    764 //------------------------------------------------------------//
    765 
    766 #define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
    767 #define mkexpr(_tmp)             IRExpr_RdTmp((_tmp))
    768 #define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
    769 #define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
    770 #define assign(_t, _e)           IRStmt_WrTmp((_t), (_e))
    771 
    772 static
    773 void add_counter_update(IRSB* sbOut, Int n)
    774 {
    775    #if defined(VG_BIGENDIAN)
    776    # define END Iend_BE
    777    #elif defined(VG_LITTLEENDIAN)
    778    # define END Iend_LE
    779    #else
    780    # error "Unknown endianness"
    781    #endif
    782    // Add code to increment 'g_guest_instrs_executed' by 'n', like this:
    783    //   WrTmp(t1, Load64(&g_guest_instrs_executed))
    784    //   WrTmp(t2, Add64(RdTmp(t1), Const(n)))
    785    //   Store(&g_guest_instrs_executed, t2)
    786    IRTemp t1 = newIRTemp(sbOut->tyenv, Ity_I64);
    787    IRTemp t2 = newIRTemp(sbOut->tyenv, Ity_I64);
    788    IRExpr* counter_addr = mkIRExpr_HWord( (HWord)&g_guest_instrs_executed );
    789 
    790    IRStmt* st1 = assign(t1, IRExpr_Load(END, Ity_I64, counter_addr));
    791    IRStmt* st2 = assign(t2, binop(Iop_Add64, mkexpr(t1), mkU64(n)));
    792    IRStmt* st3 = IRStmt_Store(END, counter_addr, mkexpr(t2));
    793 
    794    addStmtToIRSB( sbOut, st1 );
    795    addStmtToIRSB( sbOut, st2 );
    796    addStmtToIRSB( sbOut, st3 );
    797 }
    798 
    799 static
    800 void addMemEvent(IRSB* sbOut, Bool isWrite, Int szB, IRExpr* addr,
    801                  Int goff_sp)
    802 {
    803    IRType   tyAddr   = Ity_INVALID;
    804    const HChar* hName= NULL;
    805    void*    hAddr    = NULL;
    806    IRExpr** argv     = NULL;
    807    IRDirty* di       = NULL;
    808 
    809    const Int THRESH = 4096 * 4; // somewhat arbitrary
    810    const Int rz_szB = VG_STACK_REDZONE_SZB;
    811 
    812    tyAddr = typeOfIRExpr( sbOut->tyenv, addr );
    813    tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
    814 
    815    if (isWrite) {
    816       hName = "dh_handle_write";
    817       hAddr = &dh_handle_write;
    818    } else {
    819       hName = "dh_handle_read";
    820       hAddr = &dh_handle_read;
    821    }
    822 
    823    argv = mkIRExprVec_2( addr, mkIRExpr_HWord(szB) );
    824 
    825    /* Add the helper. */
    826    tl_assert(hName);
    827    tl_assert(hAddr);
    828    tl_assert(argv);
    829    di = unsafeIRDirty_0_N( 2/*regparms*/,
    830                            hName, VG_(fnptr_to_fnentry)( hAddr ),
    831                            argv );
    832 
    833    /* Generate the guard condition: "(addr - (SP - RZ)) >u N", for
    834       some arbitrary N.  If that fails then addr is in the range (SP -
    835       RZ .. SP + N - RZ).  If N is smallish (a page?) then we can say
    836       addr is within a page of SP and so can't possibly be a heap
    837       access, and so can be skipped. */
    838    IRTemp sp = newIRTemp(sbOut->tyenv, tyAddr);
    839    addStmtToIRSB( sbOut, assign(sp, IRExpr_Get(goff_sp, tyAddr)));
    840 
    841    IRTemp sp_minus_rz = newIRTemp(sbOut->tyenv, tyAddr);
    842    addStmtToIRSB(
    843       sbOut,
    844       assign(sp_minus_rz,
    845              tyAddr == Ity_I32
    846                 ? binop(Iop_Sub32, mkexpr(sp), mkU32(rz_szB))
    847                 : binop(Iop_Sub64, mkexpr(sp), mkU64(rz_szB)))
    848    );
    849 
    850    IRTemp diff = newIRTemp(sbOut->tyenv, tyAddr);
    851    addStmtToIRSB(
    852       sbOut,
    853       assign(diff,
    854              tyAddr == Ity_I32
    855                 ? binop(Iop_Sub32, addr, mkexpr(sp_minus_rz))
    856                 : binop(Iop_Sub64, addr, mkexpr(sp_minus_rz)))
    857    );
    858 
    859    IRTemp guard = newIRTemp(sbOut->tyenv, Ity_I1);
    860    addStmtToIRSB(
    861       sbOut,
    862       assign(guard,
    863              tyAddr == Ity_I32
    864                 ? binop(Iop_CmpLT32U, mkU32(THRESH), mkexpr(diff))
    865                 : binop(Iop_CmpLT64U, mkU64(THRESH), mkexpr(diff)))
    866    );
    867    di->guard = mkexpr(guard);
    868 
    869    addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
    870 }
    871 
    872 static
    873 IRSB* dh_instrument ( VgCallbackClosure* closure,
    874                       IRSB* sbIn,
    875                       VexGuestLayout* layout,
    876                       VexGuestExtents* vge,
    877                       VexArchInfo* archinfo_host,
    878                       IRType gWordTy, IRType hWordTy )
    879 {
    880    Int   i, n = 0;
    881    IRSB* sbOut;
    882    IRTypeEnv* tyenv = sbIn->tyenv;
    883 
    884    const Int goff_sp = layout->offset_SP;
    885 
    886    // We increment the instruction count in two places:
    887    // - just before any Ist_Exit statements;
    888    // - just before the IRSB's end.
    889    // In the former case, we zero 'n' and then continue instrumenting.
    890 
    891    sbOut = deepCopyIRSBExceptStmts(sbIn);
    892 
    893    // Copy verbatim any IR preamble preceding the first IMark
    894    i = 0;
    895    while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
    896       addStmtToIRSB( sbOut, sbIn->stmts[i] );
    897       i++;
    898    }
    899 
    900    for (/*use current i*/; i < sbIn->stmts_used; i++) {
    901       IRStmt* st = sbIn->stmts[i];
    902 
    903       if (!st || st->tag == Ist_NoOp) continue;
    904 
    905       switch (st->tag) {
    906 
    907          case Ist_IMark: {
    908             n++;
    909             break;
    910          }
    911 
    912          case Ist_Exit: {
    913             if (n > 0) {
    914                // Add an increment before the Exit statement, then reset 'n'.
    915                add_counter_update(sbOut, n);
    916                n = 0;
    917             }
    918             break;
    919          }
    920 
    921          case Ist_WrTmp: {
    922             IRExpr* data = st->Ist.WrTmp.data;
    923             if (data->tag == Iex_Load) {
    924                IRExpr* aexpr = data->Iex.Load.addr;
    925                // Note also, endianness info is ignored.  I guess
    926                // that's not interesting.
    927                addMemEvent( sbOut, False/*!isWrite*/,
    928                             sizeofIRType(data->Iex.Load.ty),
    929                             aexpr, goff_sp );
    930             }
    931             break;
    932          }
    933 
    934          case Ist_Store: {
    935             IRExpr* data  = st->Ist.Store.data;
    936             IRExpr* aexpr = st->Ist.Store.addr;
    937             addMemEvent( sbOut, True/*isWrite*/,
    938                          sizeofIRType(typeOfIRExpr(tyenv, data)),
    939                          aexpr, goff_sp );
    940             break;
    941          }
    942 
    943          case Ist_Dirty: {
    944             Int      dataSize;
    945             IRDirty* d = st->Ist.Dirty.details;
    946             if (d->mFx != Ifx_None) {
    947                /* This dirty helper accesses memory.  Collect the details. */
    948                tl_assert(d->mAddr != NULL);
    949                tl_assert(d->mSize != 0);
    950                dataSize = d->mSize;
    951                // Large (eg. 28B, 108B, 512B on x86) data-sized
    952                // instructions will be done inaccurately, but they're
    953                // very rare and this avoids errors from hitting more
    954                // than two cache lines in the simulation.
    955                if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
    956                   addMemEvent( sbOut, False/*!isWrite*/,
    957                                dataSize, d->mAddr, goff_sp );
    958                if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
    959                   addMemEvent( sbOut, True/*isWrite*/,
    960                                dataSize, d->mAddr, goff_sp );
    961             } else {
    962                tl_assert(d->mAddr == NULL);
    963                tl_assert(d->mSize == 0);
    964             }
    965             break;
    966          }
    967 
    968          case Ist_CAS: {
    969             /* We treat it as a read and a write of the location.  I
    970                think that is the same behaviour as it was before IRCAS
    971                was introduced, since prior to that point, the Vex
    972                front ends would translate a lock-prefixed instruction
    973                into a (normal) read followed by a (normal) write. */
    974             Int    dataSize;
    975             IRCAS* cas = st->Ist.CAS.details;
    976             tl_assert(cas->addr != NULL);
    977             tl_assert(cas->dataLo != NULL);
    978             dataSize = sizeofIRType(typeOfIRExpr(tyenv, cas->dataLo));
    979             if (cas->dataHi != NULL)
    980                dataSize *= 2; /* since it's a doubleword-CAS */
    981             addMemEvent( sbOut, False/*!isWrite*/,
    982                          dataSize, cas->addr, goff_sp );
    983             addMemEvent( sbOut, True/*isWrite*/,
    984                          dataSize, cas->addr, goff_sp );
    985             break;
    986          }
    987 
    988          case Ist_LLSC: {
    989             IRType dataTy;
    990             if (st->Ist.LLSC.storedata == NULL) {
    991                /* LL */
    992                dataTy = typeOfIRTemp(tyenv, st->Ist.LLSC.result);
    993                addMemEvent( sbOut, False/*!isWrite*/,
    994                             sizeofIRType(dataTy),
    995                             st->Ist.LLSC.addr, goff_sp );
    996             } else {
    997                /* SC */
    998                dataTy = typeOfIRExpr(tyenv, st->Ist.LLSC.storedata);
    999                addMemEvent( sbOut, True/*isWrite*/,
   1000                             sizeofIRType(dataTy),
   1001                             st->Ist.LLSC.addr, goff_sp );
   1002             }
   1003             break;
   1004          }
   1005 
   1006          default:
   1007             break;
   1008       }
   1009 
   1010       addStmtToIRSB( sbOut, st );
   1011    }
   1012 
   1013    if (n > 0) {
   1014       // Add an increment before the SB end.
   1015       add_counter_update(sbOut, n);
   1016    }
   1017    return sbOut;
   1018 }
   1019 
   1020 #undef binop
   1021 #undef mkexpr
   1022 #undef mkU32
   1023 #undef mkU64
   1024 #undef assign
   1025 
   1026 
   1027 //------------------------------------------------------------//
   1028 //--- Command line args                                    ---//
   1029 //------------------------------------------------------------//
   1030 
   1031 // FORWARDS
   1032 static Bool identify_metric ( /*OUT*/ULong(**get_metricP)(APInfo*),
   1033                               /*OUT*/Bool* increasingP,
   1034                               const HChar* metric_name );
   1035 
   1036 static Int    clo_show_top_n = 10;
   1037 static const HChar *clo_sort_by = "max-bytes-live";
   1038 
   1039 static Bool dh_process_cmd_line_option(const HChar* arg)
   1040 {
   1041    if VG_BINT_CLO(arg, "--show-top-n", clo_show_top_n, 1, 100000) {}
   1042 
   1043    else if VG_STR_CLO(arg, "--sort-by", clo_sort_by) {
   1044        ULong (*dummyFn)(APInfo*);
   1045        Bool dummyB;
   1046        Bool ok = identify_metric( &dummyFn, &dummyB, clo_sort_by);
   1047        if (!ok)
   1048           return False;
   1049        // otherwise it's OK, in which case leave it alone.
   1050        // show_top_n_apinfos will later convert the string by a
   1051        // second call to identify_metric.
   1052    }
   1053 
   1054    else
   1055       return VG_(replacement_malloc_process_cmd_line_option)(arg);
   1056 
   1057    return True;
   1058 }
   1059 
   1060 
   1061 static void dh_print_usage(void)
   1062 {
   1063    VG_(printf)(
   1064 "    --show-top-n=number       show the top <number> alloc points [10]\n"
   1065 "    --sort-by=string\n"
   1066 "            sort the allocation points by the metric\n"
   1067 "            defined by <string>, thusly:\n"
   1068 "                max-bytes-live    maximum live bytes [default]\n"
   1069 "                tot-bytes-allocd  total allocation (turnover)\n"
   1070 "                max-blocks-live   maximum live blocks\n"
   1071    );
   1072 }
   1073 
   1074 static void dh_print_debug_usage(void)
   1075 {
   1076    VG_(printf)(
   1077 "    (none)\n"
   1078    );
   1079 }
   1080 
   1081 
   1082 //------------------------------------------------------------//
   1083 //--- Finalisation                                         ---//
   1084 //------------------------------------------------------------//
   1085 
   1086 static void show_N_div_100( /*OUT*/HChar* buf, ULong n )
   1087 {
   1088    ULong nK = n / 100;
   1089    ULong nR = n % 100;
   1090    VG_(sprintf)(buf, "%llu.%s%llu", nK,
   1091                 nR < 10 ? "0" : "",
   1092                 nR);
   1093 }
   1094 
   1095 static void show_APInfo ( APInfo* api )
   1096 {
   1097    HChar bufA[80];
   1098    VG_(memset)(bufA, 0, sizeof(bufA));
   1099    if (api->tot_blocks > 0) {
   1100       show_N_div_100( bufA, ((ULong)api->tot_bytes * 100ULL)
   1101                               / (ULong)api->tot_blocks );
   1102    } else {
   1103       bufA[0] = 'N'; bufA[1] = 'a'; bufA[2] = 'N';
   1104    }
   1105 
   1106    VG_(umsg)("max-live:    %'llu in %'llu blocks\n",
   1107              api->max_bytes_live, api->max_blocks_live);
   1108    VG_(umsg)("tot-alloc:   %'llu in %'llu blocks (avg size %s)\n",
   1109              api->tot_bytes, api->tot_blocks, bufA);
   1110 
   1111    tl_assert(api->tot_blocks >= api->max_blocks_live);
   1112    tl_assert(api->tot_bytes >= api->max_bytes_live);
   1113 
   1114    if (api->deaths > 0) {
   1115       // Average Age at Death
   1116       ULong aad = api->deaths == 0
   1117                   ? 0 : (api->death_ages_sum / api->deaths);
   1118       // AAD as a fraction of the total program lifetime (so far)
   1119       // measured in ten-thousand-ths (aad_frac_10k == 10000 means the
   1120       // complete lifetime of the program.
   1121       ULong aad_frac_10k
   1122          = g_guest_instrs_executed == 0
   1123            ? 0 : (10000ULL * aad) / g_guest_instrs_executed;
   1124       HChar buf[16];
   1125       show_N_div_100(buf, aad_frac_10k);
   1126       VG_(umsg)("deaths:      %'llu, at avg age %'llu "
   1127                 "(%s%% of prog lifetime)\n",
   1128                 api->deaths, aad, buf );
   1129    } else {
   1130       VG_(umsg)("deaths:      none (none of these blocks were freed)\n");
   1131    }
   1132 
   1133    HChar bufR[80], bufW[80];
   1134    VG_(memset)(bufR, 0, sizeof(bufR));
   1135    VG_(memset)(bufW, 0, sizeof(bufW));
   1136    if (api->tot_bytes > 0) {
   1137       show_N_div_100(bufR, (100ULL * api->n_reads) / api->tot_bytes);
   1138       show_N_div_100(bufW, (100ULL * api->n_writes) / api->tot_bytes);
   1139    } else {
   1140       VG_(strcat)(bufR, "Inf");
   1141       VG_(strcat)(bufW, "Inf");
   1142    }
   1143 
   1144    VG_(umsg)("acc-ratios:  %s rd, %s wr "
   1145              " (%'llu b-read, %'llu b-written)\n",
   1146              bufR, bufW,
   1147              api->n_reads, api->n_writes);
   1148 
   1149    VG_(pp_ExeContext)(api->ap);
   1150 
   1151    if (api->histo && api->xsize_tag == Exactly) {
   1152       VG_(umsg)("\nAggregated access counts by offset:\n");
   1153       VG_(umsg)("\n");
   1154       UWord i;
   1155       if (api->xsize > 0)
   1156          VG_(umsg)("[   0]  ");
   1157       for (i = 0; i < api->xsize; i++) {
   1158          if (i > 0 && (i % 16) == 0 && i != api->xsize-1) {
   1159             VG_(umsg)("\n");
   1160             VG_(umsg)("[%4lu]  ", i);
   1161          }
   1162          VG_(umsg)("%u ", api->histo[i]);
   1163       }
   1164       VG_(umsg)("\n");
   1165    }
   1166 }
   1167 
   1168 
   1169 /* Metric-access functions for APInfos. */
   1170 static ULong get_metric__max_bytes_live ( APInfo* api ) {
   1171    return api->max_bytes_live;
   1172 }
   1173 static ULong get_metric__tot_bytes ( APInfo* api ) {
   1174    return api->tot_bytes;
   1175 }
   1176 static ULong get_metric__max_blocks_live ( APInfo* api ) {
   1177    return api->max_blocks_live;
   1178 }
   1179 
   1180 /* Given a string, return the metric-access function and also a Bool
   1181    indicating whether we want increasing or decreasing values of the
   1182    metric.  This is used twice, once in command line processing, and
   1183    then again in show_top_n_apinfos.  Returns False if the given
   1184    string could not be identified.*/
   1185 static Bool identify_metric ( /*OUT*/ULong(**get_metricP)(APInfo*),
   1186                               /*OUT*/Bool* increasingP,
   1187                               const HChar* metric_name )
   1188 {
   1189    if (0 == VG_(strcmp)(metric_name, "max-bytes-live")) {
   1190       *get_metricP = get_metric__max_bytes_live;
   1191       *increasingP = False;
   1192       return True;
   1193    }
   1194    if (0 == VG_(strcmp)(metric_name, "tot-bytes-allocd")) {
   1195       *get_metricP = get_metric__tot_bytes;
   1196       *increasingP = False;
   1197       return True;
   1198    }
   1199    if (0 == VG_(strcmp)(metric_name, "max-blocks-live")) {
   1200       *get_metricP = get_metric__max_blocks_live;
   1201       *increasingP = False;
   1202       return True;
   1203    }
   1204    return False;
   1205 }
   1206 
   1207 
   1208 static void show_top_n_apinfos ( void )
   1209 {
   1210    Int   i;
   1211    UWord keyW, valW;
   1212    ULong (*get_metric)(APInfo*);
   1213    Bool  increasing;
   1214 
   1215    const HChar* metric_name = clo_sort_by;
   1216    tl_assert(metric_name); // ensured by clo processing
   1217 
   1218    Bool ok = identify_metric( &get_metric, &increasing, metric_name );
   1219    tl_assert(ok); // ensured by clo processing
   1220 
   1221    VG_(umsg)("\n");
   1222    VG_(umsg)("======== ORDERED BY %s \"%s\": "
   1223              "top %d allocators ========\n",
   1224              increasing ? "increasing" : "decreasing",
   1225              metric_name, clo_show_top_n );
   1226 
   1227    // Clear all .shown bits
   1228    VG_(initIterFM)( apinfo );
   1229    while (VG_(nextIterFM)( apinfo, &keyW, &valW )) {
   1230       APInfo* api = (APInfo*)valW;
   1231       tl_assert(api && api->ap == (ExeContext*)keyW);
   1232       api->shown = False;
   1233    }
   1234    VG_(doneIterFM)( apinfo );
   1235 
   1236    // Now print the top N entries.  Each one requires a
   1237    // complete scan of the set.  Duh.
   1238    for (i = 0; i < clo_show_top_n; i++) {
   1239       ULong   best_metric = increasing ? ~0ULL : 0ULL;
   1240       APInfo* best_api    = NULL;
   1241 
   1242       VG_(initIterFM)( apinfo );
   1243       while (VG_(nextIterFM)( apinfo, &keyW, &valW )) {
   1244          APInfo* api = (APInfo*)valW;
   1245          if (api->shown)
   1246             continue;
   1247          ULong metric = get_metric(api);
   1248          if (increasing ? (metric < best_metric) : (metric > best_metric)) {
   1249             best_metric = metric;
   1250             best_api = api;
   1251          }
   1252       }
   1253       VG_(doneIterFM)( apinfo );
   1254 
   1255       if (!best_api)
   1256          break; // all APIs have been shown.  Stop.
   1257 
   1258       VG_(umsg)("\n");
   1259       VG_(umsg)("-------------------- %d of %d --------------------\n",
   1260                 i+1, clo_show_top_n );
   1261       show_APInfo(best_api);
   1262       best_api->shown = True;
   1263    }
   1264 
   1265    VG_(umsg)("\n");
   1266 }
   1267 
   1268 
   1269 static void dh_fini(Int exit_status)
   1270 {
   1271    // Before printing statistics, we must harvest access counts for
   1272    // all the blocks that are still alive.  Not doing so gives
   1273    // access ratios which are too low (zero, in the worst case)
   1274    // for such blocks, since the accesses that do get made will
   1275    // (if we skip this step) not get folded into the AP summaries.
   1276    UWord keyW, valW;
   1277    VG_(initIterFM)( interval_tree );
   1278    while (VG_(nextIterFM)( interval_tree, &keyW, &valW )) {
   1279       Block* bk = (Block*)keyW;
   1280       tl_assert(valW == 0);
   1281       tl_assert(bk);
   1282       retire_Block(bk, False/*!because_freed*/);
   1283    }
   1284    VG_(doneIterFM)( interval_tree );
   1285 
   1286    // show results
   1287    VG_(umsg)("======== SUMMARY STATISTICS ========\n");
   1288    VG_(umsg)("\n");
   1289    VG_(umsg)("guest_insns:  %'llu\n", g_guest_instrs_executed);
   1290    VG_(umsg)("\n");
   1291    VG_(umsg)("max_live:     %'llu in %'llu blocks\n",
   1292              g_max_bytes_live, g_max_blocks_live);
   1293    VG_(umsg)("\n");
   1294    VG_(umsg)("tot_alloc:    %'llu in %'llu blocks\n",
   1295              g_tot_bytes, g_tot_blocks);
   1296    VG_(umsg)("\n");
   1297    if (g_tot_bytes > 0) {
   1298       VG_(umsg)("insns per allocated byte: %'llu\n",
   1299                 g_guest_instrs_executed / g_tot_bytes);
   1300       VG_(umsg)("\n");
   1301    }
   1302 
   1303    show_top_n_apinfos();
   1304 
   1305    VG_(umsg)("\n");
   1306    VG_(umsg)("\n");
   1307    VG_(umsg)("==============================================================\n");
   1308    VG_(umsg)("\n");
   1309    VG_(umsg)("Some hints: (see --help for command line option details):\n");
   1310    VG_(umsg)("\n");
   1311    VG_(umsg)("* summary stats for whole program are at the top of this output\n");
   1312    VG_(umsg)("\n");
   1313    VG_(umsg)("* --show-top-n=  controls how many alloc points are shown.\n");
   1314    VG_(umsg)("                 You probably want to set it much higher than\n");
   1315    VG_(umsg)("                 the default value (10)\n");
   1316    VG_(umsg)("\n");
   1317    VG_(umsg)("* --sort-by=     specifies the sort key for output.\n");
   1318    VG_(umsg)("                 See --help for details.\n");
   1319    VG_(umsg)("\n");
   1320    VG_(umsg)("* Each allocation stack, by default 12 frames, counts as\n");
   1321    VG_(umsg)("  a separate alloc point.  This causes the data to be spread out\n");
   1322    VG_(umsg)("  over far too many alloc points.  I strongly suggest using\n");
   1323    VG_(umsg)("  --num-callers=4 or some such, to reduce the spreading.\n");
   1324    VG_(umsg)("\n");
   1325 
   1326    if (VG_(clo_stats)) {
   1327       VG_(dmsg)(" dhat: find_Block_containing:\n");
   1328       VG_(dmsg)("             found: %'lu (%'lu cached + %'lu uncached)\n",
   1329                 stats__n_fBc_cached + stats__n_fBc_uncached,
   1330                 stats__n_fBc_cached,
   1331                 stats__n_fBc_uncached);
   1332       VG_(dmsg)("          notfound: %'lu\n", stats__n_fBc_notfound);
   1333       VG_(dmsg)("\n");
   1334    }
   1335 }
   1336 
   1337 
   1338 //------------------------------------------------------------//
   1339 //--- Initialisation                                       ---//
   1340 //------------------------------------------------------------//
   1341 
   1342 static void dh_post_clo_init(void)
   1343 {
   1344 }
   1345 
   1346 static void dh_pre_clo_init(void)
   1347 {
   1348    VG_(details_name)            ("DHAT");
   1349    VG_(details_version)         (NULL);
   1350    VG_(details_description)     ("a dynamic heap analysis tool");
   1351    VG_(details_copyright_author)(
   1352       "Copyright (C) 2010-2013, and GNU GPL'd, by Mozilla Inc");
   1353    VG_(details_bug_reports_to)  (VG_BUGS_TO);
   1354 
   1355    // Basic functions.
   1356    VG_(basic_tool_funcs)          (dh_post_clo_init,
   1357                                    dh_instrument,
   1358                                    dh_fini);
   1359 //zz
   1360    // Needs.
   1361    VG_(needs_libc_freeres)();
   1362    VG_(needs_command_line_options)(dh_process_cmd_line_option,
   1363                                    dh_print_usage,
   1364                                    dh_print_debug_usage);
   1365 //zz   VG_(needs_client_requests)     (dh_handle_client_request);
   1366 //zz   VG_(needs_sanity_checks)       (dh_cheap_sanity_check,
   1367 //zz                                   dh_expensive_sanity_check);
   1368    VG_(needs_malloc_replacement)  (dh_malloc,
   1369                                    dh___builtin_new,
   1370                                    dh___builtin_vec_new,
   1371                                    dh_memalign,
   1372                                    dh_calloc,
   1373                                    dh_free,
   1374                                    dh___builtin_delete,
   1375                                    dh___builtin_vec_delete,
   1376                                    dh_realloc,
   1377                                    dh_malloc_usable_size,
   1378                                    0 );
   1379 
   1380    VG_(track_pre_mem_read)        ( dh_handle_noninsn_read );
   1381    //VG_(track_pre_mem_read_asciiz) ( check_mem_is_defined_asciiz );
   1382    VG_(track_post_mem_write)      ( dh_handle_noninsn_write );
   1383 
   1384    tl_assert(!interval_tree);
   1385    tl_assert(!fbc_cache0);
   1386    tl_assert(!fbc_cache1);
   1387 
   1388    interval_tree = VG_(newFM)( VG_(malloc),
   1389                                "dh.main.interval_tree.1",
   1390                                VG_(free),
   1391                                interval_tree_Cmp );
   1392 
   1393    apinfo = VG_(newFM)( VG_(malloc),
   1394                         "dh.main.apinfo.1",
   1395                         VG_(free),
   1396                         NULL/*unboxedcmp*/ );
   1397 }
   1398 
   1399 VG_DETERMINE_INTERFACE_VERSION(dh_pre_clo_init)
   1400 
   1401 //--------------------------------------------------------------------//
   1402 //--- end                                                dh_main.c ---//
   1403 //--------------------------------------------------------------------//
   1404