      1 
       2 //--------------------------------------------------------------------//
       3 //--- DHAT: a Dynamic Heap Analysis Tool                 dh_main.c ---//
       4 //--------------------------------------------------------------------//
      5 
      6 /*
      7    This file is part of DHAT, a Valgrind tool for profiling the
      8    heap usage of programs.
      9 
     10    Copyright (C) 2010-2010 Mozilla Inc
     11 
     12    This program is free software; you can redistribute it and/or
     13    modify it under the terms of the GNU General Public License as
     14    published by the Free Software Foundation; either version 2 of the
     15    License, or (at your option) any later version.
     16 
     17    This program is distributed in the hope that it will be useful, but
     18    WITHOUT ANY WARRANTY; without even the implied warranty of
     19    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     20    General Public License for more details.
     21 
     22    You should have received a copy of the GNU General Public License
     23    along with this program; if not, write to the Free Software
     24    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     25    02111-1307, USA.
     26 
     27    The GNU General Public License is contained in the file COPYING.
     28 */
     29 
      30 /* Contributed by Julian Seward <jseward@acm.org> */
     31 
     32 
     33 #include "pub_tool_basics.h"
     34 #include "pub_tool_libcbase.h"
     35 #include "pub_tool_libcassert.h"
     36 #include "pub_tool_libcprint.h"
     37 #include "pub_tool_machine.h"      // VG_(fnptr_to_fnentry)
     38 #include "pub_tool_mallocfree.h"
     39 #include "pub_tool_options.h"
     40 #include "pub_tool_replacemalloc.h"
     41 #include "pub_tool_tooliface.h"
     42 #include "pub_tool_wordfm.h"
     43 
     44 #define HISTOGRAM_SIZE_LIMIT 1024
     45 
     46 
     47 //------------------------------------------------------------//
     48 //--- Globals                                              ---//
     49 //------------------------------------------------------------//
     50 
     51 // Number of guest instructions executed so far.  This is
     52 // incremented directly from the generated code.
     53 static ULong g_guest_instrs_executed = 0;
     54 
     55 // Summary statistics for the entire run.
     56 static ULong g_tot_blocks = 0;   // total blocks allocated
     57 static ULong g_tot_bytes  = 0;   // total bytes allocated
     58 
     59 static ULong g_cur_blocks_live = 0; // curr # blocks live
     60 static ULong g_cur_bytes_live  = 0; // curr # bytes live
     61 
     62 static ULong g_max_blocks_live = 0; // bytes and blocks at
     63 static ULong g_max_bytes_live  = 0; // the max residency point
     64 
     65 
     66 //------------------------------------------------------------//
     67 //--- an Interval Tree of live blocks                      ---//
     68 //------------------------------------------------------------//
     69 
     70 /* Tracks information about live blocks. */
     71 typedef
     72    struct {
     73       Addr        payload;
     74       SizeT       req_szB;
     75       ExeContext* ap;  /* allocation ec */
     76       ULong       allocd_at; /* instruction number */
     77       ULong       n_reads;
     78       ULong       n_writes;
      79       /* Approximate histogram: one 16-bit counter per payload byte.
      80          Counts saturate at 0xFFFF.  Can be NULL if the block is resized
      81          or if the block is larger than HISTOGRAM_SIZE_LIMIT. */
     82       UShort*     histoW; /* [0 .. req_szB-1] */
     83    }
     84    Block;
     85 
     86 /* May not contain zero-sized blocks.  May not contain
     87    overlapping blocks. */
     88 static WordFM* interval_tree = NULL;  /* WordFM* Block* void */
     89 
      90 /* Here's the comparison function.  Since the tree is required
      91    to contain non-zero-sized, non-overlapping blocks, it's good
      92    enough to consider any overlap as a match. */
     93 static Word interval_tree_Cmp ( UWord k1, UWord k2 )
     94 {
     95    Block* b1 = (Block*)k1;
     96    Block* b2 = (Block*)k2;
     97    tl_assert(b1->req_szB > 0);
     98    tl_assert(b2->req_szB > 0);
     99    if (b1->payload + b1->req_szB <= b2->payload) return -1;
    100    if (b2->payload + b2->req_szB <= b1->payload) return  1;
    101    return 0;
    102 }
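
         /* Worked example with made-up addresses: a point query for an
            address 'a' is done below by wrapping 'a' in a fake 1-byte block
            [a, a+1).  Against a live block with payload 0x1000 and req_szB
            16, interval_tree_Cmp((UWord)&fake, (UWord)&blk) gives -1 for
            a == 0xFFF, 0 for any a in 0x1000 .. 0x100F (an overlap, hence
            "blk contains a"), and 1 for a == 0x1010.  This is exactly the
            property that find_Block_containing and delete_Block_starting_at
            rely on. */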
    103 
    104 // 2-entry cache for find_Block_containing
    105 static Block* fbc_cache0 = NULL;
    106 static Block* fbc_cache1 = NULL;
    107 
    108 static UWord stats__n_fBc_cached = 0;
    109 static UWord stats__n_fBc_uncached = 0;
    110 static UWord stats__n_fBc_notfound = 0;
    111 
    112 static Block* find_Block_containing ( Addr a )
    113 {
    114    if (LIKELY(fbc_cache0
    115               && fbc_cache0->payload <= a
    116               && a < fbc_cache0->payload + fbc_cache0->req_szB)) {
    117       // found at 0
    118       stats__n_fBc_cached++;
    119       return fbc_cache0;
    120    }
    121    if (LIKELY(fbc_cache1
    122               && fbc_cache1->payload <= a
    123               && a < fbc_cache1->payload + fbc_cache1->req_szB)) {
    124       // found at 1; swap 0 and 1
    125       Block* tmp = fbc_cache0;
    126       fbc_cache0 = fbc_cache1;
    127       fbc_cache1 = tmp;
    128       stats__n_fBc_cached++;
    129       return fbc_cache0;
    130    }
    131    Block fake;
    132    fake.payload = a;
    133    fake.req_szB = 1;
    134    UWord foundkey = 1;
    135    UWord foundval = 1;
    136    Bool found = VG_(lookupFM)( interval_tree,
    137                                &foundkey, &foundval, (UWord)&fake );
    138    if (!found) {
    139       stats__n_fBc_notfound++;
    140       return NULL;
    141    }
    142    tl_assert(foundval == 0); // we don't store vals in the interval tree
    143    tl_assert(foundkey != 1);
    144    Block* res = (Block*)foundkey;
    145    tl_assert(res != &fake);
    146    // put at the top position
    147    fbc_cache1 = fbc_cache0;
    148    fbc_cache0 = res;
    149    stats__n_fBc_uncached++;
    150    return res;
    151 }
    152 
     153 // Delete the block starting at 'a'; asserts if not found
     154 // (i.e., 'a' must be known to be the start of a live block).
    155 static void delete_Block_starting_at ( Addr a )
    156 {
    157    Block fake;
    158    fake.payload = a;
    159    fake.req_szB = 1;
    160    Bool found = VG_(delFromFM)( interval_tree,
    161                                 NULL, NULL, (Addr)&fake );
    162    tl_assert(found);
    163    fbc_cache0 = fbc_cache1 = NULL;
    164 }
    165 
    166 
    167 //------------------------------------------------------------//
    168 //--- a FM of allocation points (APs)                      ---//
    169 //------------------------------------------------------------//
    170 
    171 typedef
    172    struct {
    173       // the allocation point that we're summarising stats for
    174       ExeContext* ap;
    175       // used when printing results
    176       Bool shown;
    177       // The current number of blocks and bytes live for this AP
    178       ULong cur_blocks_live;
    179       ULong cur_bytes_live;
    180       // The number of blocks and bytes live at the max-liveness
    181       // point.  Note this is a bit subtle.  max_blocks_live is not
    182       // the maximum number of live blocks, but rather the number of
    183       // blocks live at the point of maximum byte liveness.  These are
    184       // not necessarily the same thing.
    185       ULong max_blocks_live;
    186       ULong max_bytes_live;
    187       // Total number of blocks and bytes allocated by this AP.
    188       ULong tot_blocks;
    189       ULong tot_bytes;
    190       // Sum of death ages for all blocks allocated by this AP,
    191       // that have subsequently been freed.
    192       ULong death_ages_sum;
    193       ULong deaths;
    194       // Total number of reads and writes in all blocks allocated
    195       // by this AP.
    196       ULong n_reads;
    197       ULong n_writes;
    198       /* Histogram information.  We maintain a histogram aggregated for
    199          all retiring Blocks allocated by this AP, but only if:
    200          - this AP has only ever allocated objects of one size
    201          - that size is <= HISTOGRAM_SIZE_LIMIT
    202          What we need therefore is a mechanism to see if this AP
    203          has only ever allocated blocks of one size.
    204 
    205          3 states:
    206             Unknown          because no retirement yet
    207             Exactly xsize    all retiring blocks are of this size
    208             Mixed            multiple different sizes seen
    209       */
    210       enum { Unknown=999, Exactly, Mixed } xsize_tag;
    211       SizeT xsize;
    212       UInt* histo; /* [0 .. xsize-1] */
    213    }
    214    APInfo;
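
         /* Worked example of the max-liveness subtlety described above
            (made-up sizes): suppose an AP allocates three 10-byte blocks,
            frees two of them, and then allocates one 100-byte block.  The
            largest number of simultaneously live blocks is 3, but the peak
            byte liveness (10 + 100 = 110 bytes) occurs while only 2 blocks
            are live, so max_bytes_live ends up as 110 and max_blocks_live
            as 2, not 3. */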
    215 
    216 /* maps ExeContext*'s to APInfo*'s.  Note that the keys must match the
    217    .ap field in the values. */
    218 static WordFM* apinfo = NULL;  /* WordFM* ExeContext* APInfo* */
    219 
    220 
    221 /* 'bk' is being introduced (has just been allocated).  Find the
    222    relevant APInfo entry for it, or create one, based on the block's
    223    allocation EC.  Then, update the APInfo to the extent that we
    224    actually can, to reflect the allocation. */
    225 static void intro_Block ( Block* bk )
    226 {
    227    tl_assert(bk);
    228    tl_assert(bk->ap);
    229 
    230    APInfo* api   = NULL;
    231    UWord   keyW  = 0;
    232    UWord   valW  = 0;
    233    Bool    found = VG_(lookupFM)( apinfo,
    234                                   &keyW, &valW, (UWord)bk->ap );
    235    if (found) {
    236       api = (APInfo*)valW;
    237       tl_assert(keyW == (UWord)bk->ap);
    238    } else {
    239       api = VG_(malloc)( "dh.main.intro_Block.1", sizeof(APInfo) );
    240       VG_(memset)(api, 0, sizeof(*api));
    241       api->ap = bk->ap;
    242       Bool present = VG_(addToFM)( apinfo,
    243                                    (UWord)bk->ap, (UWord)api );
    244       tl_assert(!present);
    245       // histo stuff
    246       tl_assert(api->deaths == 0);
    247       api->xsize_tag = Unknown;
    248       api->xsize = 0;
    249       if (0) VG_(printf)("api %p   -->  Unknown\n", api);
    250    }
    251 
    252    tl_assert(api->ap == bk->ap);
    253 
    254    /* So: update stats to reflect an allocation */
    255 
    256    // # live blocks
    257    api->cur_blocks_live++;
    258 
    259    // # live bytes
    260    api->cur_bytes_live += bk->req_szB;
    261    if (api->cur_bytes_live > api->max_bytes_live) {
    262       api->max_bytes_live  = api->cur_bytes_live;
    263       api->max_blocks_live = api->cur_blocks_live;
    264    }
    265 
    266    // total blocks and bytes allocated here
    267    api->tot_blocks++;
    268    api->tot_bytes += bk->req_szB;
    269 
    270    // update summary globals
    271    g_tot_blocks++;
    272    g_tot_bytes += bk->req_szB;
    273 
    274    g_cur_blocks_live++;
    275    g_cur_bytes_live += bk->req_szB;
    276    if (g_cur_bytes_live > g_max_bytes_live) {
    277       g_max_bytes_live = g_cur_bytes_live;
    278       g_max_blocks_live = g_cur_blocks_live;
    279    }
    280 }
    281 
    282 
    283 /* 'bk' is retiring (being freed).  Find the relevant APInfo entry for
    284    it, which must already exist.  Then, fold info from 'bk' into that
    285    entry.  'because_freed' is True if the block is retiring because
    286    the client has freed it.  If it is False then the block is retiring
    287    because the program has finished, in which case we want to skip the
    288    updates of the total blocks live etc for this AP, but still fold in
    289    the access counts and histo data that have so far accumulated for
    290    the block. */
    291 static void retire_Block ( Block* bk, Bool because_freed )
    292 {
    293    tl_assert(bk);
    294    tl_assert(bk->ap);
    295 
    296    APInfo* api   = NULL;
    297    UWord   keyW  = 0;
    298    UWord   valW  = 0;
    299    Bool    found = VG_(lookupFM)( apinfo,
    300                                   &keyW, &valW, (UWord)bk->ap );
    301 
    302    tl_assert(found);
    303    api = (APInfo*)valW;
    304    tl_assert(api->ap == bk->ap);
    305 
    306    // update stats following this free.
    307    if (0)
    308    VG_(printf)("ec %p  api->c_by_l %llu  bk->rszB %llu\n",
    309                bk->ap, api->cur_bytes_live, (ULong)bk->req_szB);
    310 
    311    // update total blocks live etc for this AP
    312    if (because_freed) {
    313       tl_assert(api->cur_blocks_live >= 1);
    314       tl_assert(api->cur_bytes_live >= bk->req_szB);
    315       api->cur_blocks_live--;
    316       api->cur_bytes_live -= bk->req_szB;
    317 
    318       api->deaths++;
    319 
    320       tl_assert(bk->allocd_at <= g_guest_instrs_executed);
    321       api->death_ages_sum += (g_guest_instrs_executed - bk->allocd_at);
    322 
    323       // update global summary stats
    324       tl_assert(g_cur_blocks_live > 0);
    325       g_cur_blocks_live--;
    326       tl_assert(g_cur_bytes_live >= bk->req_szB);
    327       g_cur_bytes_live -= bk->req_szB;
    328    }
    329 
    330    // access counts
    331    api->n_reads  += bk->n_reads;
    332    api->n_writes += bk->n_writes;
    333 
    334    // histo stuff.  First, do state transitions for xsize/xsize_tag.
    335    switch (api->xsize_tag) {
    336 
    337       case Unknown:
    338          tl_assert(api->xsize == 0);
    339          tl_assert(api->deaths == 1 || api->deaths == 0);
    340          tl_assert(!api->histo);
    341          api->xsize_tag = Exactly;
    342          api->xsize = bk->req_szB;
    343          if (0) VG_(printf)("api %p   -->  Exactly(%lu)\n", api, api->xsize);
    344          // and allocate the histo
    345          if (bk->histoW) {
    346             api->histo = VG_(malloc)("dh.main.retire_Block.1",
    347                                      api->xsize * sizeof(UInt));
    348             VG_(memset)(api->histo, 0, api->xsize * sizeof(UInt));
    349          }
    350          break;
    351 
    352       case Exactly:
    353          //tl_assert(api->deaths > 1);
    354          if (bk->req_szB != api->xsize) {
    355             if (0) VG_(printf)("api %p   -->  Mixed(%lu -> %lu)\n",
    356                                api, api->xsize, bk->req_szB);
    357             api->xsize_tag = Mixed;
    358             api->xsize = 0;
    359             // deallocate the histo, if any
    360             if (api->histo) {
    361                VG_(free)(api->histo);
    362                api->histo = NULL;
    363             }
    364          }
    365          break;
    366 
    367       case Mixed:
    368          //tl_assert(api->deaths > 1);
    369          break;
    370 
    371       default:
    372         tl_assert(0);
    373    }
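
            /* Example transition sequence (sizes made up): if the first two
               blocks to retire at this AP are both 24 bytes, the tag goes
               Unknown -> Exactly(24) and stays there; if a 40-byte block
               then retires, the tag becomes Mixed, the aggregated histogram
               is freed, and no further histogram data is kept for this AP.
               Once Mixed, the tag never changes again. */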
    374 
    375    // See if we can fold the histo data from this block into
    376    // the data for the AP
    377    if (api->xsize_tag == Exactly && api->histo && bk->histoW) {
    378       tl_assert(api->xsize == bk->req_szB);
    379       UWord i;
    380       for (i = 0; i < api->xsize; i++) {
    381          // FIXME: do something better in case of overflow of api->histo[..]
    382          // Right now, at least don't let it overflow/wrap around
    383          if (api->histo[i] <= 0xFFFE0000)
    384             api->histo[i] += (UInt)bk->histoW[i];
    385       }
    386       if (0) VG_(printf)("fold in, AP = %p\n", api);
    387    }
    388 
    389 
    390 
    391 #if 0
     392    if (bk->histoW) {
    393       VG_(printf)("block retiring, histo %lu: ", bk->req_szB);
    394       UWord i;
    395       for (i = 0; i < bk->req_szB; i++)
     396         VG_(printf)("%u ", (UInt)bk->histoW[i]);
    397       VG_(printf)("\n");
    398    } else {
    399       VG_(printf)("block retiring, no histo %lu\n", bk->req_szB);
    400    }
    401 #endif
    402 }
    403 
    404 /* This handles block resizing.  When a block with AP 'ec' has a
    405    size change of 'delta', call here to update the APInfo. */
    406 static void apinfo_change_cur_bytes_live( ExeContext* ec, Long delta )
    407 {
    408    APInfo* api   = NULL;
    409    UWord   keyW  = 0;
    410    UWord   valW  = 0;
    411    Bool    found = VG_(lookupFM)( apinfo,
    412                                   &keyW, &valW, (UWord)ec );
    413 
    414    tl_assert(found);
    415    api = (APInfo*)valW;
    416    tl_assert(api->ap == ec);
    417 
    418    if (delta < 0) {
    419       tl_assert(api->cur_bytes_live >= -delta);
    420       tl_assert(g_cur_bytes_live >= -delta);
    421    }
    422 
    423    // adjust current live size
    424    api->cur_bytes_live += delta;
    425    g_cur_bytes_live += delta;
    426 
    427    if (delta > 0 && api->cur_bytes_live > api->max_bytes_live) {
    428       api->max_bytes_live  = api->cur_bytes_live;
    429       api->max_blocks_live = api->cur_blocks_live;
    430    }
    431 
    432    // update global summary stats
    433    if (delta > 0 && g_cur_bytes_live > g_max_bytes_live) {
    434       g_max_bytes_live = g_cur_bytes_live;
    435       g_max_blocks_live = g_cur_blocks_live;
    436    }
    437    if (delta > 0)
    438       g_tot_bytes += delta;
    439 
    440    // adjust total allocation size
    441    if (delta > 0)
    442       api->tot_bytes += delta;
    443 }
    444 
    445 
    446 //------------------------------------------------------------//
    447 //--- update both Block and APInfos after {m,re}alloc/free ---//
    448 //------------------------------------------------------------//
    449 
    450 static
    451 void* new_block ( ThreadId tid, void* p, SizeT req_szB, SizeT req_alignB,
    452                   Bool is_zeroed )
    453 {
    454    tl_assert(p == NULL); // don't handle custom allocators right now
    455    SizeT actual_szB, slop_szB;
    456 
    457    if ((SSizeT)req_szB < 0) return NULL;
    458 
    459    if (req_szB == 0)
    460       req_szB = 1;  /* can't allow zero-sized blocks in the interval tree */
    461 
    462    // Allocate and zero if necessary
    463    if (!p) {
    464       p = VG_(cli_malloc)( req_alignB, req_szB );
    465       if (!p) {
    466          return NULL;
    467       }
    468       if (is_zeroed) VG_(memset)(p, 0, req_szB);
    469       actual_szB = VG_(malloc_usable_size)(p);
    470       tl_assert(actual_szB >= req_szB);
    471       slop_szB = actual_szB - req_szB;
    472    } else {
    473       slop_szB = 0;
    474    }
    475 
     476    // Make a new Block node and add it to the interval tree
    477    Block* bk = VG_(malloc)("dh.new_block.1", sizeof(Block));
    478    bk->payload   = (Addr)p;
    479    bk->req_szB   = req_szB;
    480    bk->ap        = VG_(record_ExeContext)(tid, 0/*first word delta*/);
    481    bk->allocd_at = g_guest_instrs_executed;
    482    bk->n_reads   = 0;
    483    bk->n_writes  = 0;
    484    // set up histogram array, if the block isn't too large
    485    bk->histoW = NULL;
    486    if (req_szB <= HISTOGRAM_SIZE_LIMIT) {
    487       bk->histoW = VG_(malloc)("dh.new_block.2", req_szB * sizeof(UShort));
    488       VG_(memset)(bk->histoW, 0, req_szB * sizeof(UShort));
    489    }
    490 
    491    Bool present = VG_(addToFM)( interval_tree, (UWord)bk, (UWord)0/*no val*/);
    492    tl_assert(!present);
    493    fbc_cache0 = fbc_cache1 = NULL;
    494 
    495    intro_Block(bk);
    496 
     497    if (0) VG_(printf)("ALLOC %lu -> %p\n", req_szB, p);
    498 
    499    return p;
    500 }
    501 
    502 static
    503 void die_block ( void* p, Bool custom_free )
    504 {
    505    tl_assert(!custom_free);  // at least for now
    506 
    507    Block* bk = find_Block_containing( (Addr)p );
    508 
    509    if (!bk) {
    510      return; // bogus free
    511    }
    512 
    513    tl_assert(bk->req_szB > 0);
    514    // assert the block finder is behaving sanely
    515    tl_assert(bk->payload <= (Addr)p);
    516    tl_assert( (Addr)p < bk->payload + bk->req_szB );
    517 
    518    if (bk->payload != (Addr)p) {
    519       return; // bogus free
    520    }
    521 
    522    if (0) VG_(printf)(" FREE %p %llu\n",
    523                       p, g_guest_instrs_executed - bk->allocd_at);
    524 
    525    retire_Block(bk, True/*because_freed*/);
    526 
    527    VG_(cli_free)( (void*)bk->payload );
    528    delete_Block_starting_at( bk->payload );
    529    if (bk->histoW) {
    530       VG_(free)( bk->histoW );
    531       bk->histoW = NULL;
    532    }
    533    VG_(free)( bk );
    534 }
    535 
    536 
    537 static
    538 void* renew_block ( ThreadId tid, void* p_old, SizeT new_req_szB )
    539 {
     540    if (0) VG_(printf)("REALL %p %lu\n", p_old, new_req_szB);
    541    void* p_new = NULL;
    542 
    543    tl_assert(new_req_szB > 0); // map 0 to 1
    544 
    545    // Find the old block.
    546    Block* bk = find_Block_containing( (Addr)p_old );
    547    if (!bk) {
    548       return NULL;   // bogus realloc
    549    }
    550 
    551    tl_assert(bk->req_szB > 0);
    552    // assert the block finder is behaving sanely
    553    tl_assert(bk->payload <= (Addr)p_old);
    554    tl_assert( (Addr)p_old < bk->payload + bk->req_szB );
    555 
    556    if (bk->payload != (Addr)p_old) {
    557       return NULL; // bogus realloc
    558    }
    559 
    560    // Keeping the histogram alive in any meaningful way across
    561    // block resizing is too darn complicated.  Just throw it away.
    562    if (bk->histoW) {
    563       VG_(free)(bk->histoW);
    564       bk->histoW = NULL;
    565    }
    566 
    567    // Actually do the allocation, if necessary.
    568    if (new_req_szB <= bk->req_szB) {
    569 
    570       // New size is smaller or same; block not moved.
    571       apinfo_change_cur_bytes_live(bk->ap,
    572                                    (Long)new_req_szB - (Long)bk->req_szB);
    573       bk->req_szB = new_req_szB;
    574       return p_old;
    575 
    576    } else {
    577 
    578       // New size is bigger;  make new block, copy shared contents, free old.
    579       p_new = VG_(cli_malloc)(VG_(clo_alignment), new_req_szB);
    580       if (!p_new) {
    581          // Nb: if realloc fails, NULL is returned but the old block is not
    582          // touched.  What an awful function.
    583          return NULL;
    584       }
    585       tl_assert(p_new != p_old);
    586 
    587       VG_(memcpy)(p_new, p_old, bk->req_szB);
    588       VG_(cli_free)(p_old);
    589 
    590       // Since the block has moved, we need to re-insert it into the
    591       // interval tree at the new place.  Do this by removing
    592       // and re-adding it.
    593       delete_Block_starting_at( (Addr)p_old );
    594       // now 'bk' is no longer in the tree, but the Block itself
    595       // is still alive
    596 
    597       // Update the metadata.
    598       apinfo_change_cur_bytes_live(bk->ap,
    599                                    (Long)new_req_szB - (Long)bk->req_szB);
    600       bk->payload = (Addr)p_new;
    601       bk->req_szB = new_req_szB;
    602 
    603       // and re-add
    604       Bool present
    605          = VG_(addToFM)( interval_tree, (UWord)bk, (UWord)0/*no val*/);
    606       tl_assert(!present);
    607       fbc_cache0 = fbc_cache1 = NULL;
    608 
    609       return p_new;
    610    }
    611    /*NOTREACHED*/
    612    tl_assert(0);
    613 }
    614 
    615 
    616 //------------------------------------------------------------//
    617 //--- malloc() et al replacement wrappers                  ---//
    618 //------------------------------------------------------------//
    619 
    620 static void* dh_malloc ( ThreadId tid, SizeT szB )
    621 {
    622    return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
    623 }
    624 
    625 static void* dh___builtin_new ( ThreadId tid, SizeT szB )
    626 {
    627    return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
    628 }
    629 
    630 static void* dh___builtin_vec_new ( ThreadId tid, SizeT szB )
    631 {
    632    return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
    633 }
    634 
    635 static void* dh_calloc ( ThreadId tid, SizeT m, SizeT szB )
    636 {
    637    return new_block( tid, NULL, m*szB, VG_(clo_alignment), /*is_zeroed*/True );
    638 }
    639 
    640 static void *dh_memalign ( ThreadId tid, SizeT alignB, SizeT szB )
    641 {
    642    return new_block( tid, NULL, szB, alignB, False );
    643 }
    644 
    645 static void dh_free ( ThreadId tid __attribute__((unused)), void* p )
    646 {
    647    die_block( p, /*custom_free*/False );
    648 }
    649 
    650 static void dh___builtin_delete ( ThreadId tid, void* p )
    651 {
    652    die_block( p, /*custom_free*/False);
    653 }
    654 
    655 static void dh___builtin_vec_delete ( ThreadId tid, void* p )
    656 {
    657    die_block( p, /*custom_free*/False );
    658 }
    659 
    660 static void* dh_realloc ( ThreadId tid, void* p_old, SizeT new_szB )
    661 {
    662    if (p_old == NULL) {
    663       return dh_malloc(tid, new_szB);
    664    }
    665    if (new_szB == 0) {
    666       dh_free(tid, p_old);
    667       return NULL;
    668    }
    669    return renew_block(tid, p_old, new_szB);
    670 }
    671 
    672 static SizeT dh_malloc_usable_size ( ThreadId tid, void* p )
    673 {
    674    tl_assert(0);
    675 //zz   HP_Chunk* hc = VG_(HT_lookup)( malloc_list, (UWord)p );
    676 //zz
    677 //zz   return ( hc ? hc->req_szB + hc->slop_szB : 0 );
    678 }
    679 
    680 //------------------------------------------------------------//
    681 //--- memory references                                    ---//
    682 //------------------------------------------------------------//
    683 
    684 static
    685 void inc_histo_for_block ( Block* bk, Addr addr, UWord szB )
    686 {
    687    UWord i, offMin, offMax1;
    688    offMin = addr - bk->payload;
    689    tl_assert(offMin < bk->req_szB);
    690    offMax1 = offMin + szB;
    691    if (offMax1 > bk->req_szB)
    692       offMax1 = bk->req_szB;
    693    //VG_(printf)("%lu %lu   (size of block %lu)\n", offMin, offMax1, bk->req_szB);
    694    for (i = offMin; i < offMax1; i++) {
    695       UShort n = bk->histoW[i];
    696       if (n < 0xFFFF) n++;
    697       bk->histoW[i] = n;
    698    }
    699 }
    700 
    701 static VG_REGPARM(2)
    702 void dh_handle_write ( Addr addr, UWord szB )
    703 {
    704    Block* bk = find_Block_containing(addr);
    705    if (bk) {
    706       bk->n_writes += szB;
    707       if (bk->histoW)
    708          inc_histo_for_block(bk, addr, szB);
    709    }
    710 }
    711 
    712 static VG_REGPARM(2)
    713 void dh_handle_read ( Addr addr, UWord szB )
    714 {
    715    Block* bk = find_Block_containing(addr);
    716    if (bk) {
    717       bk->n_reads += szB;
    718       if (bk->histoW)
    719          inc_histo_for_block(bk, addr, szB);
    720    }
    721 }
    722 
    723 
    724 // Handle reads and writes by syscalls (read == kernel
    725 // reads user space, write == kernel writes user space).
    726 // Assumes no such read or write spans a heap block
    727 // boundary and so we can treat it just as one giant
    728 // read or write.
    729 static
    730 void dh_handle_noninsn_read ( CorePart part, ThreadId tid, Char* s,
    731                               Addr base, SizeT size )
    732 {
    733    switch (part) {
    734       case Vg_CoreSysCall:
    735          dh_handle_read(base, size);
    736          break;
    737       case Vg_CoreSysCallArgInMem:
    738          break;
    739       case Vg_CoreTranslate:
    740          break;
    741       default:
    742          tl_assert(0);
    743    }
    744 }
    745 
    746 static
    747 void dh_handle_noninsn_write ( CorePart part, ThreadId tid,
    748                                Addr base, SizeT size )
    749 {
    750    switch (part) {
    751       case Vg_CoreSysCall:
    752          dh_handle_write(base, size);
    753          break;
    754       case Vg_CoreSignal:
    755          break;
    756       default:
    757          tl_assert(0);
    758    }
    759 }
    760 
    761 
    762 //------------------------------------------------------------//
    763 //--- Instrumentation                                      ---//
    764 //------------------------------------------------------------//
    765 
    766 #define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
    767 #define mkexpr(_tmp)             IRExpr_RdTmp((_tmp))
    768 #define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
    769 #define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
    770 #define assign(_t, _e)           IRStmt_WrTmp((_t), (_e))
    771 
    772 static
    773 void add_counter_update(IRSB* sbOut, Int n)
    774 {
    775    #if defined(VG_BIGENDIAN)
    776    # define END Iend_BE
    777    #elif defined(VG_LITTLEENDIAN)
    778    # define END Iend_LE
    779    #else
    780    # error "Unknown endianness"
    781    #endif
    782    // Add code to increment 'g_guest_instrs_executed' by 'n', like this:
    783    //   WrTmp(t1, Load64(&g_guest_instrs_executed))
    784    //   WrTmp(t2, Add64(RdTmp(t1), Const(n)))
    785    //   Store(&g_guest_instrs_executed, t2)
    786    IRTemp t1 = newIRTemp(sbOut->tyenv, Ity_I64);
    787    IRTemp t2 = newIRTemp(sbOut->tyenv, Ity_I64);
    788    IRExpr* counter_addr = mkIRExpr_HWord( (HWord)&g_guest_instrs_executed );
    789 
    790    IRStmt* st1 = assign(t1, IRExpr_Load(END, Ity_I64, counter_addr));
    791    IRStmt* st2 = assign(t2, binop(Iop_Add64, mkexpr(t1), mkU64(n)));
    792    IRStmt* st3 = IRStmt_Store(END, counter_addr, mkexpr(t2));
    793 
    794    addStmtToIRSB( sbOut, st1 );
    795    addStmtToIRSB( sbOut, st2 );
    796    addStmtToIRSB( sbOut, st3 );
    797 }
    798 
    799 static
    800 void addMemEvent(IRSB* sbOut, Bool isWrite, Int szB, IRExpr* addr,
    801                  Int goff_sp)
    802 {
    803    IRType   tyAddr   = Ity_INVALID;
    804    HChar*   hName    = NULL;
    805    void*    hAddr    = NULL;
    806    IRExpr** argv     = NULL;
    807    IRDirty* di       = NULL;
    808 
    809    const Int THRESH = 4096 * 4; // somewhat arbitrary
    810    const Int rz_szB = VG_STACK_REDZONE_SZB;
    811 
    812    tyAddr = typeOfIRExpr( sbOut->tyenv, addr );
    813    tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
    814 
    815    if (isWrite) {
    816       hName = "dh_handle_write";
    817       hAddr = &dh_handle_write;
    818    } else {
    819       hName = "dh_handle_read";
    820       hAddr = &dh_handle_read;
    821    }
    822 
    823    argv = mkIRExprVec_2( addr, mkIRExpr_HWord(szB) );
    824 
    825    /* Add the helper. */
    826    tl_assert(hName);
    827    tl_assert(hAddr);
    828    tl_assert(argv);
    829    di = unsafeIRDirty_0_N( 2/*regparms*/,
    830                            hName, VG_(fnptr_to_fnentry)( hAddr ),
    831                            argv );
    832 
    833    /* Generate the guard condition: "(addr - (SP - RZ)) >u N", for
    834       some arbitrary N.  If that fails then addr is in the range (SP -
    835       RZ .. SP + N - RZ).  If N is smallish (a page?) then we can say
    836       addr is within a page of SP and so can't possibly be a heap
    837       access, and so can be skipped. */
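            /* Concrete illustration (numbers invented; the real redzone size
               is VG_STACK_REDZONE_SZB): with SP = 0x8000, a 128-byte redzone
               and THRESH = 16384, diff = addr - 0x7F80, so the helper call
               is skipped for addr roughly in [0x7F80 .. 0xBF80].  A heap
               address such as 0x2000000 gives a huge diff, so the guard
               holds and the helper runs; addresses below SP - RZ wrap to
               huge unsigned values and are conservatively treated as
               potential heap accesses too. */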
    838    IRTemp sp = newIRTemp(sbOut->tyenv, tyAddr);
    839    addStmtToIRSB( sbOut, assign(sp, IRExpr_Get(goff_sp, tyAddr)));
    840 
    841    IRTemp sp_minus_rz = newIRTemp(sbOut->tyenv, tyAddr);
    842    addStmtToIRSB(
    843       sbOut,
    844       assign(sp_minus_rz,
    845              tyAddr == Ity_I32
    846                 ? binop(Iop_Sub32, mkexpr(sp), mkU32(rz_szB))
    847                 : binop(Iop_Sub64, mkexpr(sp), mkU64(rz_szB)))
    848    );
    849 
    850    IRTemp diff = newIRTemp(sbOut->tyenv, tyAddr);
    851    addStmtToIRSB(
    852       sbOut,
    853       assign(diff,
    854              tyAddr == Ity_I32
    855                 ? binop(Iop_Sub32, addr, mkexpr(sp_minus_rz))
    856                 : binop(Iop_Sub64, addr, mkexpr(sp_minus_rz)))
    857    );
    858 
    859    IRTemp guard = newIRTemp(sbOut->tyenv, Ity_I1);
    860    addStmtToIRSB(
    861       sbOut,
    862       assign(guard,
    863              tyAddr == Ity_I32
    864                 ? binop(Iop_CmpLT32U, mkU32(THRESH), mkexpr(diff))
    865                 : binop(Iop_CmpLT64U, mkU64(THRESH), mkexpr(diff)))
    866    );
    867    di->guard = mkexpr(guard);
    868 
    869    addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
    870 }
    871 
    872 static
    873 IRSB* dh_instrument ( VgCallbackClosure* closure,
    874                       IRSB* sbIn,
    875                       VexGuestLayout* layout,
    876                       VexGuestExtents* vge,
    877                       IRType gWordTy, IRType hWordTy )
    878 {
    879    Int   i, n = 0;
    880    IRSB* sbOut;
    881    IRTypeEnv* tyenv = sbIn->tyenv;
    882 
    883    const Int goff_sp = layout->offset_SP;
    884 
    885    // We increment the instruction count in two places:
    886    // - just before any Ist_Exit statements;
    887    // - just before the IRSB's end.
    888    // In the former case, we zero 'n' and then continue instrumenting.
    889 
    890    sbOut = deepCopyIRSBExceptStmts(sbIn);
    891 
    892    // Copy verbatim any IR preamble preceding the first IMark
    893    i = 0;
    894    while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
    895       addStmtToIRSB( sbOut, sbIn->stmts[i] );
    896       i++;
    897    }
    898 
    899    for (/*use current i*/; i < sbIn->stmts_used; i++) {
    900       IRStmt* st = sbIn->stmts[i];
    901 
    902       if (!st || st->tag == Ist_NoOp) continue;
    903 
    904       switch (st->tag) {
    905 
    906          case Ist_IMark: {
    907             n++;
    908             break;
    909          }
    910 
    911          case Ist_Exit: {
    912             if (n > 0) {
    913                // Add an increment before the Exit statement, then reset 'n'.
    914                add_counter_update(sbOut, n);
    915                n = 0;
    916             }
    917             break;
    918          }
    919 
    920          case Ist_WrTmp: {
    921             IRExpr* data = st->Ist.WrTmp.data;
    922             if (data->tag == Iex_Load) {
    923                IRExpr* aexpr = data->Iex.Load.addr;
     924                // Note: the load's endianness info is ignored; it is
     925                // not relevant for access counting.
    926                addMemEvent( sbOut, False/*!isWrite*/,
    927                             sizeofIRType(data->Iex.Load.ty),
    928                             aexpr, goff_sp );
    929             }
    930             break;
    931          }
    932 
    933          case Ist_Store: {
    934             IRExpr* data  = st->Ist.Store.data;
    935             IRExpr* aexpr = st->Ist.Store.addr;
    936             addMemEvent( sbOut, True/*isWrite*/,
    937                          sizeofIRType(typeOfIRExpr(tyenv, data)),
    938                          aexpr, goff_sp );
    939             break;
    940          }
    941 
    942          case Ist_Dirty: {
    943             Int      dataSize;
    944             IRDirty* d = st->Ist.Dirty.details;
    945             if (d->mFx != Ifx_None) {
    946                /* This dirty helper accesses memory.  Collect the details. */
    947                tl_assert(d->mAddr != NULL);
    948                tl_assert(d->mSize != 0);
    949                dataSize = d->mSize;
     950                // Large transfers (e.g. 28, 108 or 512 bytes on x86)
     951                // made by dirty helpers are counted as a single
     952                // access of d->mSize bytes; slightly inaccurate, but
     953                // such helpers are very rare.
    954                if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
    955                   addMemEvent( sbOut, False/*!isWrite*/,
    956                                dataSize, d->mAddr, goff_sp );
    957                if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
    958                   addMemEvent( sbOut, True/*isWrite*/,
    959                                dataSize, d->mAddr, goff_sp );
    960             } else {
    961                tl_assert(d->mAddr == NULL);
    962                tl_assert(d->mSize == 0);
    963             }
    964             break;
    965          }
    966 
    967          case Ist_CAS: {
    968             /* We treat it as a read and a write of the location.  I
    969                think that is the same behaviour as it was before IRCAS
    970                was introduced, since prior to that point, the Vex
    971                front ends would translate a lock-prefixed instruction
    972                into a (normal) read followed by a (normal) write. */
    973             Int    dataSize;
    974             IRCAS* cas = st->Ist.CAS.details;
    975             tl_assert(cas->addr != NULL);
    976             tl_assert(cas->dataLo != NULL);
    977             dataSize = sizeofIRType(typeOfIRExpr(tyenv, cas->dataLo));
    978             if (cas->dataHi != NULL)
    979                dataSize *= 2; /* since it's a doubleword-CAS */
    980             addMemEvent( sbOut, False/*!isWrite*/,
    981                          dataSize, cas->addr, goff_sp );
    982             addMemEvent( sbOut, True/*isWrite*/,
    983                          dataSize, cas->addr, goff_sp );
    984             break;
    985          }
    986 
    987          case Ist_LLSC: {
    988             IRType dataTy;
    989             if (st->Ist.LLSC.storedata == NULL) {
    990                /* LL */
    991                dataTy = typeOfIRTemp(tyenv, st->Ist.LLSC.result);
    992                addMemEvent( sbOut, False/*!isWrite*/,
    993                             sizeofIRType(dataTy),
    994                             st->Ist.LLSC.addr, goff_sp );
    995             } else {
    996                /* SC */
    997                dataTy = typeOfIRExpr(tyenv, st->Ist.LLSC.storedata);
    998                addMemEvent( sbOut, True/*isWrite*/,
    999                             sizeofIRType(dataTy),
   1000                             st->Ist.LLSC.addr, goff_sp );
   1001             }
   1002             break;
   1003          }
   1004 
   1005          default:
   1006             break;
   1007       }
   1008 
   1009       addStmtToIRSB( sbOut, st );
   1010    }
   1011 
   1012    if (n > 0) {
   1013       // Add an increment before the SB end.
   1014       add_counter_update(sbOut, n);
   1015    }
   1016    return sbOut;
   1017 }
   1018 
   1019 #undef binop
   1020 #undef mkexpr
   1021 #undef mkU32
   1022 #undef mkU64
   1023 #undef assign
   1024 
   1025 
   1026 //------------------------------------------------------------//
   1027 //--- Command line args                                    ---//
   1028 //------------------------------------------------------------//
   1029 
   1030 // FORWARDS
   1031 static Bool identify_metric ( /*OUT*/ULong(**get_metricP)(APInfo*),
   1032                               /*OUT*/Bool* increasingP,
   1033                               Char* metric_name );
   1034 
   1035 static Int    clo_show_top_n = 10;
   1036 static HChar* clo_sort_by    = "max-bytes-live";
   1037 
   1038 static Bool dh_process_cmd_line_option(Char* arg)
   1039 {
   1040    if VG_BINT_CLO(arg, "--show-top-n", clo_show_top_n, 1, 100000) {}
   1041 
   1042    else if VG_STR_CLO(arg, "--sort-by", clo_sort_by) {
   1043        ULong (*dummyFn)(APInfo*);
   1044        Bool dummyB;
   1045        Bool ok = identify_metric( &dummyFn, &dummyB, clo_sort_by);
   1046        if (!ok)
   1047           return False;
   1048        // otherwise it's OK, in which case leave it alone.
   1049        // show_top_n_apinfos will later convert the string by a
   1050        // second call to identify_metric.
   1051    }
   1052 
   1053    else
   1054       return VG_(replacement_malloc_process_cmd_line_option)(arg);
   1055 
   1056    return True;
   1057 }
   1058 
   1059 
   1060 static void dh_print_usage(void)
   1061 {
   1062    VG_(printf)(
   1063 "    --show-top-n=number       show the top <number> alloc points [10]\n"
   1064 "    --sort-by=string\n"
   1065 "            sort the allocation points by the metric\n"
    1066 "            defined by <string>, as follows:\n"
   1067 "                max-bytes-live    maximum live bytes [default]\n"
   1068 "                tot-bytes-allocd  total allocation (turnover)\n"
   1069 "                max-blocks-live   maximum live blocks\n"
   1070    );
   1071 }
   1072 
   1073 static void dh_print_debug_usage(void)
   1074 {
   1075    VG_(printf)(
   1076 "    (none)\n"
   1077    );
   1078 }
   1079 
   1080 
   1081 //------------------------------------------------------------//
   1082 //--- Finalisation                                         ---//
   1083 //------------------------------------------------------------//
   1084 
   1085 static void show_N_div_100( /*OUT*/HChar* buf, ULong n )
   1086 {
   1087    ULong nK = n / 100;
   1088    ULong nR = n % 100;
   1089    VG_(sprintf)(buf, "%llu.%s%llu", nK,
   1090                 nR < 10 ? "0" : "",
   1091                 nR);
   1092 }
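
         /* For example, show_N_div_100(buf, 1234) writes "12.34" and
            show_N_div_100(buf, 1204) writes "12.04".  Callers pass values
            scaled up by 100 (or, for aad_frac_10k, by 10000, so the result
            reads as a percentage with two decimal places). */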
   1093 
   1094 static void show_APInfo ( APInfo* api )
   1095 {
   1096    HChar bufA[80];
   1097    VG_(memset)(bufA, 0, sizeof(bufA));
   1098    if (api->tot_blocks > 0) {
   1099       show_N_div_100( bufA, ((ULong)api->tot_bytes * 100ULL)
   1100                               / (ULong)api->tot_blocks );
   1101    } else {
   1102       bufA[0] = 'N'; bufA[1] = 'a'; bufA[2] = 'N';
   1103    }
   1104 
   1105    VG_(umsg)("max-live:    %'llu in %'llu blocks\n",
   1106              api->max_bytes_live, api->max_blocks_live);
   1107    VG_(umsg)("tot-alloc:   %'llu in %'llu blocks (avg size %s)\n",
   1108              api->tot_bytes, api->tot_blocks, bufA);
   1109 
   1110    tl_assert(api->tot_blocks >= api->max_blocks_live);
   1111    tl_assert(api->tot_bytes >= api->max_bytes_live);
   1112 
   1113    if (api->deaths > 0) {
   1114       // Average Age at Death
   1115       ULong aad = api->deaths == 0
   1116                   ? 0 : (api->death_ages_sum / api->deaths);
   1117       // AAD as a fraction of the total program lifetime (so far)
    1118       // measured in ten-thousandths (aad_frac_10k == 10000 means the
    1119       // complete lifetime of the program).
   1120       ULong aad_frac_10k
   1121          = g_guest_instrs_executed == 0
   1122            ? 0 : (10000ULL * aad) / g_guest_instrs_executed;
   1123       HChar buf[16];
   1124       show_N_div_100(buf, aad_frac_10k);
   1125       VG_(umsg)("deaths:      %'llu, at avg age %'llu "
   1126                 "(%s%% of prog lifetime)\n",
   1127                 api->deaths, aad, buf );
   1128    } else {
   1129       VG_(umsg)("deaths:      none (none of these blocks were freed)\n");
   1130    }
   1131 
   1132    HChar bufR[80], bufW[80];
   1133    VG_(memset)(bufR, 0, sizeof(bufR));
   1134    VG_(memset)(bufW, 0, sizeof(bufW));
   1135    if (api->tot_bytes > 0) {
   1136       show_N_div_100(bufR, (100ULL * api->n_reads) / api->tot_bytes);
   1137       show_N_div_100(bufW, (100ULL * api->n_writes) / api->tot_bytes);
   1138    } else {
   1139       VG_(strcat)(bufR, "Inf");
   1140       VG_(strcat)(bufW, "Inf");
   1141    }
   1142 
   1143    VG_(umsg)("acc-ratios:  %s rd, %s wr "
   1144              " (%'llu b-read, %'llu b-written)\n",
   1145              bufR, bufW,
   1146              api->n_reads, api->n_writes);
   1147 
   1148    VG_(pp_ExeContext)(api->ap);
   1149 
   1150    if (api->histo && api->xsize_tag == Exactly) {
   1151       VG_(umsg)("\nAggregated access counts by offset:\n");
   1152       VG_(umsg)("\n");
   1153       UWord i;
   1154       if (api->xsize > 0)
   1155          VG_(umsg)("[   0]  ");
   1156       for (i = 0; i < api->xsize; i++) {
   1157          if (i > 0 && (i % 16) == 0 && i != api->xsize-1) {
   1158             VG_(umsg)("\n");
   1159             VG_(umsg)("[%4lu]  ", i);
   1160          }
   1161          VG_(umsg)("%u ", api->histo[i]);
   1162       }
   1163       VG_(umsg)("\n");
   1164    }
   1165 }
   1166 
   1167 
   1168 /* Metric-access functions for APInfos. */
   1169 static ULong get_metric__max_bytes_live ( APInfo* api ) {
   1170    return api->max_bytes_live;
   1171 }
   1172 static ULong get_metric__tot_bytes ( APInfo* api ) {
   1173    return api->tot_bytes;
   1174 }
   1175 static ULong get_metric__max_blocks_live ( APInfo* api ) {
   1176    return api->max_blocks_live;
   1177 }
   1178 
   1179 /* Given a string, return the metric-access function and also a Bool
   1180    indicating whether we want increasing or decreasing values of the
   1181    metric.  This is used twice, once in command line processing, and
   1182    then again in show_top_n_apinfos.  Returns False if the given
   1183    string could not be identified.*/
   1184 static Bool identify_metric ( /*OUT*/ULong(**get_metricP)(APInfo*),
   1185                               /*OUT*/Bool* increasingP,
   1186                               Char* metric_name )
   1187 {
   1188    if (0 == VG_(strcmp)(metric_name, "max-bytes-live")) {
   1189       *get_metricP = get_metric__max_bytes_live;
   1190       *increasingP = False;
   1191       return True;
   1192    }
   1193    if (0 == VG_(strcmp)(metric_name, "tot-bytes-allocd")) {
   1194       *get_metricP = get_metric__tot_bytes;
   1195       *increasingP = False;
   1196       return True;
   1197    }
   1198    if (0 == VG_(strcmp)(metric_name, "max-blocks-live")) {
   1199       *get_metricP = get_metric__max_blocks_live;
   1200       *increasingP = False;
   1201       return True;
   1202    }
   1203    return False;
   1204 }
   1205 
   1206 
   1207 static void show_top_n_apinfos ( void )
   1208 {
   1209    Int   i;
   1210    UWord keyW, valW;
   1211    ULong (*get_metric)(APInfo*);
   1212    Bool  increasing;
   1213 
   1214    HChar* metric_name = clo_sort_by;
   1215    tl_assert(metric_name); // ensured by clo processing
   1216 
   1217    Bool ok = identify_metric( &get_metric, &increasing, metric_name );
   1218    tl_assert(ok); // ensured by clo processing
   1219 
   1220    VG_(umsg)("\n");
   1221    VG_(umsg)("======== ORDERED BY %s \"%s\": "
   1222              "top %d allocators ========\n",
   1223              increasing ? "increasing" : "decreasing",
   1224              metric_name, clo_show_top_n );
   1225 
   1226    // Clear all .shown bits
   1227    VG_(initIterFM)( apinfo );
   1228    while (VG_(nextIterFM)( apinfo, &keyW, &valW )) {
   1229       APInfo* api = (APInfo*)valW;
   1230       tl_assert(api && api->ap == (ExeContext*)keyW);
   1231       api->shown = False;
   1232    }
   1233    VG_(doneIterFM)( apinfo );
   1234 
   1235    // Now print the top N entries.  Each one requires a
    1236    // complete scan of the set; quadratic, but simple and adequate here.
   1237    for (i = 0; i < clo_show_top_n; i++) {
   1238       ULong   best_metric = increasing ? ~0ULL : 0ULL;
   1239       APInfo* best_api    = NULL;
   1240 
   1241       VG_(initIterFM)( apinfo );
   1242       while (VG_(nextIterFM)( apinfo, &keyW, &valW )) {
   1243          APInfo* api = (APInfo*)valW;
   1244          if (api->shown)
   1245             continue;
   1246          ULong metric = get_metric(api);
   1247          if (increasing ? (metric < best_metric) : (metric > best_metric)) {
   1248             best_metric = metric;
   1249             best_api = api;
   1250          }
   1251       }
   1252       VG_(doneIterFM)( apinfo );
   1253 
   1254       if (!best_api)
    1255          break; // all APs have been shown.  Stop.
   1256 
   1257       VG_(umsg)("\n");
   1258       VG_(umsg)("-------------------- %d of %d --------------------\n",
   1259                 i+1, clo_show_top_n );
   1260       show_APInfo(best_api);
   1261       best_api->shown = True;
   1262    }
   1263 
   1264    VG_(umsg)("\n");
   1265 }
   1266 
   1267 
   1268 static void dh_fini(Int exit_status)
   1269 {
   1270    // Before printing statistics, we must harvest access counts for
   1271    // all the blocks that are still alive.  Not doing so gives
   1272    // access ratios which are too low (zero, in the worst case)
   1273    // for such blocks, since the accesses that do get made will
   1274    // (if we skip this step) not get folded into the AP summaries.
   1275    UWord keyW, valW;
   1276    VG_(initIterFM)( interval_tree );
   1277    while (VG_(nextIterFM)( interval_tree, &keyW, &valW )) {
   1278       Block* bk = (Block*)keyW;
   1279       tl_assert(valW == 0);
   1280       tl_assert(bk);
   1281       retire_Block(bk, False/*!because_freed*/);
   1282    }
   1283    VG_(doneIterFM)( interval_tree );
   1284 
   1285    // show results
   1286    VG_(umsg)("======== SUMMARY STATISTICS ========\n");
   1287    VG_(umsg)("\n");
   1288    VG_(umsg)("guest_insns:  %'llu\n", g_guest_instrs_executed);
   1289    VG_(umsg)("\n");
   1290    VG_(umsg)("max_live:     %'llu in %'llu blocks\n",
   1291              g_max_bytes_live, g_max_blocks_live);
   1292    VG_(umsg)("\n");
   1293    VG_(umsg)("tot_alloc:    %'llu in %'llu blocks\n",
   1294              g_tot_bytes, g_tot_blocks);
   1295    VG_(umsg)("\n");
   1296    if (g_tot_bytes > 0) {
   1297       VG_(umsg)("insns per allocated byte: %'llu\n",
   1298                 g_guest_instrs_executed / g_tot_bytes);
   1299       VG_(umsg)("\n");
   1300    }
   1301 
   1302    show_top_n_apinfos();
   1303 
   1304    VG_(umsg)("\n");
   1305    VG_(umsg)("\n");
   1306    VG_(umsg)("==============================================================\n");
   1307    VG_(umsg)("\n");
    1308    VG_(umsg)("Some hints (see --help for command line option details):\n");
   1309    VG_(umsg)("\n");
   1310    VG_(umsg)("* summary stats for whole program are at the top of this output\n");
   1311    VG_(umsg)("\n");
   1312    VG_(umsg)("* --show-top-n=  controls how many alloc points are shown.\n");
   1313    VG_(umsg)("                 You probably want to set it much higher than\n");
   1314    VG_(umsg)("                 the default value (10)\n");
   1315    VG_(umsg)("\n");
   1316    VG_(umsg)("* --sort-by=     specifies the sort key for output.\n");
   1317    VG_(umsg)("                 See --help for details.\n");
   1318    VG_(umsg)("\n");
   1319    VG_(umsg)("* Each allocation stack, by default 12 frames, counts as\n");
   1320    VG_(umsg)("  a separate alloc point.  This causes the data to be spread out\n");
   1321    VG_(umsg)("  over far too many alloc points.  I strongly suggest using\n");
   1322    VG_(umsg)("  --num-callers=4 or some such, to reduce the spreading.\n");
   1323    VG_(umsg)("\n");
   1324 
   1325    if (VG_(clo_stats)) {
   1326       VG_(dmsg)(" dhat: find_Block_containing:\n");
   1327       VG_(dmsg)("             found: %'lu (%'lu cached + %'lu uncached)\n",
   1328                 stats__n_fBc_cached + stats__n_fBc_uncached,
   1329                 stats__n_fBc_cached,
   1330                 stats__n_fBc_uncached);
   1331       VG_(dmsg)("          notfound: %'lu\n", stats__n_fBc_notfound);
   1332       VG_(dmsg)("\n");
   1333    }
   1334 }
   1335 
   1336 
   1337 //------------------------------------------------------------//
   1338 //--- Initialisation                                       ---//
   1339 //------------------------------------------------------------//
   1340 
   1341 static void dh_post_clo_init(void)
   1342 {
   1343 }
   1344 
   1345 static void dh_pre_clo_init(void)
   1346 {
   1347    VG_(details_name)            ("DHAT");
   1348    VG_(details_version)         (NULL);
   1349    VG_(details_description)     ("a dynamic heap analysis tool");
   1350    VG_(details_copyright_author)(
   1351       "Copyright (C) 2010-2010, and GNU GPL'd, by Mozilla Inc");
   1352    VG_(details_bug_reports_to)  (VG_BUGS_TO);
   1353 
   1354    // Basic functions.
   1355    VG_(basic_tool_funcs)          (dh_post_clo_init,
   1356                                    dh_instrument,
   1357                                    dh_fini);
   1358 //zz
   1359    // Needs.
   1360    VG_(needs_libc_freeres)();
   1361    VG_(needs_command_line_options)(dh_process_cmd_line_option,
   1362                                    dh_print_usage,
   1363                                    dh_print_debug_usage);
   1364 //zz   VG_(needs_client_requests)     (dh_handle_client_request);
   1365 //zz   VG_(needs_sanity_checks)       (dh_cheap_sanity_check,
   1366 //zz                                   dh_expensive_sanity_check);
   1367    VG_(needs_malloc_replacement)  (dh_malloc,
   1368                                    dh___builtin_new,
   1369                                    dh___builtin_vec_new,
   1370                                    dh_memalign,
   1371                                    dh_calloc,
   1372                                    dh_free,
   1373                                    dh___builtin_delete,
   1374                                    dh___builtin_vec_delete,
   1375                                    dh_realloc,
   1376                                    dh_malloc_usable_size,
   1377                                    0 );
   1378 
   1379    VG_(track_pre_mem_read)        ( dh_handle_noninsn_read );
   1380    //VG_(track_pre_mem_read_asciiz) ( check_mem_is_defined_asciiz );
   1381    VG_(track_post_mem_write)      ( dh_handle_noninsn_write );
   1382 
   1383    tl_assert(!interval_tree);
   1384    tl_assert(!fbc_cache0);
   1385    tl_assert(!fbc_cache1);
   1386 
   1387    interval_tree = VG_(newFM)( VG_(malloc),
   1388                                "dh.main.interval_tree.1",
   1389                                VG_(free),
   1390                                interval_tree_Cmp );
   1391 
   1392    apinfo = VG_(newFM)( VG_(malloc),
   1393                         "dh.main.apinfo.1",
   1394                         VG_(free),
   1395                         NULL/*unboxedcmp*/ );
   1396 }
   1397 
   1398 VG_DETERMINE_INTERFACE_VERSION(dh_pre_clo_init)
   1399 
   1400 //--------------------------------------------------------------------//
   1401 //--- end                                                dh_main.c ---//
   1402 //--------------------------------------------------------------------//
   1403