      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- Callgrind                                                    ---*/
      4 /*---                                                       main.c ---*/
      5 /*--------------------------------------------------------------------*/
      6 
      7 /*
      8    This file is part of Callgrind, a Valgrind tool for call-graph
      9    profiling of programs.
     10 
     11    Copyright (C) 2002-2010, Josef Weidendorfer (Josef.Weidendorfer (at) gmx.de)
     12 
     13    This tool is derived from and contains code from Cachegrind
     14    Copyright (C) 2002-2010 Nicholas Nethercote (njn (at) valgrind.org)
     15 
     16    This program is free software; you can redistribute it and/or
     17    modify it under the terms of the GNU General Public License as
     18    published by the Free Software Foundation; either version 2 of the
     19    License, or (at your option) any later version.
     20 
     21    This program is distributed in the hope that it will be useful, but
     22    WITHOUT ANY WARRANTY; without even the implied warranty of
     23    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     24    General Public License for more details.
     25 
     26    You should have received a copy of the GNU General Public License
     27    along with this program; if not, write to the Free Software
     28    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     29    02111-1307, USA.
     30 
     31    The GNU General Public License is contained in the file COPYING.
     32 */
     33 
     34 #include "config.h"
     35 #include "callgrind.h"
     36 #include "global.h"
     37 
     38 #include <pub_tool_threadstate.h>
     39 
     40 #include "cg_branchpred.c"
     41 
     42 /*------------------------------------------------------------*/
     43 /*--- Global variables                                     ---*/
     44 /*------------------------------------------------------------*/
     45 
     46 /* for all threads */
     47 CommandLineOptions CLG_(clo);
     48 Statistics CLG_(stat);
     49 Bool CLG_(instrument_state) = True; /* Instrumentation on ? */
     50 
     51 /* thread and signal handler specific */
     52 exec_state CLG_(current_state);
     53 
     54 
     55 /*------------------------------------------------------------*/
     56 /*--- Statistics                                           ---*/
     57 /*------------------------------------------------------------*/
     58 
     59 static void CLG_(init_statistics)(Statistics* s)
     60 {
     61   s->call_counter        = 0;
     62   s->jcnd_counter        = 0;
     63   s->jump_counter        = 0;
     64   s->rec_call_counter    = 0;
     65   s->ret_counter         = 0;
     66   s->bb_executions       = 0;
     67 
     68   s->context_counter     = 0;
     69   s->bb_retranslations   = 0;
     70 
     71   s->distinct_objs       = 0;
     72   s->distinct_files      = 0;
     73   s->distinct_fns        = 0;
     74   s->distinct_contexts   = 0;
     75   s->distinct_bbs        = 0;
     76   s->distinct_bbccs      = 0;
     77   s->distinct_instrs     = 0;
     78   s->distinct_skips      = 0;
     79 
     80   s->bb_hash_resizes     = 0;
     81   s->bbcc_hash_resizes   = 0;
     82   s->jcc_hash_resizes    = 0;
     83   s->cxt_hash_resizes    = 0;
     84   s->fn_array_resizes    = 0;
     85   s->call_stack_resizes  = 0;
     86   s->fn_stack_resizes    = 0;
     87 
     88   s->full_debug_BBs      = 0;
     89   s->file_line_debug_BBs = 0;
     90   s->fn_name_debug_BBs   = 0;
     91   s->no_debug_BBs        = 0;
     92   s->bbcc_lru_misses     = 0;
     93   s->jcc_lru_misses      = 0;
     94   s->cxt_lru_misses      = 0;
     95   s->bbcc_clones         = 0;
     96 }
     97 
     98 
     99 /*------------------------------------------------------------*/
     100 /*--- Simple callbacks (not cache simulator)              ---*/
    101 /*------------------------------------------------------------*/
    102 
    103 VG_REGPARM(1)
    104 static void log_global_event(InstrInfo* ii)
    105 {
    106     ULong* cost_Bus;
    107 
    108     CLG_DEBUG(6, "log_global_event:  Ir  %#lx/%u\n",
    109               CLG_(bb_base) + ii->instr_offset, ii->instr_size);
    110 
    111     if (!CLG_(current_state).collect) return;
    112 
    113     CLG_ASSERT( (ii->eventset->mask & (1u<<EG_BUS))>0 );
    114 
    115     CLG_(current_state).cost[ fullOffset(EG_BUS) ]++;
    116 
    117     if (CLG_(current_state).nonskipped)
    118         cost_Bus = CLG_(current_state).nonskipped->skipped + fullOffset(EG_BUS);
    119     else
    120         cost_Bus = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_BUS];
    121     cost_Bus[0]++;
    122 }
    123 
    124 
    125 /* For branches, we consult two different predictors, one which
    126    predicts taken/untaken for conditional branches, and the other
    127    which predicts the branch target address for indirect branches
    128    (jump-to-register style ones). */
    129 
    130 static VG_REGPARM(2)
    131 void log_cond_branch(InstrInfo* ii, Word taken)
    132 {
    133     Bool miss;
    134     Int fullOffset_Bc;
    135     ULong* cost_Bc;
    136 
    137     CLG_DEBUG(6, "log_cond_branch:  Ir %#lx, taken %lu\n",
    138               CLG_(bb_base) + ii->instr_offset, taken);
    139 
    140     miss = 1 & do_cond_branch_predict(CLG_(bb_base) + ii->instr_offset, taken);
    141 
    142     if (!CLG_(current_state).collect) return;
    143 
    144     CLG_ASSERT( (ii->eventset->mask & (1u<<EG_BC))>0 );
    145 
    146     if (CLG_(current_state).nonskipped)
    147         cost_Bc = CLG_(current_state).nonskipped->skipped + fullOffset(EG_BC);
    148     else
    149         cost_Bc = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_BC];
    150 
    151     fullOffset_Bc = fullOffset(EG_BC);
    152     CLG_(current_state).cost[ fullOffset_Bc ]++;
    153     cost_Bc[0]++;
    154     if (miss) {
    155         CLG_(current_state).cost[ fullOffset_Bc+1 ]++;
    156         cost_Bc[1]++;
    157     }
    158 }
    159 
    160 static VG_REGPARM(2)
    161 void log_ind_branch(InstrInfo* ii, UWord actual_dst)
    162 {
    163     Bool miss;
    164     Int fullOffset_Bi;
    165     ULong* cost_Bi;
    166 
    167     CLG_DEBUG(6, "log_ind_branch:  Ir  %#lx, dst %#lx\n",
    168               CLG_(bb_base) + ii->instr_offset, actual_dst);
    169 
    170     miss = 1 & do_ind_branch_predict(CLG_(bb_base) + ii->instr_offset, actual_dst);
    171 
    172     if (!CLG_(current_state).collect) return;
    173 
    174     CLG_ASSERT( (ii->eventset->mask & (1u<<EG_BI))>0 );
    175 
    176     if (CLG_(current_state).nonskipped)
    177         cost_Bi = CLG_(current_state).nonskipped->skipped + fullOffset(EG_BI);
    178     else
    179         cost_Bi = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_BI];
    180 
    181     fullOffset_Bi = fullOffset(EG_BI);
    182     CLG_(current_state).cost[ fullOffset_Bi ]++;
    183     cost_Bi[0]++;
    184     if (miss) {
    185         CLG_(current_state).cost[ fullOffset_Bi+1 ]++;
    186         cost_Bi[1]++;
    187     }
    188 }
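
         /* Note on the cost layout used by the two handlers above (a summary
            derived from the code, added as a reading aid): for the EG_BC and
            EG_BI event groups, slot 0 counts executed branches and slot 1
            counts mispredictions. Each increment is applied twice: once to
            the thread-global cost array (CLG_(current_state).cost) and once
            to the per-instruction slots reached via cost_Bc/cost_Bi (which
            are redirected to the nonskipped function's 'skipped' costs where
            applicable). E.g. a mispredicted conditional branch bumps
            cost[fullOffset(EG_BC)], cost[fullOffset(EG_BC)+1], cost_Bc[0]
            and cost_Bc[1]. */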
    189 
    190 /*------------------------------------------------------------*/
    191 /*--- Instrumentation structures and event queue handling  ---*/
    192 /*------------------------------------------------------------*/
    193 
    194 /* Maintain an ordered list of memory events which are outstanding, in
    195    the sense that no IR has yet been generated to do the relevant
    196    helper calls.  The BB is scanned top to bottom and memory events
    197    are added to the end of the list, merging with the most recent
    198    notified event where possible (Dw immediately following Dr and
    199    having the same size and EA can be merged).
    200 
    201    This merging is done so that for architectures which have
    202    load-op-store instructions (x86, amd64), the insn is treated as if
    203    it makes just one memory reference (a modify), rather than two (a
    204    read followed by a write at the same address).
    205 
    206    At various points the list will need to be flushed, that is, IR
    207    generated from it.  That must happen before any possible exit from
    208    the block (the end, or an IRStmt_Exit).  Flushing also takes place
    209    when there is no space to add a new event.
    210 
    211    If we require the simulation statistics to be up to date with
    212    respect to possible memory exceptions, then the list would have to
    213    be flushed before each memory reference.  That would however lose
    214    performance by inhibiting event-merging during flushing.
    215 
    216    Flushing the list consists of walking it start to end and emitting
    217    instrumentation IR for each event, in the order in which they
    218    appear.  It may be possible to emit a single call for two adjacent
    219    events in order to reduce the number of helper function calls made.
    220    For example, it could well be profitable to handle two adjacent Ir
    221    events with a single helper call.  */
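
         /* Worked example (hypothetical guest instruction): an x86
            "incl (%ecx)" yields the event sequence Ir, Dr(ea,4), Dw(ea,4)
            while the BB is scanned. addEvent_Dw() below rewrites the pending
            Dr into a single Dm(ea,4), and flushEvents() then merges the
            remaining Ir + Dm pair into one log_1I1Dw helper call, so the
            insn is costed as one instruction fetch plus one data modify. */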
    222 
    223 typedef
    224    IRExpr
    225    IRAtom;
    226 
    227 typedef
    228    enum {
    229       Ev_Ir,  // Instruction read
    230       Ev_Dr,  // Data read
    231       Ev_Dw,  // Data write
    232       Ev_Dm,  // Data modify (read then write)
    233       Ev_Bc,  // branch conditional
    234       Ev_Bi,  // branch indirect (to unknown destination)
    235       Ev_G    // Global bus event
    236    }
    237    EventTag;
    238 
    239 typedef
    240    struct {
    241       EventTag   tag;
    242       InstrInfo* inode;
    243       union {
    244 	 struct {
    245 	 } Ir;
    246 	 struct {
    247 	    IRAtom* ea;
    248 	    Int     szB;
    249 	 } Dr;
    250 	 struct {
    251 	    IRAtom* ea;
    252 	    Int     szB;
    253 	 } Dw;
    254 	 struct {
    255 	    IRAtom* ea;
    256 	    Int     szB;
    257 	 } Dm;
    258          struct {
    259             IRAtom* taken; /* :: Ity_I1 */
    260          } Bc;
    261          struct {
    262             IRAtom* dst;
    263          } Bi;
    264 	 struct {
    265 	 } G;
    266       } Ev;
    267    }
    268    Event;
    269 
    270 static void init_Event ( Event* ev ) {
    271    VG_(memset)(ev, 0, sizeof(Event));
    272 }
    273 
    274 static IRAtom* get_Event_dea ( Event* ev ) {
    275    switch (ev->tag) {
    276       case Ev_Dr: return ev->Ev.Dr.ea;
    277       case Ev_Dw: return ev->Ev.Dw.ea;
    278       case Ev_Dm: return ev->Ev.Dm.ea;
    279       default:    tl_assert(0);
    280    }
    281 }
    282 
    283 static Int get_Event_dszB ( Event* ev ) {
    284    switch (ev->tag) {
    285       case Ev_Dr: return ev->Ev.Dr.szB;
    286       case Ev_Dw: return ev->Ev.Dw.szB;
    287       case Ev_Dm: return ev->Ev.Dm.szB;
    288       default:    tl_assert(0);
    289    }
    290 }
    291 
    292 
    293 /* Up to this many unnotified events are allowed.  Number is
    294    arbitrary.  Larger numbers allow more event merging to occur, but
    295    potentially induce more spilling due to extending live ranges of
    296    address temporaries. */
    297 #define N_EVENTS 16
    298 
    299 
    300 /* A struct which holds all the running state during instrumentation.
    301    Mostly to avoid passing loads of parameters everywhere. */
    302 typedef struct {
    303     /* The current outstanding-memory-event list. */
    304     Event events[N_EVENTS];
    305     Int   events_used;
    306 
    307     /* The array of InstrInfo's is part of BB struct. */
    308     BB* bb;
    309 
     310     /* BB seen before (i.e. re-instrumentation) */
    311     Bool seen_before;
    312 
     313     /* Number of InstrInfo bins 'used' so far. */
    314     UInt ii_index;
    315 
    316     // current offset of guest instructions from BB start
    317     UInt instr_offset;
    318 
    319     /* The output SB being constructed. */
    320     IRSB* sbOut;
    321 } ClgState;
    322 
    323 
    324 static void showEvent ( Event* ev )
    325 {
    326    switch (ev->tag) {
    327       case Ev_Ir:
    328 	 VG_(printf)("Ir (InstrInfo %p) at +%d\n",
    329 		     ev->inode, ev->inode->instr_offset);
    330 	 break;
    331       case Ev_Dr:
    332 	 VG_(printf)("Dr (InstrInfo %p) at +%d %d EA=",
    333 		     ev->inode, ev->inode->instr_offset, ev->Ev.Dr.szB);
    334 	 ppIRExpr(ev->Ev.Dr.ea);
    335 	 VG_(printf)("\n");
    336 	 break;
    337       case Ev_Dw:
    338 	 VG_(printf)("Dw (InstrInfo %p) at +%d %d EA=",
    339 		     ev->inode, ev->inode->instr_offset, ev->Ev.Dw.szB);
    340 	 ppIRExpr(ev->Ev.Dw.ea);
    341 	 VG_(printf)("\n");
    342 	 break;
    343       case Ev_Dm:
    344 	 VG_(printf)("Dm (InstrInfo %p) at +%d %d EA=",
    345 		     ev->inode, ev->inode->instr_offset, ev->Ev.Dm.szB);
    346 	 ppIRExpr(ev->Ev.Dm.ea);
    347 	 VG_(printf)("\n");
    348 	 break;
    349       case Ev_Bc:
    350          VG_(printf)("Bc %p   GA=", ev->inode);
    351          ppIRExpr(ev->Ev.Bc.taken);
    352          VG_(printf)("\n");
    353          break;
    354       case Ev_Bi:
    355          VG_(printf)("Bi %p  DST=", ev->inode);
    356          ppIRExpr(ev->Ev.Bi.dst);
    357          VG_(printf)("\n");
    358          break;
    359       case Ev_G:
    360          VG_(printf)("G  %p\n", ev->inode);
    361          break;
    362       default:
    363 	 tl_assert(0);
    364 	 break;
    365    }
    366 }
    367 
    368 /* Generate code for all outstanding memory events, and mark the queue
    369    empty.  Code is generated into cgs->sbOut, and this activity
    370    'consumes' slots in cgs->bb. */
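
         /* Summary of the helper selection in the switch below (assuming the
            active cache simulator provides all log_* entry points; a helper
            may be unset depending on the simulator, in which case the event
            is skipped):
               Ir + Dr       -> log_1I1Dr(inode, ea, szB)
               Ir + Dw/Dm    -> log_1I1Dw(inode, ea, szB)
               Ir + Ir + Ir  -> log_3I0D(inode1, inode2, inode3)
               Ir + Ir       -> log_2I0D(inode1, inode2)
               Ir            -> log_1I0D(inode)
               Dr            -> log_0I1Dr(inode, ea, szB)
               Dw/Dm         -> log_0I1Dw(inode, ea, szB)
               Bc / Bi / G   -> log_cond_branch / log_ind_branch /
                                log_global_event                          */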
    371 
    372 static void flushEvents ( ClgState* clgs )
    373 {
    374    Int        i, regparms, inew;
    375    Char*      helperName;
    376    void*      helperAddr;
    377    IRExpr**   argv;
    378    IRExpr*    i_node_expr;
    379    IRDirty*   di;
    380    Event*     ev;
    381    Event*     ev2;
    382    Event*     ev3;
    383 
    384    if (!clgs->seen_before) {
    385        // extend event sets as needed
    386        // available sets: D0 Dr
    387        for(i=0; i<clgs->events_used; i++) {
    388 	   ev  = &clgs->events[i];
    389 	   switch(ev->tag) {
    390 	   case Ev_Ir:
     391 	       // The Ir event is always first for a guest instruction
    392 	       CLG_ASSERT(ev->inode->eventset == 0);
    393 	       ev->inode->eventset = CLG_(sets).base;
    394 	       break;
    395 	   case Ev_Dr:
    396                // extend event set by Dr counters
    397 	       ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
    398 							   EG_DR);
    399 	       break;
    400 	   case Ev_Dw:
    401 	   case Ev_Dm:
    402                // extend event set by Dw counters
    403 	       ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
    404 							   EG_DW);
    405 	       break;
    406            case Ev_Bc:
    407                // extend event set by Bc counters
    408                ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
    409                                                            EG_BC);
    410                break;
    411            case Ev_Bi:
    412                // extend event set by Bi counters
    413                ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
    414                                                            EG_BI);
    415                break;
    416 	   case Ev_G:
    417                // extend event set by Bus counter
    418 	       ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
    419 							   EG_BUS);
    420 	       break;
    421 	   default:
    422 	       tl_assert(0);
    423 	   }
    424        }
    425    }
    426 
    427    for(i = 0; i < clgs->events_used; i = inew) {
    428 
    429       helperName = NULL;
    430       helperAddr = NULL;
    431       argv       = NULL;
    432       regparms   = 0;
    433 
    434       /* generate IR to notify event i and possibly the ones
    435 	 immediately following it. */
    436       tl_assert(i >= 0 && i < clgs->events_used);
    437 
    438       ev  = &clgs->events[i];
    439       ev2 = ( i < clgs->events_used-1 ? &clgs->events[i+1] : NULL );
    440       ev3 = ( i < clgs->events_used-2 ? &clgs->events[i+2] : NULL );
    441 
    442       CLG_DEBUGIF(5) {
    443 	 VG_(printf)("   flush ");
    444 	 showEvent( ev );
    445       }
    446 
    447       i_node_expr = mkIRExpr_HWord( (HWord)ev->inode );
    448 
    449       /* Decide on helper fn to call and args to pass it, and advance
    450 	 i appropriately.
    451 	 Dm events have same effect as Dw events */
    452       switch (ev->tag) {
    453 	 case Ev_Ir:
    454 	    /* Merge an Ir with a following Dr. */
    455 	    if (ev2 && ev2->tag == Ev_Dr) {
    456 	       /* Why is this true?  It's because we're merging an Ir
    457 		  with a following Dr.  The Ir derives from the
    458 		  instruction's IMark and the Dr from data
    459 		  references which follow it.  In short it holds
    460 		  because each insn starts with an IMark, hence an
    461 		  Ev_Ir, and so these Dr must pertain to the
    462 		  immediately preceding Ir.  Same applies to analogous
    463 		  assertions in the subsequent cases. */
    464 	       tl_assert(ev2->inode == ev->inode);
    465 	       helperName = CLG_(cachesim).log_1I1Dr_name;
    466 	       helperAddr = CLG_(cachesim).log_1I1Dr;
    467 	       argv = mkIRExprVec_3( i_node_expr,
    468 				     get_Event_dea(ev2),
    469 				     mkIRExpr_HWord( get_Event_dszB(ev2) ) );
    470 	       regparms = 3;
    471 	       inew = i+2;
    472 	    }
    473 	    /* Merge an Ir with a following Dw/Dm. */
    474 	    else
    475 	    if (ev2 && (ev2->tag == Ev_Dw || ev2->tag == Ev_Dm)) {
    476 	       tl_assert(ev2->inode == ev->inode);
    477 	       helperName = CLG_(cachesim).log_1I1Dw_name;
    478 	       helperAddr = CLG_(cachesim).log_1I1Dw;
    479 	       argv = mkIRExprVec_3( i_node_expr,
    480 				     get_Event_dea(ev2),
    481 				     mkIRExpr_HWord( get_Event_dszB(ev2) ) );
    482 	       regparms = 3;
    483 	       inew = i+2;
    484 	    }
    485 	    /* Merge an Ir with two following Irs. */
    486 	    else
    487 	    if (ev2 && ev3 && ev2->tag == Ev_Ir && ev3->tag == Ev_Ir) {
    488 	       helperName = CLG_(cachesim).log_3I0D_name;
    489 	       helperAddr = CLG_(cachesim).log_3I0D;
    490 	       argv = mkIRExprVec_3( i_node_expr,
    491 				     mkIRExpr_HWord( (HWord)ev2->inode ),
    492 				     mkIRExpr_HWord( (HWord)ev3->inode ) );
    493 	       regparms = 3;
    494 	       inew = i+3;
    495 	    }
    496 	    /* Merge an Ir with one following Ir. */
    497 	    else
    498 	    if (ev2 && ev2->tag == Ev_Ir) {
    499 	       helperName = CLG_(cachesim).log_2I0D_name;
    500 	       helperAddr = CLG_(cachesim).log_2I0D;
    501 	       argv = mkIRExprVec_2( i_node_expr,
    502 				     mkIRExpr_HWord( (HWord)ev2->inode ) );
    503 	       regparms = 2;
    504 	       inew = i+2;
    505 	    }
    506 	    /* No merging possible; emit as-is. */
    507 	    else {
    508 	       helperName = CLG_(cachesim).log_1I0D_name;
    509 	       helperAddr = CLG_(cachesim).log_1I0D;
    510 	       argv = mkIRExprVec_1( i_node_expr );
    511 	       regparms = 1;
    512 	       inew = i+1;
    513 	    }
    514 	    break;
    515 	 case Ev_Dr:
    516 	    /* Data read or modify */
    517 	    helperName = CLG_(cachesim).log_0I1Dr_name;
    518 	    helperAddr = CLG_(cachesim).log_0I1Dr;
    519 	    argv = mkIRExprVec_3( i_node_expr,
    520 				  get_Event_dea(ev),
    521 				  mkIRExpr_HWord( get_Event_dszB(ev) ) );
    522 	    regparms = 3;
    523 	    inew = i+1;
    524 	    break;
    525 	 case Ev_Dw:
    526 	 case Ev_Dm:
    527 	    /* Data write */
    528 	    helperName = CLG_(cachesim).log_0I1Dw_name;
    529 	    helperAddr = CLG_(cachesim).log_0I1Dw;
    530 	    argv = mkIRExprVec_3( i_node_expr,
    531 				  get_Event_dea(ev),
    532 				  mkIRExpr_HWord( get_Event_dszB(ev) ) );
    533 	    regparms = 3;
    534 	    inew = i+1;
    535 	    break;
    536          case Ev_Bc:
    537             /* Conditional branch */
    538             helperName = "log_cond_branch";
    539             helperAddr = &log_cond_branch;
    540             argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bc.taken );
    541             regparms = 2;
    542             inew = i+1;
    543             break;
    544          case Ev_Bi:
    545             /* Branch to an unknown destination */
    546             helperName = "log_ind_branch";
    547             helperAddr = &log_ind_branch;
    548             argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bi.dst );
    549             regparms = 2;
    550             inew = i+1;
    551             break;
    552          case Ev_G:
    553             /* Global bus event (CAS, LOCK-prefix, LL-SC, etc) */
    554             helperName = "log_global_event";
    555             helperAddr = &log_global_event;
    556             argv = mkIRExprVec_1( i_node_expr );
    557             regparms = 1;
    558             inew = i+1;
    559             break;
    560 	 default:
    561 	    tl_assert(0);
    562       }
    563 
    564       CLG_DEBUGIF(5) {
    565 	  if (inew > i+1) {
    566 	      VG_(printf)("   merge ");
    567 	      showEvent( ev2 );
    568 	  }
    569 	  if (inew > i+2) {
    570 	      VG_(printf)("   merge ");
    571 	      showEvent( ev3 );
    572 	  }
    573 	  if (helperAddr)
    574 	      VG_(printf)("   call  %s (%p)\n",
    575 			  helperName, helperAddr);
    576       }
    577 
    578       /* helper could be unset depending on the simulator used */
    579       if (helperAddr == 0) continue;
    580 
    581       /* Add the helper. */
    582       tl_assert(helperName);
    583       tl_assert(helperAddr);
    584       tl_assert(argv);
    585       di = unsafeIRDirty_0_N( regparms,
    586 			      helperName, VG_(fnptr_to_fnentry)( helperAddr ),
    587 			      argv );
    588       addStmtToIRSB( clgs->sbOut, IRStmt_Dirty(di) );
    589    }
    590 
    591    clgs->events_used = 0;
    592 }
    593 
    594 static void addEvent_Ir ( ClgState* clgs, InstrInfo* inode )
    595 {
    596    Event* evt;
    597    tl_assert(clgs->seen_before || (inode->eventset == 0));
    598    if (!CLG_(clo).simulate_cache) return;
    599 
    600    if (clgs->events_used == N_EVENTS)
    601       flushEvents(clgs);
    602    tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
    603    evt = &clgs->events[clgs->events_used];
    604    init_Event(evt);
    605    evt->tag      = Ev_Ir;
    606    evt->inode    = inode;
    607    clgs->events_used++;
    608 }
    609 
    610 static
    611 void addEvent_Dr ( ClgState* clgs, InstrInfo* inode, Int datasize, IRAtom* ea )
    612 {
    613    Event* evt;
    614    tl_assert(isIRAtom(ea));
    615    tl_assert(datasize >= 1 && datasize <= MIN_LINE_SIZE);
    616    if (!CLG_(clo).simulate_cache) return;
    617 
    618    if (clgs->events_used == N_EVENTS)
    619       flushEvents(clgs);
    620    tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
    621    evt = &clgs->events[clgs->events_used];
    622    init_Event(evt);
    623    evt->tag       = Ev_Dr;
    624    evt->inode     = inode;
    625    evt->Ev.Dr.szB = datasize;
    626    evt->Ev.Dr.ea  = ea;
    627    clgs->events_used++;
    628 }
    629 
    630 static
    631 void addEvent_Dw ( ClgState* clgs, InstrInfo* inode, Int datasize, IRAtom* ea )
    632 {
    633    Event* lastEvt;
    634    Event* evt;
    635    tl_assert(isIRAtom(ea));
    636    tl_assert(datasize >= 1 && datasize <= MIN_LINE_SIZE);
    637    if (!CLG_(clo).simulate_cache) return;
    638 
     639    /* Is it possible to merge this write with the preceding read? */
     640    lastEvt = (clgs->events_used > 0) ? &clgs->events[clgs->events_used-1] : NULL;
     641    if (lastEvt      /* guards against forming &events[-1], which is undefined */
     642        && lastEvt->tag       == Ev_Dr
     643        && lastEvt->Ev.Dr.szB == datasize
     644        && lastEvt->inode     == inode
     645        && eqIRAtom(lastEvt->Ev.Dr.ea, ea))
     646    {
     647       lastEvt->tag   = Ev_Dm;
     648       return;
     649    }
    650 
    651    /* No.  Add as normal. */
    652    if (clgs->events_used == N_EVENTS)
    653       flushEvents(clgs);
    654    tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
    655    evt = &clgs->events[clgs->events_used];
    656    init_Event(evt);
    657    evt->tag       = Ev_Dw;
    658    evt->inode     = inode;
    659    evt->Ev.Dw.szB = datasize;
    660    evt->Ev.Dw.ea  = ea;
    661    clgs->events_used++;
    662 }
    663 
    664 static
    665 void addEvent_Bc ( ClgState* clgs, InstrInfo* inode, IRAtom* guard )
    666 {
    667    Event* evt;
    668    tl_assert(isIRAtom(guard));
    669    tl_assert(typeOfIRExpr(clgs->sbOut->tyenv, guard)
    670              == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64));
    671    if (!CLG_(clo).simulate_branch) return;
    672 
    673    if (clgs->events_used == N_EVENTS)
    674       flushEvents(clgs);
    675    tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
    676    evt = &clgs->events[clgs->events_used];
    677    init_Event(evt);
    678    evt->tag         = Ev_Bc;
    679    evt->inode       = inode;
    680    evt->Ev.Bc.taken = guard;
    681    clgs->events_used++;
    682 }
    683 
    684 static
    685 void addEvent_Bi ( ClgState* clgs, InstrInfo* inode, IRAtom* whereTo )
    686 {
    687    Event* evt;
    688    tl_assert(isIRAtom(whereTo));
    689    tl_assert(typeOfIRExpr(clgs->sbOut->tyenv, whereTo)
    690              == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64));
    691    if (!CLG_(clo).simulate_branch) return;
    692 
    693    if (clgs->events_used == N_EVENTS)
    694       flushEvents(clgs);
    695    tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
    696    evt = &clgs->events[clgs->events_used];
    697    init_Event(evt);
    698    evt->tag       = Ev_Bi;
    699    evt->inode     = inode;
    700    evt->Ev.Bi.dst = whereTo;
    701    clgs->events_used++;
    702 }
    703 
    704 static
    705 void addEvent_G ( ClgState* clgs, InstrInfo* inode )
    706 {
    707    Event* evt;
    708    if (!CLG_(clo).collect_bus) return;
    709 
    710    if (clgs->events_used == N_EVENTS)
    711       flushEvents(clgs);
    712    tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
    713    evt = &clgs->events[clgs->events_used];
    714    init_Event(evt);
    715    evt->tag       = Ev_G;
    716    evt->inode     = inode;
    717    clgs->events_used++;
    718 }
    719 
    720 /* Initialise or check (if already seen before) an InstrInfo for next insn.
     721    We can only set instr_offset/instr_size here. The required event set and
     722    resulting cost offset depend on the events (Ir/Dr/Dw/Dm) of the guest
     723    instruction. The event set is extended as required on flush of the event
     724    queue (once Dm events have been determined); cost offsets are determined at
    725    end of BB instrumentation. */
    726 static
    727 InstrInfo* next_InstrInfo ( ClgState* clgs, UInt instr_size )
    728 {
    729    InstrInfo* ii;
    730    tl_assert(clgs->ii_index >= 0);
    731    tl_assert(clgs->ii_index < clgs->bb->instr_count);
    732    ii = &clgs->bb->instr[ clgs->ii_index ];
    733 
    734    if (clgs->seen_before) {
    735        CLG_ASSERT(ii->instr_offset == clgs->instr_offset);
    736        CLG_ASSERT(ii->instr_size == instr_size);
    737    }
    738    else {
    739        ii->instr_offset = clgs->instr_offset;
    740        ii->instr_size = instr_size;
    741        ii->cost_offset = 0;
    742        ii->eventset = 0;
    743    }
    744 
    745    clgs->ii_index++;
    746    clgs->instr_offset += instr_size;
    747    CLG_(stat).distinct_instrs++;
    748 
    749    return ii;
    750 }
    751 
    752 // return total number of cost values needed for this BB
    753 static
    754 UInt update_cost_offsets( ClgState* clgs )
    755 {
    756     Int i;
    757     InstrInfo* ii;
    758     UInt cost_offset = 0;
    759 
    760     CLG_ASSERT(clgs->bb->instr_count == clgs->ii_index);
    761     for(i=0; i<clgs->ii_index; i++) {
    762 	ii = &clgs->bb->instr[i];
    763 	if (clgs->seen_before) {
    764 	    CLG_ASSERT(ii->cost_offset == cost_offset);
    765 	} else
    766 	    ii->cost_offset = cost_offset;
    767 	cost_offset += ii->eventset ? ii->eventset->size : 0;
    768     }
    769 
    770     return cost_offset;
    771 }
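
         /* Worked example (hypothetical event-set sizes): for a BB with
            three instructions whose eventsets have sizes 2, 0 and 3, the
            loop above assigns cost_offset 0, 2 and 2 respectively and
            returns 5, i.e. this BB needs a cost array of 5 counters. */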
    772 
    773 /*------------------------------------------------------------*/
    774 /*--- Instrumentation                                      ---*/
    775 /*------------------------------------------------------------*/
    776 
    777 #if defined(VG_BIGENDIAN)
    778 # define CLGEndness Iend_BE
    779 #elif defined(VG_LITTLEENDIAN)
    780 # define CLGEndness Iend_LE
    781 #else
    782 # error "Unknown endianness"
    783 #endif
    784 
    785 static
    786 Addr IRConst2Addr(IRConst* con)
    787 {
    788     Addr addr;
    789 
    790     if (sizeof(Addr) == 4) {
    791 	CLG_ASSERT( con->tag == Ico_U32 );
    792 	addr = con->Ico.U32;
    793     }
    794     else if (sizeof(Addr) == 8) {
    795 	CLG_ASSERT( con->tag == Ico_U64 );
    796 	addr = con->Ico.U64;
    797     }
    798     else
    799 	VG_(tool_panic)("Callgrind: invalid Addr type");
    800 
    801     return addr;
    802 }
    803 
     804 /* First pass over a BB to instrument, counting instructions and jumps.
     805  * This is needed to determine the size of the BB struct to allocate.
    806  *
    807  * Called from CLG_(get_bb)
    808  */
    809 void CLG_(collectBlockInfo)(IRSB* sbIn,
    810 			    /*INOUT*/ UInt* instrs,
    811 			    /*INOUT*/ UInt* cjmps,
    812 			    /*INOUT*/ Bool* cjmp_inverted)
    813 {
    814     Int i;
    815     IRStmt* st;
    816     Addr instrAddr =0, jumpDst;
    817     UInt instrLen = 0;
    818     Bool toNextInstr = False;
    819 
    820     // Ist_Exit has to be ignored in preamble code, before first IMark:
     821     // preamble code is added by VEX for self-modifying code, and has
    822     // nothing to do with client code
    823     Bool inPreamble = True;
    824 
    825     if (!sbIn) return;
    826 
    827     for (i = 0; i < sbIn->stmts_used; i++) {
    828 	  st = sbIn->stmts[i];
    829 	  if (Ist_IMark == st->tag) {
    830 	      inPreamble = False;
    831 
    832 	      instrAddr = (Addr)ULong_to_Ptr(st->Ist.IMark.addr);
    833 	      instrLen  = st->Ist.IMark.len;
    834 
    835 	      (*instrs)++;
    836 	      toNextInstr = False;
    837 	  }
    838 	  if (inPreamble) continue;
    839 	  if (Ist_Exit == st->tag) {
    840 	      jumpDst = IRConst2Addr(st->Ist.Exit.dst);
    841 	      toNextInstr =  (jumpDst == instrAddr + instrLen);
    842 
    843 	      (*cjmps)++;
    844 	  }
    845     }
    846 
     847     /* If the last instruction of the BB conditionally jumps to the next
     848      * instruction (= first instruction of next BB in memory), it was inverted by VEX.
    849      */
    850     *cjmp_inverted = toNextInstr;
    851 }
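
         /* Worked example (hypothetical BB): for guest code
            "add; sub; jnz .L" the loop above counts instrs=3 and cjmps=1.
            If .L is exactly the address after the jnz (instrAddr +
            instrLen), toNextInstr is still True at the end, so
            cjmp_inverted gets set. A conditional jump in the middle of a
            BB never sets it, as the next IMark resets toNextInstr. */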
    852 
    853 static
    854 void addConstMemStoreStmt( IRSB* bbOut, UWord addr, UInt val, IRType hWordTy)
    855 {
    856     addStmtToIRSB( bbOut,
    857 		   IRStmt_Store(CLGEndness,
    858 				IRExpr_Const(hWordTy == Ity_I32 ?
    859 					     IRConst_U32( addr ) :
    860 					     IRConst_U64( addr )),
    861 				IRExpr_Const(IRConst_U32(val)) ));
    862 }
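
         /* Sketch of the emitted IR (hypothetical 32-bit address 0x8000):
            addConstMemStoreStmt(sbOut, 0x8000, 2, Ity_I32) adds a statement
            which ppIRStmt() would show roughly as
               STle(0x8000:I32) = 0x2:I32
            i.e. an unconditional constant store; it is used below to update
            CLG_(current_state).jmps_passed from generated code. */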
    863 
    864 
    865 /* add helper call to setup_bbcc, with pointer to BB struct as argument
    866  *
    867  * precondition for setup_bbcc:
    868  * - jmps_passed has number of cond.jumps passed in last executed BB
    869  * - current_bbcc has a pointer to the BBCC of the last executed BB
    870  *   Thus, if bbcc_jmpkind is != -1 (JmpNone),
    871  *     current_bbcc->bb->jmp_addr
    872  *   gives the address of the jump source.
    873  *
    874  * the setup does 2 things:
    875  * - trace call:
     876  *   * Unwind own call stack, i.e. sync our ESP with the real ESP.
     877  *     This is for ESP manipulation (longjmps, C++ exception handling) and RET
     878  *   * For CALLs or JMPs crossing objects, record the call arg and
     879  *     push an entry on our own call stack
    880  *
    881  * - prepare for cache log functions:
    882  *   set current_bbcc to BBCC that gets the costs for this BB execution
    883  *   attached
    884  */
    885 static
    886 void addBBSetupCall(ClgState* clgs)
    887 {
    888    IRDirty* di;
    889    IRExpr  *arg1, **argv;
    890 
    891    arg1 = mkIRExpr_HWord( (HWord)clgs->bb );
    892    argv = mkIRExprVec_1(arg1);
    893    di = unsafeIRDirty_0_N( 1, "setup_bbcc",
    894 			      VG_(fnptr_to_fnentry)( & CLG_(setup_bbcc) ),
    895 			      argv);
    896    addStmtToIRSB( clgs->sbOut, IRStmt_Dirty(di) );
    897 }
    898 
    899 
    900 static
    901 IRSB* CLG_(instrument)( VgCallbackClosure* closure,
    902 			IRSB* sbIn,
    903 			VexGuestLayout* layout,
    904 			VexGuestExtents* vge,
    905 			IRType gWordTy, IRType hWordTy )
    906 {
    907    Int      i, isize;
    908    IRStmt*  st;
    909    Addr     origAddr;
    910    Addr64   cia; /* address of current insn */
    911    InstrInfo* curr_inode = NULL;
    912    ClgState clgs;
    913    UInt     cJumps = 0;
    914 
    915 
    916    if (gWordTy != hWordTy) {
    917       /* We don't currently support this case. */
    918       VG_(tool_panic)("host/guest word size mismatch");
    919    }
    920 
    921    // No instrumentation if it is switched off
    922    if (! CLG_(instrument_state)) {
    923        CLG_DEBUG(5, "instrument(BB %#lx) [Instrumentation OFF]\n",
    924 		 (Addr)closure->readdr);
    925        return sbIn;
    926    }
    927 
    928    CLG_DEBUG(3, "+ instrument(BB %#lx)\n", (Addr)closure->readdr);
    929 
    930    /* Set up SB for instrumented IR */
    931    clgs.sbOut = deepCopyIRSBExceptStmts(sbIn);
    932 
    933    // Copy verbatim any IR preamble preceding the first IMark
    934    i = 0;
    935    while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
    936       addStmtToIRSB( clgs.sbOut, sbIn->stmts[i] );
    937       i++;
    938    }
    939 
    940    // Get the first statement, and origAddr from it
    941    CLG_ASSERT(sbIn->stmts_used >0);
    942    CLG_ASSERT(i < sbIn->stmts_used);
    943    st = sbIn->stmts[i];
    944    CLG_ASSERT(Ist_IMark == st->tag);
    945 
    946    origAddr = (Addr)st->Ist.IMark.addr;
    947    cia   = st->Ist.IMark.addr;
    948    isize = st->Ist.IMark.len;
    949    CLG_ASSERT(origAddr == st->Ist.IMark.addr);  // XXX: check no overflow
    950 
    951    /* Get BB struct (creating if necessary).
    952     * JS: The hash table is keyed with orig_addr_noredir -- important!
    953     * JW: Why? If it is because of different chasing of the redirection,
    954     *     this is not needed, as chasing is switched off in callgrind
    955     */
    956    clgs.bb = CLG_(get_bb)(origAddr, sbIn, &(clgs.seen_before));
    957 
    958    addBBSetupCall(&clgs);
    959 
    960    // Set up running state
    961    clgs.events_used = 0;
    962    clgs.ii_index = 0;
    963    clgs.instr_offset = 0;
    964 
    965    for (/*use current i*/; i < sbIn->stmts_used; i++) {
    966 
    967       st = sbIn->stmts[i];
    968       CLG_ASSERT(isFlatIRStmt(st));
    969 
    970       switch (st->tag) {
    971 	 case Ist_NoOp:
    972 	 case Ist_AbiHint:
    973 	 case Ist_Put:
    974 	 case Ist_PutI:
    975 	 case Ist_MBE:
    976 	    break;
    977 
    978 	 case Ist_IMark: {
    979             cia   = st->Ist.IMark.addr;
    980             isize = st->Ist.IMark.len;
    981             CLG_ASSERT(clgs.instr_offset == (Addr)cia - origAddr);
    982 	    // If Vex fails to decode an instruction, the size will be zero.
    983 	    // Pretend otherwise.
    984 	    if (isize == 0) isize = VG_MIN_INSTR_SZB;
    985 
    986 	    // Sanity-check size.
    987 	    tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB)
    988 		     || VG_CLREQ_SZB == isize );
    989 
    990 	    // Init the inode, record it as the current one.
    991 	    // Subsequent Dr/Dw/Dm events from the same instruction will
    992 	    // also use it.
    993 	    curr_inode = next_InstrInfo (&clgs, isize);
    994 
    995 	    addEvent_Ir( &clgs, curr_inode );
    996 	    break;
    997 	 }
    998 
    999 	 case Ist_WrTmp: {
   1000 	    IRExpr* data = st->Ist.WrTmp.data;
   1001 	    if (data->tag == Iex_Load) {
   1002 	       IRExpr* aexpr = data->Iex.Load.addr;
   1003 	       // Note also, endianness info is ignored.  I guess
   1004 	       // that's not interesting.
   1005 	       addEvent_Dr( &clgs, curr_inode,
   1006 			    sizeofIRType(data->Iex.Load.ty), aexpr );
   1007 	    }
   1008 	    break;
   1009 	 }
   1010 
   1011 	 case Ist_Store: {
   1012 	    IRExpr* data  = st->Ist.Store.data;
   1013 	    IRExpr* aexpr = st->Ist.Store.addr;
   1014 	    addEvent_Dw( &clgs, curr_inode,
   1015 			 sizeofIRType(typeOfIRExpr(sbIn->tyenv, data)), aexpr );
   1016 	    break;
   1017 	 }
   1018 
   1019 	 case Ist_Dirty: {
   1020 	    Int      dataSize;
   1021 	    IRDirty* d = st->Ist.Dirty.details;
   1022 	    if (d->mFx != Ifx_None) {
   1023 	       /* This dirty helper accesses memory.  Collect the details. */
   1024 	       tl_assert(d->mAddr != NULL);
   1025 	       tl_assert(d->mSize != 0);
   1026 	       dataSize = d->mSize;
   1027 	       // Large (eg. 28B, 108B, 512B on x86) data-sized
   1028 	       // instructions will be done inaccurately, but they're
   1029 	       // very rare and this avoids errors from hitting more
   1030 	       // than two cache lines in the simulation.
   1031 	       if (dataSize > MIN_LINE_SIZE)
   1032 		  dataSize = MIN_LINE_SIZE;
   1033 	       if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
   1034 		  addEvent_Dr( &clgs, curr_inode, dataSize, d->mAddr );
   1035 	       if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
   1036 		  addEvent_Dw( &clgs, curr_inode, dataSize, d->mAddr );
   1037 	    } else {
   1038 	       tl_assert(d->mAddr == NULL);
   1039 	       tl_assert(d->mSize == 0);
   1040 	    }
   1041 	    break;
   1042 	 }
   1043 
   1044          case Ist_CAS: {
   1045             /* We treat it as a read and a write of the location.  I
   1046                think that is the same behaviour as it was before IRCAS
   1047                was introduced, since prior to that point, the Vex
   1048                front ends would translate a lock-prefixed instruction
   1049                into a (normal) read followed by a (normal) write. */
   1050             Int    dataSize;
   1051             IRCAS* cas = st->Ist.CAS.details;
   1052             CLG_ASSERT(cas->addr && isIRAtom(cas->addr));
   1053             CLG_ASSERT(cas->dataLo);
   1054             dataSize = sizeofIRType(typeOfIRExpr(sbIn->tyenv, cas->dataLo));
   1055             if (cas->dataHi != NULL)
   1056                dataSize *= 2; /* since this is a doubleword-cas */
   1057             addEvent_Dr( &clgs, curr_inode, dataSize, cas->addr );
   1058             addEvent_Dw( &clgs, curr_inode, dataSize, cas->addr );
   1059             addEvent_G(  &clgs, curr_inode );
   1060             break;
   1061          }
   1062 
   1063          case Ist_LLSC: {
   1064             IRType dataTy;
   1065             if (st->Ist.LLSC.storedata == NULL) {
   1066                /* LL */
   1067                dataTy = typeOfIRTemp(sbIn->tyenv, st->Ist.LLSC.result);
   1068                addEvent_Dr( &clgs, curr_inode,
   1069                             sizeofIRType(dataTy), st->Ist.LLSC.addr );
   1070             } else {
   1071                /* SC */
   1072                dataTy = typeOfIRExpr(sbIn->tyenv, st->Ist.LLSC.storedata);
   1073                addEvent_Dw( &clgs, curr_inode,
   1074                             sizeofIRType(dataTy), st->Ist.LLSC.addr );
   1075                /* I don't know whether the global-bus-lock cost should
   1076                   be attributed to the LL or the SC, but it doesn't
   1077                   really matter since they always have to be used in
   1078                   pairs anyway.  Hence put it (quite arbitrarily) on
   1079                   the SC. */
   1080                addEvent_G(  &clgs, curr_inode );
   1081             }
   1082             break;
   1083          }
   1084 
   1085  	 case Ist_Exit: {
   1086             Bool guest_exit, inverted;
   1087 
   1088             /* VEX code generation sometimes inverts conditional branches.
   1089              * As Callgrind counts (conditional) jumps, it has to correct
   1090              * inversions. The heuristic is the following:
   1091              * (1) Callgrind switches off SB chasing and unrolling, and
    1092              *     therefore it assumes that the only candidate for inversion
    1093              *     is the last conditional branch in an SB.
   1094              * (2) inversion is assumed if the branch jumps to the address of
   1095              *     the next guest instruction in memory.
   1096              * This heuristic is precalculated in CLG_(collectBlockInfo)().
   1097              *
   1098              * Branching behavior is also used for branch prediction. Note that
   1099              * above heuristic is different from what Cachegrind does.
    1100              * the above heuristic is different from what Cachegrind does.
   1101              */
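                     /* Example (hypothetical): if the guest BB ends with
                        "jz .Lnext" and .Lnext directly follows that insn in
                        memory, VEX emits this Ist_Exit with the opposite
                        condition. cjmp_inverted was precalculated for exactly
                        this case, and the XOR below flips the guard back so
                        the predictor sees the guest-visible taken value. */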
   1102             if (cJumps+1 == clgs.bb->cjmp_count)
   1103                 inverted = clgs.bb->cjmp_inverted;
   1104             else
   1105                 inverted = False;
   1106 
   1107             // call branch predictor only if this is a branch in guest code
   1108             guest_exit = (st->Ist.Exit.jk == Ijk_Boring) ||
   1109                          (st->Ist.Exit.jk == Ijk_Call) ||
   1110                          (st->Ist.Exit.jk == Ijk_Ret);
   1111 
   1112             if (guest_exit) {
   1113                 /* Stuff to widen the guard expression to a host word, so
   1114                    we can pass it to the branch predictor simulation
   1115                    functions easily. */
   1116                 IRType   tyW    = hWordTy;
   1117                 IROp     widen  = tyW==Ity_I32  ? Iop_1Uto32  : Iop_1Uto64;
   1118                 IROp     opXOR  = tyW==Ity_I32  ? Iop_Xor32   : Iop_Xor64;
   1119                 IRTemp   guard1 = newIRTemp(clgs.sbOut->tyenv, Ity_I1);
   1120                 IRTemp   guardW = newIRTemp(clgs.sbOut->tyenv, tyW);
   1121                 IRTemp   guard  = newIRTemp(clgs.sbOut->tyenv, tyW);
   1122                 IRExpr*  one    = tyW==Ity_I32 ? IRExpr_Const(IRConst_U32(1))
   1123                                                : IRExpr_Const(IRConst_U64(1));
   1124 
   1125                 /* Widen the guard expression. */
   1126                 addStmtToIRSB( clgs.sbOut,
   1127                                IRStmt_WrTmp( guard1, st->Ist.Exit.guard ));
   1128                 addStmtToIRSB( clgs.sbOut,
   1129                                IRStmt_WrTmp( guardW,
   1130                                              IRExpr_Unop(widen,
   1131                                                          IRExpr_RdTmp(guard1))) );
   1132                 /* If the exit is inverted, invert the sense of the guard. */
   1133                 addStmtToIRSB(
   1134                         clgs.sbOut,
   1135                         IRStmt_WrTmp(
   1136                                 guard,
   1137                                 inverted ? IRExpr_Binop(opXOR, IRExpr_RdTmp(guardW), one)
   1138                                     : IRExpr_RdTmp(guardW)
   1139                                     ));
   1140                 /* And post the event. */
   1141                 addEvent_Bc( &clgs, curr_inode, IRExpr_RdTmp(guard) );
   1142             }
   1143 
    1144 	    /* We may never reach the next statement, so we need to flush
   1145 	       all outstanding transactions now. */
   1146 	    flushEvents( &clgs );
   1147 
   1148 	    CLG_ASSERT(clgs.ii_index>0);
   1149 	    if (!clgs.seen_before) {
   1150 		clgs.bb->jmp[cJumps].instr = clgs.ii_index-1;
   1151 		clgs.bb->jmp[cJumps].skip = False;
   1152 	    }
   1153 
   1154 	    /* Update global variable jmps_passed before the jump
   1155 	     * A correction is needed if VEX inverted the last jump condition
   1156 	    */
   1157 	    addConstMemStoreStmt( clgs.sbOut,
   1158 				  (UWord) &CLG_(current_state).jmps_passed,
   1159                                   inverted ? cJumps+1 : cJumps, hWordTy);
   1160 	    cJumps++;
   1161 
   1162 	    break;
   1163 	 }
   1164 
   1165 	 default:
   1166 	    tl_assert(0);
   1167 	    break;
   1168       }
   1169 
   1170       /* Copy the original statement */
   1171       addStmtToIRSB( clgs.sbOut, st );
   1172 
   1173       CLG_DEBUGIF(5) {
   1174 	 VG_(printf)("   pass  ");
   1175 	 ppIRStmt(st);
   1176 	 VG_(printf)("\n");
   1177       }
   1178    }
   1179 
   1180    /* Deal with branches to unknown destinations.  Except ignore ones
   1181       which are function returns as we assume the return stack
   1182       predictor never mispredicts. */
   1183    if ((sbIn->jumpkind == Ijk_Boring) || (sbIn->jumpkind == Ijk_Call)) {
   1184       if (0) { ppIRExpr( sbIn->next ); VG_(printf)("\n"); }
   1185       switch (sbIn->next->tag) {
   1186          case Iex_Const:
   1187             break; /* boring - branch to known address */
   1188          case Iex_RdTmp:
   1189             /* looks like an indirect branch (branch to unknown) */
   1190             addEvent_Bi( &clgs, curr_inode, sbIn->next );
   1191             break;
   1192          default:
   1193             /* shouldn't happen - if the incoming IR is properly
   1194                flattened, should only have tmp and const cases to
   1195                consider. */
   1196             tl_assert(0);
   1197       }
   1198    }
   1199 
   1200    /* At the end of the bb.  Flush outstandings. */
   1201    flushEvents( &clgs );
   1202 
   1203    /* Always update global variable jmps_passed at end of bb.
   1204     * A correction is needed if VEX inverted the last jump condition
   1205     */
   1206    {
   1207       UInt jmps_passed = cJumps;
   1208       if (clgs.bb->cjmp_inverted) jmps_passed--;
   1209       addConstMemStoreStmt( clgs.sbOut,
   1210 			    (UWord) &CLG_(current_state).jmps_passed,
   1211 			    jmps_passed, hWordTy);
   1212    }
   1213    CLG_ASSERT(clgs.bb->cjmp_count == cJumps);
    1214    CLG_ASSERT(clgs.bb->instr_count == clgs.ii_index);
   1215 
   1216    /* This stores the instr of the call/ret at BB end */
    1217    /* This stores the instr index of the call/ret at the BB end */
   1218 
   1219    if (clgs.seen_before) {
   1220        CLG_ASSERT(clgs.bb->cost_count == update_cost_offsets(&clgs));
    1221        CLG_ASSERT(clgs.bb->instr_len == clgs.instr_offset);
   1222        CLG_ASSERT(clgs.bb->jmpkind == sbIn->jumpkind);
   1223    }
   1224    else {
   1225        clgs.bb->cost_count = update_cost_offsets(&clgs);
   1226        clgs.bb->instr_len = clgs.instr_offset;
   1227        clgs.bb->jmpkind = sbIn->jumpkind;
   1228    }
   1229 
   1230    CLG_DEBUG(3, "- instrument(BB %#lx): byteLen %u, CJumps %u, CostLen %u\n",
   1231 	     origAddr, clgs.bb->instr_len,
   1232 	     clgs.bb->cjmp_count, clgs.bb->cost_count);
   1233    if (cJumps>0) {
   1234        CLG_DEBUG(3, "                     [ ");
   1235        for (i=0;i<cJumps;i++)
   1236 	   CLG_DEBUG(3, "%d ", clgs.bb->jmp[i].instr);
   1237        CLG_DEBUG(3, "], last inverted: %s \n",
   1238 		 clgs.bb->cjmp_inverted ? "yes":"no");
   1239    }
   1240 
   1241   return clgs.sbOut;
   1242 }
   1243 
   1244 /*--------------------------------------------------------------------*/
   1245 /*--- Discarding BB info                                           ---*/
   1246 /*--------------------------------------------------------------------*/
   1247 
   1248 // Called when a translation is removed from the translation cache for
   1249 // any reason at all: to free up space, because the guest code was
   1250 // unmapped or modified, or for any arbitrary reason.
   1251 static
   1252 void clg_discard_superblock_info ( Addr64 orig_addr64, VexGuestExtents vge )
   1253 {
   1254     Addr orig_addr = (Addr)orig_addr64;
   1255 
   1256     tl_assert(vge.n_used > 0);
   1257 
   1258    if (0)
   1259       VG_(printf)( "discard_superblock_info: %p, %p, %llu\n",
   1260                    (void*)(Addr)orig_addr,
   1261                    (void*)(Addr)vge.base[0], (ULong)vge.len[0]);
   1262 
   1263    // Get BB info, remove from table, free BB info.  Simple!  Note that we
   1264    // use orig_addr, not the first instruction address in vge.
   1265    CLG_(delete_bb)(orig_addr);
   1266 }
   1267 
   1268 
   1269 /*------------------------------------------------------------*/
   1270 /*--- CLG_(fini)() and related function                     ---*/
   1271 /*------------------------------------------------------------*/
   1272 
   1273 
   1274 
   1275 static void zero_thread_cost(thread_info* t)
   1276 {
   1277   Int i;
   1278 
   1279   for(i = 0; i < CLG_(current_call_stack).sp; i++) {
   1280     if (!CLG_(current_call_stack).entry[i].jcc) continue;
   1281 
   1282     /* reset call counters to current for active calls */
   1283     CLG_(copy_cost)( CLG_(sets).full,
   1284 		    CLG_(current_call_stack).entry[i].enter_cost,
   1285 		    CLG_(current_state).cost );
   1286     CLG_(current_call_stack).entry[i].jcc->call_counter = 0;
   1287   }
   1288 
   1289   CLG_(forall_bbccs)(CLG_(zero_bbcc));
   1290 
   1291   /* set counter for last dump */
   1292   CLG_(copy_cost)( CLG_(sets).full,
   1293 		  t->lastdump_cost, CLG_(current_state).cost );
   1294 }
   1295 
   1296 void CLG_(zero_all_cost)(Bool only_current_thread)
   1297 {
   1298   if (VG_(clo_verbosity) > 1)
   1299     VG_(message)(Vg_DebugMsg, "  Zeroing costs...\n");
   1300 
   1301   if (only_current_thread)
   1302     zero_thread_cost(CLG_(get_current_thread)());
   1303   else
   1304     CLG_(forall_threads)(zero_thread_cost);
   1305 
   1306   if (VG_(clo_verbosity) > 1)
   1307     VG_(message)(Vg_DebugMsg, "  ...done\n");
   1308 }
   1309 
   1310 static
   1311 void unwind_thread(thread_info* t)
   1312 {
   1313   /* unwind signal handlers */
   1314   while(CLG_(current_state).sig !=0)
   1315     CLG_(post_signal)(CLG_(current_tid),CLG_(current_state).sig);
   1316 
   1317   /* unwind regular call stack */
   1318   while(CLG_(current_call_stack).sp>0)
   1319     CLG_(pop_call_stack)();
   1320 
   1321   /* reset context and function stack for context generation */
   1322   CLG_(init_exec_state)( &CLG_(current_state) );
   1323   CLG_(current_fn_stack).top = CLG_(current_fn_stack).bottom;
   1324 }
   1325 
   1326 static
   1327 void zero_state_cost(thread_info* t)
   1328 {
   1329     CLG_(zero_cost)( CLG_(sets).full, CLG_(current_state).cost );
   1330 }
   1331 
    1332 /* Oops, this can go wrong... */
   1333 extern void VG_(discard_translations) ( Addr64 start, ULong range );
   1334 
   1335 void CLG_(set_instrument_state)(Char* reason, Bool state)
   1336 {
   1337   if (CLG_(instrument_state) == state) {
   1338     CLG_DEBUG(2, "%s: instrumentation already %s\n",
   1339 	     reason, state ? "ON" : "OFF");
   1340     return;
   1341   }
   1342   CLG_(instrument_state) = state;
   1343   CLG_DEBUG(2, "%s: Switching instrumentation %s ...\n",
   1344 	   reason, state ? "ON" : "OFF");
   1345 
   1346   VG_(discard_translations)( (Addr64)0x1000, (ULong) ~0xfffl);
   1347 
   1348   /* reset internal state: call stacks, simulator */
   1349   CLG_(forall_threads)(unwind_thread);
   1350   CLG_(forall_threads)(zero_state_cost);
   1351   (*CLG_(cachesim).clear)();
   1352 
   1353   if (VG_(clo_verbosity) > 1)
   1354     VG_(message)(Vg_DebugMsg, "%s: instrumentation switched %s\n",
   1355 		 reason, state ? "ON" : "OFF");
   1356 }
   1357 
   1358 
   1359 static
   1360 Bool CLG_(handle_client_request)(ThreadId tid, UWord *args, UWord *ret)
   1361 {
   1362    if (!VG_IS_TOOL_USERREQ('C','T',args[0]))
   1363       return False;
   1364 
   1365    switch(args[0]) {
   1366    case VG_USERREQ__DUMP_STATS:
   1367       CLG_(dump_profile)("Client Request", True);
   1368       *ret = 0;                 /* meaningless */
   1369       break;
   1370 
   1371    case VG_USERREQ__DUMP_STATS_AT:
   1372      {
   1373        Char buf[512];
   1374        VG_(sprintf)(buf,"Client Request: %s", (Char*)args[1]);
   1375        CLG_(dump_profile)(buf, True);
   1376        *ret = 0;                 /* meaningless */
   1377      }
   1378      break;
   1379 
   1380    case VG_USERREQ__ZERO_STATS:
   1381      CLG_(zero_all_cost)(True);
   1382       *ret = 0;                 /* meaningless */
   1383       break;
   1384 
   1385    case VG_USERREQ__TOGGLE_COLLECT:
   1386      CLG_(current_state).collect = !CLG_(current_state).collect;
   1387      CLG_DEBUG(2, "Client Request: toggled collection state to %s\n",
   1388 	      CLG_(current_state).collect ? "ON" : "OFF");
   1389      *ret = 0;                 /* meaningless */
   1390      break;
   1391 
   1392    case VG_USERREQ__START_INSTRUMENTATION:
   1393      CLG_(set_instrument_state)("Client Request", True);
   1394      *ret = 0;                 /* meaningless */
   1395      break;
   1396 
   1397    case VG_USERREQ__STOP_INSTRUMENTATION:
   1398      CLG_(set_instrument_state)("Client Request", False);
   1399      *ret = 0;                 /* meaningless */
   1400      break;
   1401 
   1402    default:
   1403       return False;
   1404    }
   1405 
   1406    return True;
   1407 }
   1408 
   1409 
   1410 /* Syscall Timing */
   1411 
   1412 /* struct timeval syscalltime[VG_N_THREADS]; */
   1413 #if CLG_MICROSYSTIME
   1414 #include <sys/time.h>
   1415 #include <sys/syscall.h>
   1416 extern Int VG_(do_syscall) ( UInt, ... );
   1417 
   1418 ULong syscalltime[VG_N_THREADS];
   1419 #else
   1420 UInt syscalltime[VG_N_THREADS];
   1421 #endif
   1422 
   1423 static
   1424 void CLG_(pre_syscalltime)(ThreadId tid, UInt syscallno,
   1425                            UWord* args, UInt nArgs)
   1426 {
   1427   if (CLG_(clo).collect_systime) {
   1428 #if CLG_MICROSYSTIME
   1429     struct vki_timeval tv_now;
   1430     VG_(do_syscall)(__NR_gettimeofday, (UInt)&tv_now, (UInt)NULL);
   1431     syscalltime[tid] = tv_now.tv_sec * 1000000ULL + tv_now.tv_usec;
   1432 #else
   1433     syscalltime[tid] = VG_(read_millisecond_timer)();
   1434 #endif
   1435   }
   1436 }
   1437 
   1438 static
   1439 void CLG_(post_syscalltime)(ThreadId tid, UInt syscallno,
   1440                             UWord* args, UInt nArgs, SysRes res)
   1441 {
   1442   if (CLG_(clo).collect_systime &&
   1443       CLG_(current_state).bbcc) {
   1444       Int o;
   1445 #if CLG_MICROSYSTIME
   1446     struct vki_timeval tv_now;
   1447     ULong diff;
   1448 
   1449     VG_(do_syscall)(__NR_gettimeofday, (UInt)&tv_now, (UInt)NULL);
   1450     diff = (tv_now.tv_sec * 1000000ULL + tv_now.tv_usec) - syscalltime[tid];
   1451 #else
   1452     UInt diff = VG_(read_millisecond_timer)() - syscalltime[tid];
   1453 #endif
   1454 
   1455     /* offset o is for "SysCount", o+1 for "SysTime" */
   1456     o = fullOffset(EG_SYS);
   1457     CLG_ASSERT(o>=0);
    1458     CLG_DEBUG(0,"   Time (Off %d) for Syscall %d: %llu\n", o, syscallno, (ULong)diff);
   1459 
   1460     CLG_(current_state).cost[o] ++;
   1461     CLG_(current_state).cost[o+1] += diff;
   1462     if (!CLG_(current_state).bbcc->skipped)
   1463       CLG_(init_cost_lz)(CLG_(sets).full,
   1464 			&(CLG_(current_state).bbcc->skipped));
   1465     CLG_(current_state).bbcc->skipped[o] ++;
   1466     CLG_(current_state).bbcc->skipped[o+1] += diff;
   1467   }
   1468 }
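
/* Note: syscall costs are attributed twice above -- once to the cost
   array of the current execution state, and once to the lazily
   allocated "skipped" cost array of the BBCC that was active when the
   syscall was made. */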

static UInt ULong_width(ULong n)
{
   UInt w = 0;
   while (n > 0) {
      n = n / 10;
      w++;
   }
   if (w == 0) w = 1;
   return w + (w-1)/3;   // add space for commas
}
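
/* Example: ULong_width(1234567) == 9 -- seven digits plus two commas,
   the width of "1,234,567" as produced by the "," grouping flag in the
   format strings built below. */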

static
void branchsim_printstat(int l1, int l2, int l3)
{
    static Char buf1[128], buf2[128], buf3[128], fmt[128];
    FullCost total;
    ULong Bc_total_b, Bc_total_mp, Bi_total_b, Bi_total_mp;
    ULong B_total_b, B_total_mp;

    total = CLG_(total_cost);
    Bc_total_b  = total[ fullOffset(EG_BC)   ];
    Bc_total_mp = total[ fullOffset(EG_BC)+1 ];
    Bi_total_b  = total[ fullOffset(EG_BI)   ];
    Bi_total_mp = total[ fullOffset(EG_BI)+1 ];

    /* Make format string, getting width right for numbers */
    VG_(sprintf)(fmt, "%%s %%,%dllu  (%%,%dllu cond + %%,%dllu ind)\n",
                 l1, l2, l3);

    /* Avoid division by zero in the percentages below */
    if (0 == Bc_total_b)  Bc_total_b = 1;
    if (0 == Bi_total_b)  Bi_total_b = 1;
    B_total_b  = Bc_total_b  + Bi_total_b;
    B_total_mp = Bc_total_mp + Bi_total_mp;

    VG_(umsg)("\n");
    VG_(umsg)(fmt, "Branches:     ",
              B_total_b, Bc_total_b, Bi_total_b);

    VG_(umsg)(fmt, "Mispredicts:  ",
              B_total_mp, Bc_total_mp, Bi_total_mp);

    VG_(percentify)(B_total_mp,  B_total_b,  1, l1+1, buf1);
    VG_(percentify)(Bc_total_mp, Bc_total_b, 1, l2+1, buf2);
    VG_(percentify)(Bi_total_mp, Bi_total_b, 1, l3+1, buf3);

    VG_(umsg)("Mispred rate:  %s (%s     + %s   )\n", buf1, buf2, buf3);
}
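
/* Illustrative output (numbers hypothetical):

      Branches:      1,234,567  (1,000,000 cond + 234,567 ind)
      Mispredicts:      12,345  (   10,000 cond +   2,345 ind)
      Mispred rate:        1.0% (      1.0%     +     1.0%   )
*/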


static
void finish(void)
{
  Char buf[32+COSTS_LEN], fmt[128];
  Int l1, l2, l3;
  FullCost total;

  CLG_DEBUG(0, "finish()\n");

  (*CLG_(cachesim).finish)();

  /* pop all remaining items from CallStack for correct sum */
  CLG_(forall_threads)(unwind_thread);

  CLG_(dump_profile)(0, False);

  CLG_(finish_command)();

  if (VG_(clo_verbosity) == 0) return;

  /* Detailed internal statistics, printed with --stats=yes */
  if (VG_(clo_stats)) {
    int BB_lookups =
      CLG_(stat).full_debug_BBs +
      CLG_(stat).fn_name_debug_BBs +
      CLG_(stat).file_line_debug_BBs +
      CLG_(stat).no_debug_BBs;

    VG_(message)(Vg_DebugMsg, "\n");
    VG_(message)(Vg_DebugMsg, "Distinct objects: %d\n",
                 CLG_(stat).distinct_objs);
    VG_(message)(Vg_DebugMsg, "Distinct files:   %d\n",
                 CLG_(stat).distinct_files);
    VG_(message)(Vg_DebugMsg, "Distinct fns:     %d\n",
                 CLG_(stat).distinct_fns);
    VG_(message)(Vg_DebugMsg, "Distinct contexts:%d\n",
                 CLG_(stat).distinct_contexts);
    VG_(message)(Vg_DebugMsg, "Distinct BBs:     %d\n",
                 CLG_(stat).distinct_bbs);
    VG_(message)(Vg_DebugMsg, "Cost entries:     %d (Chunks %d)\n",
                 CLG_(costarray_entries), CLG_(costarray_chunks));
    VG_(message)(Vg_DebugMsg, "Distinct BBCCs:   %d\n",
                 CLG_(stat).distinct_bbccs);
    VG_(message)(Vg_DebugMsg, "Distinct JCCs:    %d\n",
                 CLG_(stat).distinct_jccs);
    VG_(message)(Vg_DebugMsg, "Distinct skips:   %d\n",
                 CLG_(stat).distinct_skips);
    VG_(message)(Vg_DebugMsg, "BB lookups:       %d\n",
                 BB_lookups);
    if (BB_lookups>0) {
      VG_(message)(Vg_DebugMsg, "With full      debug info:%3d%% (%d)\n",
                   CLG_(stat).full_debug_BBs    * 100 / BB_lookups,
                   CLG_(stat).full_debug_BBs);
      VG_(message)(Vg_DebugMsg, "With file/line debug info:%3d%% (%d)\n",
                   CLG_(stat).file_line_debug_BBs * 100 / BB_lookups,
                   CLG_(stat).file_line_debug_BBs);
      VG_(message)(Vg_DebugMsg, "With fn name   debug info:%3d%% (%d)\n",
                   CLG_(stat).fn_name_debug_BBs * 100 / BB_lookups,
                   CLG_(stat).fn_name_debug_BBs);
      VG_(message)(Vg_DebugMsg, "With no        debug info:%3d%% (%d)\n",
                   CLG_(stat).no_debug_BBs      * 100 / BB_lookups,
                   CLG_(stat).no_debug_BBs);
    }
    VG_(message)(Vg_DebugMsg, "BBCC Clones:       %d\n",
                 CLG_(stat).bbcc_clones);
    VG_(message)(Vg_DebugMsg, "BBs Retranslated:  %d\n",
                 CLG_(stat).bb_retranslations);
    VG_(message)(Vg_DebugMsg, "Distinct instrs:   %d\n",
                 CLG_(stat).distinct_instrs);
    VG_(message)(Vg_DebugMsg, "\n");

    VG_(message)(Vg_DebugMsg, "LRU Contxt Misses: %d\n",
                 CLG_(stat).cxt_lru_misses);
    VG_(message)(Vg_DebugMsg, "LRU BBCC Misses:   %d\n",
                 CLG_(stat).bbcc_lru_misses);
    VG_(message)(Vg_DebugMsg, "LRU JCC Misses:    %d\n",
                 CLG_(stat).jcc_lru_misses);
    VG_(message)(Vg_DebugMsg, "BBs Executed:      %llu\n",
                 CLG_(stat).bb_executions);
    VG_(message)(Vg_DebugMsg, "Calls:             %llu\n",
                 CLG_(stat).call_counter);
    VG_(message)(Vg_DebugMsg, "CondJMP followed:  %llu\n",
                 CLG_(stat).jcnd_counter);
    VG_(message)(Vg_DebugMsg, "Boring JMPs:       %llu\n",
                 CLG_(stat).jump_counter);
    VG_(message)(Vg_DebugMsg, "Recursive calls:   %llu\n",
                 CLG_(stat).rec_call_counter);
    VG_(message)(Vg_DebugMsg, "Returns:           %llu\n",
                 CLG_(stat).ret_counter);

    VG_(message)(Vg_DebugMsg, "\n");
  }

  CLG_(sprint_eventmapping)(buf, CLG_(dumpmap));
  VG_(message)(Vg_UserMsg, "Events    : %s\n", buf);
  CLG_(sprint_mappingcost)(buf, CLG_(dumpmap), CLG_(total_cost));
  VG_(message)(Vg_UserMsg, "Collected : %s\n", buf);
  VG_(message)(Vg_UserMsg, "\n");

  /* determine value widths for statistics */
  total = CLG_(total_cost);
  l1 = ULong_width( total[fullOffset(EG_IR)] );
  l2 = l3 = 0;
  if (CLG_(clo).simulate_cache) {
      l2 = ULong_width( total[fullOffset(EG_DR)] );
      l3 = ULong_width( total[fullOffset(EG_DW)] );
  }
  if (CLG_(clo).simulate_branch) {
      int l2b = ULong_width( total[fullOffset(EG_BC)] );
      int l3b = ULong_width( total[fullOffset(EG_BI)] );
      if (l2b > l2) l2 = l2b;
      if (l3b > l3) l3 = l3b;
  }

  /* Make format string, getting width right for numbers */
  VG_(sprintf)(fmt, "%%s %%,%dllu\n", l1);

  /* Always print this */
  VG_(umsg)(fmt, "I   refs:     ", total[fullOffset(EG_IR)] );

  if (CLG_(clo).simulate_cache)
      (*CLG_(cachesim).printstat)(l1, l2, l3);

  if (CLG_(clo).simulate_branch)
      branchsim_printstat(l1, l2, l3);
}


void CLG_(fini)(Int exitcode)
{
  finish();
}


/*--------------------------------------------------------------------*/
/*--- Setup                                                        ---*/
/*--------------------------------------------------------------------*/

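/* Called by the core whenever client code is about to run. The throttle
   counter below is a single static variable shared by all threads, so
   the 5000-block threshold applies globally, not per thread. */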
static void clg_start_client_code_callback ( ThreadId tid, ULong blocks_done )
{
   static ULong last_blocks_done = 0;

   if (0)
      VG_(printf)("%d R %llu\n", (Int)tid, blocks_done);

   /* throttle calls to CLG_(run_thread) by number of BBs executed */
   if (blocks_done - last_blocks_done < 5000) return;
   last_blocks_done = blocks_done;

   CLG_(run_thread)( tid );
}

static
void CLG_(post_clo_init)(void)
{
   /* Keep VEX from unrolling loops and chasing through jumps, so that
      translations stay close to single basic blocks (Callgrind works
      at basic block granularity) */
   VG_(clo_vex_control).iropt_unroll_thresh = 0;
   VG_(clo_vex_control).guest_chase_thresh = 0;

   CLG_DEBUG(1, "  dump threads: %s\n", CLG_(clo).separate_threads ? "Yes":"No");
   CLG_DEBUG(1, "  call sep. : %d\n", CLG_(clo).separate_callers);
   CLG_DEBUG(1, "  rec. sep. : %d\n", CLG_(clo).separate_recursions);

   if (!CLG_(clo).dump_line && !CLG_(clo).dump_instr && !CLG_(clo).dump_bb) {
       VG_(message)(Vg_UserMsg, "Using source line as position.\n");
       CLG_(clo).dump_line = True;
   }

   CLG_(init_dumps)();
   CLG_(init_command)();

   (*CLG_(cachesim).post_clo_init)();

   CLG_(init_eventsets)();
   CLG_(init_statistics)(& CLG_(stat));
   CLG_(init_cost_lz)( CLG_(sets).full, &CLG_(total_cost) );

   /* initialize hash tables */
   CLG_(init_obj_table)();
   CLG_(init_cxt_table)();
   CLG_(init_bb_hash)();

   CLG_(init_threads)();
   CLG_(run_thread)(1);

   CLG_(instrument_state) = CLG_(clo).instrument_atstart;

   if (VG_(clo_verbosity) > 0) {
      VG_(message)(Vg_UserMsg,
                   "For interactive control, run 'callgrind_control -h'.\n");
   }
}

static
void CLG_(pre_clo_init)(void)
{
    VG_(details_name)            ("Callgrind");
    VG_(details_version)         (NULL);
    VG_(details_description)     ("a call-graph generating cache profiler");
    VG_(details_copyright_author)("Copyright (C) 2002-2010, and GNU GPL'd, "
                                  "by Josef Weidendorfer et al.");
    VG_(details_bug_reports_to)  (VG_BUGS_TO);
    VG_(details_avg_translation_sizeB) ( 500 );

    VG_(basic_tool_funcs)        (CLG_(post_clo_init),
                                  CLG_(instrument),
                                  CLG_(fini));

    VG_(needs_superblock_discards)(clg_discard_superblock_info);

    VG_(needs_command_line_options)(CLG_(process_cmd_line_option),
                                    CLG_(print_usage),
                                    CLG_(print_debug_usage));

    VG_(needs_client_requests)(CLG_(handle_client_request));
    VG_(needs_syscall_wrapper)(CLG_(pre_syscalltime),
                               CLG_(post_syscalltime));

    VG_(track_start_client_code)  ( & clg_start_client_code_callback );
    VG_(track_pre_deliver_signal) ( & CLG_(pre_signal) );
    VG_(track_post_deliver_signal)( & CLG_(post_signal) );

    CLG_(set_clo_defaults)();
}

/* Core macro: registers CLG_(pre_clo_init) as the tool's entry point and
   records the core/tool interface version for compatibility checking */
VG_DETERMINE_INTERFACE_VERSION(CLG_(pre_clo_init))

/*--------------------------------------------------------------------*/
/*--- end                                                   main.c ---*/
/*--------------------------------------------------------------------*/