Home | History | Annotate | Download | only in lackey
      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- An example Valgrind tool.                          lk_main.c ---*/
      4 /*--------------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Lackey, an example Valgrind tool that does
      8    some simple program measurement and tracing.
      9 
     10    Copyright (C) 2002-2011 Nicholas Nethercote
     11       njn (at) valgrind.org
     12 
     13    This program is free software; you can redistribute it and/or
     14    modify it under the terms of the GNU General Public License as
     15    published by the Free Software Foundation; either version 2 of the
     16    License, or (at your option) any later version.
     17 
     18    This program is distributed in the hope that it will be useful, but
     19    WITHOUT ANY WARRANTY; without even the implied warranty of
     20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21    General Public License for more details.
     22 
     23    You should have received a copy of the GNU General Public License
     24    along with this program; if not, write to the Free Software
     25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     26    02111-1307, USA.
     27 
     28    The GNU General Public License is contained in the file COPYING.
     29 */
     30 
     31 // This tool shows how to do some basic instrumentation.
     32 //
     33 // There are four kinds of instrumentation it can do.  They can be turned
     34 // on/off independently with command line options:
     35 //
     36 // * --basic-counts   : do basic counts, eg. number of instructions
     37 //                      executed, jumps executed, etc.
     38 // * --detailed-counts: do more detailed counts:  number of loads, stores
     39 //                      and ALU operations of different sizes.
     40 // * --trace-mem=yes:   trace all (data) memory accesses.
     41 // * --trace-superblocks=yes:
     42 //                      trace all superblock entries.  Mostly of interest
     43 //                      to the Valgrind developers.
     44 //
     45 // The code for each kind of instrumentation is guarded by a clo_* variable:
     46 // clo_basic_counts, clo_detailed_counts, clo_trace_mem and clo_trace_sbs.
     47 //
     48 // If you want to modify any of the instrumentation code, look for the code
// that is guarded by the relevant clo_* variable (eg. clo_trace_mem).
     50 // If you're not interested in the other kinds of instrumentation you can
     51 // remove them.  If you want to do more complex modifications, please read
     52 // VEX/pub/libvex_ir.h to understand the intermediate representation.
     53 //
     54 //
     55 // Specific Details about --trace-mem=yes
     56 // --------------------------------------
     57 // Lackey's --trace-mem code is a good starting point for building Valgrind
     58 // tools that act on memory loads and stores.  It also could be used as is,
     59 // with its output used as input to a post-mortem processing step.  However,
     60 // because memory traces can be very large, online analysis is generally
     61 // better.
     62 //
     63 // It prints memory data access traces that look like this:
     64 //
     65 //   I  0023C790,2  # instruction read at 0x0023C790 of size 2
     66 //   I  0023C792,5
     67 //    S BE80199C,4  # data store at 0xBE80199C of size 4
     68 //   I  0025242B,3
     69 //    L BE801950,4  # data load at 0xBE801950 of size 4
     70 //   I  0023D476,7
     71 //    M 0025747C,1  # data modify at 0x0025747C of size 1
     72 //   I  0023DC20,2
     73 //    L 00254962,1
     74 //    L BE801FB3,1
     75 //   I  00252305,1
     76 //    L 00254AEB,1
     77 //    S 00257998,1
     78 //
     79 // Every instruction executed has an "instr" event representing it.
     80 // Instructions that do memory accesses are followed by one or more "load",
     81 // "store" or "modify" events.  Some instructions do more than one load or
     82 // store, as in the last two examples in the above trace.
     83 //
     84 // Here are some examples of x86 instructions that do different combinations
     85 // of loads, stores, and modifies.
     86 //
     87 //    Instruction          Memory accesses                  Event sequence
     88 //    -----------          ---------------                  --------------
     89 //    add %eax, %ebx       No loads or stores               instr
     90 //
     91 //    movl (%eax), %ebx    loads (%eax)                     instr, load
     92 //
     93 //    movl %eax, (%ebx)    stores (%ebx)                    instr, store
     94 //
     95 //    incl (%ecx)          modifies (%ecx)                  instr, modify
     96 //
     97 //    cmpsb                loads (%esi), loads(%edi)        instr, load, load
     98 //
     99 //    call*l (%edx)        loads (%edx), stores -4(%esp)    instr, load, store
    100 //    pushl (%edx)         loads (%edx), stores -4(%esp)    instr, load, store
    101 //    movsw                loads (%esi), stores (%edi)      instr, load, store
    102 //
    103 // Instructions using x86 "rep" prefixes are traced as if they are repeated
    104 // N times.
    105 //
    106 // Lackey with --trace-mem gives good traces, but they are not perfect, for
    107 // the following reasons:
    108 //
    109 // - It does not trace into the OS kernel, so system calls and other kernel
    110 //   operations (eg. some scheduling and signal handling code) are ignored.
    111 //
    112 // - It could model loads and stores done at the system call boundary using
    113 //   the pre_mem_read/post_mem_write events.  For example, if you call
    114 //   fstat() you know that the passed in buffer has been written.  But it
    115 //   currently does not do this.
    116 //
    117 // - Valgrind replaces some code (not much) with its own, notably parts of
    118 //   code for scheduling operations and signal handling.  This code is not
    119 //   traced.
    120 //
    121 // - There is no consideration of virtual-to-physical address mapping.
    122 //   This may not matter for many purposes.
    123 //
    124 // - Valgrind modifies the instruction stream in some very minor ways.  For
    125 //   example, on x86 the bts, btc, btr instructions are incorrectly
    126 //   considered to always touch memory (this is a consequence of these
    127 //   instructions being very difficult to simulate).
    128 //
// - Valgrind tools lay out memory differently to normal programs, so the
    130 //   addresses you get will not be typical.  Thus Lackey (and all Valgrind
    131 //   tools) is suitable for getting relative memory traces -- eg. if you
    132 //   want to analyse locality of memory accesses -- but is not good if
    133 //   absolute addresses are important.
    134 //
    135 // Despite all these warnings, Lackey's results should be good enough for a
    136 // wide range of purposes.  For example, Cachegrind shares all the above
    137 // shortcomings and it is still useful.
    138 //
    139 // For further inspiration, you should look at cachegrind/cg_main.c which
    140 // uses the same basic technique for tracing memory accesses, but also groups
    141 // events together for processing into twos and threes so that fewer C calls
    142 // are made and things run faster.
    143 //
    144 // Specific Details about --trace-superblocks=yes
    145 // ----------------------------------------------
    146 // Valgrind splits code up into single entry, multiple exit blocks
    147 // known as superblocks.  By itself, --trace-superblocks=yes just
    148 // prints a message as each superblock is run:
    149 //
    150 //  SB 04013170
    151 //  SB 04013177
    152 //  SB 04013173
    153 //  SB 04013177
    154 //
    155 // The hex number is the address of the first instruction in the
    156 // superblock.  You can see the relationship more obviously if you use
    157 // --trace-superblocks=yes and --trace-mem=yes together.  Then a "SB"
// message at address X is immediately followed by an "I" message
    159 // for that address, as the first instruction in the block is
    160 // executed, for example:
    161 //
    162 //  SB 04014073
    163 //  I  04014073,3
    164 //   L 7FEFFF7F8,8
    165 //  I  04014076,4
    166 //  I  0401407A,3
    167 //  I  0401407D,3
    168 //  I  04014080,3
    169 //  I  04014083,6
    170 
    171 
    172 #include "pub_tool_basics.h"
    173 #include "pub_tool_tooliface.h"
    174 #include "pub_tool_libcassert.h"
    175 #include "pub_tool_libcprint.h"
    176 #include "pub_tool_debuginfo.h"
    177 #include "pub_tool_libcbase.h"
    178 #include "pub_tool_options.h"
    179 #include "pub_tool_machine.h"     // VG_(fnptr_to_fnentry)
    180 
    181 /*------------------------------------------------------------*/
    182 /*--- Command line options                                 ---*/
    183 /*------------------------------------------------------------*/
    184 
/* Command line options controlling instrumentation kinds, as described at
 * the top of this file.  Each flag is named after the option that
 * lk_process_cmd_line_option() matches it against. */
static Bool clo_basic_counts    = True;   /* --basic-counts      */
static Bool clo_detailed_counts = False;  /* --detailed-counts   */
static Bool clo_trace_mem       = False;  /* --trace-mem         */
static Bool clo_trace_sbs       = False;  /* --trace-superblocks */

/* The name of the function of which the number of calls (under
 * --basic-counts=yes) is to be counted, with default. Override with command
 * line option --fnname. */
static Char* clo_fnname = "main";
    196 
    197 static Bool lk_process_cmd_line_option(Char* arg)
    198 {
    199    if VG_STR_CLO(arg, "--fnname", clo_fnname) {}
    200    else if VG_BOOL_CLO(arg, "--basic-counts",      clo_basic_counts) {}
    201    else if VG_BOOL_CLO(arg, "--detailed-counts",   clo_detailed_counts) {}
    202    else if VG_BOOL_CLO(arg, "--trace-mem",         clo_trace_mem) {}
    203    else if VG_BOOL_CLO(arg, "--trace-superblocks", clo_trace_sbs) {}
    204    else
    205       return False;
    206 
    207    tl_assert(clo_fnname);
    208    tl_assert(clo_fnname[0]);
    209    return True;
    210 }
    211 
    212 static void lk_print_usage(void)
    213 {
    214    VG_(printf)(
    215 "    --basic-counts=no|yes     count instructions, jumps, etc. [yes]\n"
    216 "    --detailed-counts=no|yes  count loads, stores and alu ops [no]\n"
    217 "    --trace-mem=no|yes        trace all loads and stores [no]\n"
    218 "    --trace-superblocks=no|yes  trace all superblock entries [no]\n"
    219 "    --fnname=<name>           count calls to <name> (only used if\n"
    220 "                              --basic-count=yes)  [main]\n"
    221    );
    222 }
    223 
    224 static void lk_print_debug_usage(void)
    225 {
    226    VG_(printf)(
    227 "    (none)\n"
    228    );
    229 }
    230 
    231 /*------------------------------------------------------------*/
    232 /*--- Stuff for --basic-counts                             ---*/
    233 /*------------------------------------------------------------*/
    234 
/* Nb: use ULongs because the numbers can get very big */
static ULong n_func_calls    = 0;   /* bumped by add_one_func_call()            */
static ULong n_SBs_entered   = 0;   /* bumped by add_one_SB_entered()           */
static ULong n_SBs_completed = 0;   /* bumped by add_one_SB_completed()         */
static ULong n_IRStmts       = 0;   /* bumped by add_one_IRStmt()               */
static ULong n_guest_instrs  = 0;   /* bumped by add_one_guest_instr()          */
static ULong n_Jccs          = 0;   /* bumped by add_one_Jcc()                  */
static ULong n_Jccs_untaken  = 0;   /* bumped by add_one_Jcc_untaken()          */
static ULong n_IJccs         = 0;   /* bumped by add_one_inverted_Jcc()         */
static ULong n_IJccs_untaken = 0;   /* bumped by add_one_inverted_Jcc_untaken() */
    245 
    246 static void add_one_func_call(void)
    247 {
    248    n_func_calls++;
    249 }
    250 
    251 static void add_one_SB_entered(void)
    252 {
    253    n_SBs_entered++;
    254 }
    255 
    256 static void add_one_SB_completed(void)
    257 {
    258    n_SBs_completed++;
    259 }
    260 
    261 static void add_one_IRStmt(void)
    262 {
    263    n_IRStmts++;
    264 }
    265 
    266 static void add_one_guest_instr(void)
    267 {
    268    n_guest_instrs++;
    269 }
    270 
    271 static void add_one_Jcc(void)
    272 {
    273    n_Jccs++;
    274 }
    275 
    276 static void add_one_Jcc_untaken(void)
    277 {
    278    n_Jccs_untaken++;
    279 }
    280 
    281 static void add_one_inverted_Jcc(void)
    282 {
    283    n_IJccs++;
    284 }
    285 
    286 static void add_one_inverted_Jcc_untaken(void)
    287 {
    288    n_IJccs_untaken++;
    289 }
    290 
    291 /*------------------------------------------------------------*/
    292 /*--- Stuff for --detailed-counts                          ---*/
    293 /*------------------------------------------------------------*/
    294 
/* --- Operations --- */

/* Kinds of operation tallied by --detailed-counts.  The values are used
   as the first index into detailCounts[][]. */
typedef enum { OpLoad=0, OpStore=1, OpAlu=2 } Op;

/* Number of enumerators in Op; keep in step with the enum above. */
#define N_OPS 3


/* --- Types --- */

/* Number of distinct indices handed out by type2index(); also the second
   dimension of detailCounts[][]. */
#define N_TYPES 10
    305 
    306 static Int type2index ( IRType ty )
    307 {
    308    switch (ty) {
    309       case Ity_I1:      return 0;
    310       case Ity_I8:      return 1;
    311       case Ity_I16:     return 2;
    312       case Ity_I32:     return 3;
    313       case Ity_I64:     return 4;
    314       case Ity_I128:    return 5;
    315       case Ity_F32:     return 6;
    316       case Ity_F64:     return 7;
    317       case Ity_F128:    return 8;
    318       case Ity_V128:    return 9;
    319       default: tl_assert(0);
    320    }
    321 }
    322 
    323 static HChar* nameOfTypeIndex ( Int i )
    324 {
    325    switch (i) {
    326       case 0: return "I1";   break;
    327       case 1: return "I8";   break;
    328       case 2: return "I16";  break;
    329       case 3: return "I32";  break;
    330       case 4: return "I64";  break;
    331       case 5: return "I128"; break;
    332       case 6: return "F32";  break;
    333       case 7: return "F64";  break;
    334       case 8: return "F128";  break;
    335       case 9: return "V128"; break;
    336       default: tl_assert(0);
    337    }
    338 }
    339 
    340 
    341 /* --- Counts --- */
    342 
    343 static ULong detailCounts[N_OPS][N_TYPES];
    344 
    345 /* The helper that is called from the instrumented code. */
    346 static VG_REGPARM(1)
    347 void increment_detail(ULong* detail)
    348 {
    349    (*detail)++;
    350 }
    351 
    352 /* A helper that adds the instrumentation for a detail. */
    353 static void instrument_detail(IRSB* sb, Op op, IRType type)
    354 {
    355    IRDirty* di;
    356    IRExpr** argv;
    357    const UInt typeIx = type2index(type);
    358 
    359    tl_assert(op < N_OPS);
    360    tl_assert(typeIx < N_TYPES);
    361 
    362    argv = mkIRExprVec_1( mkIRExpr_HWord( (HWord)&detailCounts[op][typeIx] ) );
    363    di = unsafeIRDirty_0_N( 1, "increment_detail",
    364                               VG_(fnptr_to_fnentry)( &increment_detail ),
    365                               argv);
    366    addStmtToIRSB( sb, IRStmt_Dirty(di) );
    367 }
    368 
    369 /* Summarize and print the details. */
    370 static void print_details ( void )
    371 {
    372    Int typeIx;
    373    VG_(umsg)("   Type        Loads       Stores       AluOps\n");
    374    VG_(umsg)("   -------------------------------------------\n");
    375    for (typeIx = 0; typeIx < N_TYPES; typeIx++) {
    376       VG_(umsg)("   %4s %'12llu %'12llu %'12llu\n",
    377                 nameOfTypeIndex( typeIx ),
    378                 detailCounts[OpLoad ][typeIx],
    379                 detailCounts[OpStore][typeIx],
    380                 detailCounts[OpAlu  ][typeIx]
    381       );
    382    }
    383 }
    384 
    385 
    386 /*------------------------------------------------------------*/
    387 /*--- Stuff for --trace-mem                                ---*/
    388 /*------------------------------------------------------------*/
    389 
/* Largest data access size that addEvent_Dr()/addEvent_Dw() accept; both
   assert that their size argument lies in 1..MAX_DSIZE. */
#define MAX_DSIZE    512

/* An IRAtom is an IRExpr under another name.  addEvent_Dr()/addEvent_Dw()
   assert isIRAtom() on the address expressions they are given. */
typedef
   IRExpr
   IRAtom;

/* Kind of a pending trace event.  flushEvents() maps these to the helpers
   trace_instr, trace_load, trace_store and trace_modify respectively. */
typedef
   enum { Event_Ir, Event_Dr, Event_Dw, Event_Dm }
   EventKind;

/* One pending trace event, waiting in events[] to be flushed. */
typedef
   struct {
      EventKind  ekind;   /* selects the helper flushEvents() will call   */
      IRAtom*    addr;    /* address expression passed to that helper     */
      Int        size;    /* access size passed to that helper            */
   }
   Event;
    407 
/* Up to this many unnotified events are allowed.  Must be at least two,
   so that reads and writes to the same address can be merged into a modify.
   Beyond that, larger numbers just potentially induce more spilling due to
   extending live ranges of address temporaries.  The addEvent_* functions
   call flushEvents() when the list already holds this many entries. */
#define N_EVENTS 4
    413 
    414 /* Maintain an ordered list of memory events which are outstanding, in
    415    the sense that no IR has yet been generated to do the relevant
    416    helper calls.  The SB is scanned top to bottom and memory events
    417    are added to the end of the list, merging with the most recent
    418    notified event where possible (Dw immediately following Dr and
    419    having the same size and EA can be merged).
    420 
    421    This merging is done so that for architectures which have
    422    load-op-store instructions (x86, amd64), the instr is treated as if
    423    it makes just one memory reference (a modify), rather than two (a
    424    read followed by a write at the same address).
    425 
    426    At various points the list will need to be flushed, that is, IR
    427    generated from it.  That must happen before any possible exit from
    428    the block (the end, or an IRStmt_Exit).  Flushing also takes place
    429    when there is no space to add a new event.
    430 
    431    If we require the simulation statistics to be up to date with
    432    respect to possible memory exceptions, then the list would have to
    433    be flushed before each memory reference.  That's a pain so we don't
    434    bother.
    435 
    436    Flushing the list consists of walking it start to end and emitting
    437    instrumentation IR for each event, in the order in which they
    438    appear. */
    439 
/* The pending-event list itself: entries [0, events_used) are valid and in
   the order in which they were added. */
static Event events[N_EVENTS];
/* Number of valid entries in events[]; reset to 0 by flushEvents(). */
static Int   events_used = 0;
    442 
    443 
    444 static VG_REGPARM(2) void trace_instr(Addr addr, SizeT size)
    445 {
    446    VG_(printf)("I  %08lx,%lu\n", addr, size);
    447 }
    448 
    449 static VG_REGPARM(2) void trace_load(Addr addr, SizeT size)
    450 {
    451    VG_(printf)(" L %08lx,%lu\n", addr, size);
    452 }
    453 
    454 static VG_REGPARM(2) void trace_store(Addr addr, SizeT size)
    455 {
    456    VG_(printf)(" S %08lx,%lu\n", addr, size);
    457 }
    458 
    459 static VG_REGPARM(2) void trace_modify(Addr addr, SizeT size)
    460 {
    461    VG_(printf)(" M %08lx,%lu\n", addr, size);
    462 }
    463 
    464 
    465 static void flushEvents(IRSB* sb)
    466 {
    467    Int        i;
    468    Char*      helperName;
    469    void*      helperAddr;
    470    IRExpr**   argv;
    471    IRDirty*   di;
    472    Event*     ev;
    473 
    474    for (i = 0; i < events_used; i++) {
    475 
    476       ev = &events[i];
    477 
    478       // Decide on helper fn to call and args to pass it.
    479       switch (ev->ekind) {
    480          case Event_Ir: helperName = "trace_instr";
    481                         helperAddr =  trace_instr;  break;
    482 
    483          case Event_Dr: helperName = "trace_load";
    484                         helperAddr =  trace_load;   break;
    485 
    486          case Event_Dw: helperName = "trace_store";
    487                         helperAddr =  trace_store;  break;
    488 
    489          case Event_Dm: helperName = "trace_modify";
    490                         helperAddr =  trace_modify; break;
    491          default:
    492             tl_assert(0);
    493       }
    494 
    495       // Add the helper.
    496       argv = mkIRExprVec_2( ev->addr, mkIRExpr_HWord( ev->size ) );
    497       di   = unsafeIRDirty_0_N( /*regparms*/2,
    498                                 helperName, VG_(fnptr_to_fnentry)( helperAddr ),
    499                                 argv );
    500       addStmtToIRSB( sb, IRStmt_Dirty(di) );
    501    }
    502 
    503    events_used = 0;
    504 }
    505 
    506 // WARNING:  If you aren't interested in instruction reads, you can omit the
    507 // code that adds calls to trace_instr() in flushEvents().  However, you
    508 // must still call this function, addEvent_Ir() -- it is necessary to add
    509 // the Ir events to the events list so that merging of paired load/store
    510 // events into modify events works correctly.
    511 static void addEvent_Ir ( IRSB* sb, IRAtom* iaddr, UInt isize )
    512 {
    513    Event* evt;
    514    tl_assert(clo_trace_mem);
    515    tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB)
    516             || VG_CLREQ_SZB == isize );
    517    if (events_used == N_EVENTS)
    518       flushEvents(sb);
    519    tl_assert(events_used >= 0 && events_used < N_EVENTS);
    520    evt = &events[events_used];
    521    evt->ekind = Event_Ir;
    522    evt->addr  = iaddr;
    523    evt->size  = isize;
    524    events_used++;
    525 }
    526 
    527 static
    528 void addEvent_Dr ( IRSB* sb, IRAtom* daddr, Int dsize )
    529 {
    530    Event* evt;
    531    tl_assert(clo_trace_mem);
    532    tl_assert(isIRAtom(daddr));
    533    tl_assert(dsize >= 1 && dsize <= MAX_DSIZE);
    534    if (events_used == N_EVENTS)
    535       flushEvents(sb);
    536    tl_assert(events_used >= 0 && events_used < N_EVENTS);
    537    evt = &events[events_used];
    538    evt->ekind = Event_Dr;
    539    evt->addr  = daddr;
    540    evt->size  = dsize;
    541    events_used++;
    542 }
    543 
    544 static
    545 void addEvent_Dw ( IRSB* sb, IRAtom* daddr, Int dsize )
    546 {
    547    Event* lastEvt;
    548    Event* evt;
    549    tl_assert(clo_trace_mem);
    550    tl_assert(isIRAtom(daddr));
    551    tl_assert(dsize >= 1 && dsize <= MAX_DSIZE);
    552 
    553    // Is it possible to merge this write with the preceding read?
    554    lastEvt = &events[events_used-1];
    555    if (events_used > 0
    556     && lastEvt->ekind == Event_Dr
    557     && lastEvt->size  == dsize
    558     && eqIRAtom(lastEvt->addr, daddr))
    559    {
    560       lastEvt->ekind = Event_Dm;
    561       return;
    562    }
    563 
    564    // No.  Add as normal.
    565    if (events_used == N_EVENTS)
    566       flushEvents(sb);
    567    tl_assert(events_used >= 0 && events_used < N_EVENTS);
    568    evt = &events[events_used];
    569    evt->ekind = Event_Dw;
    570    evt->size  = dsize;
    571    evt->addr  = daddr;
    572    events_used++;
    573 }
    574 
    575 
    576 /*------------------------------------------------------------*/
    577 /*--- Stuff for --trace-superblocks                        ---*/
    578 /*------------------------------------------------------------*/
    579 
    580 static void trace_superblock(Addr addr)
    581 {
    582    VG_(printf)("SB %08lx\n", addr);
    583 }
    584 
    585 
    586 /*------------------------------------------------------------*/
    587 /*--- Basic tool functions                                 ---*/
    588 /*------------------------------------------------------------*/
    589 
    590 static void lk_post_clo_init(void)
    591 {
    592    Int op, tyIx;
    593 
    594    if (clo_detailed_counts) {
    595       for (op = 0; op < N_OPS; op++)
    596          for (tyIx = 0; tyIx < N_TYPES; tyIx++)
    597             detailCounts[op][tyIx] = 0;
    598    }
    599 }
    600 
    601 static
    602 IRSB* lk_instrument ( VgCallbackClosure* closure,
    603                       IRSB* sbIn,
    604                       VexGuestLayout* layout,
    605                       VexGuestExtents* vge,
    606                       IRType gWordTy, IRType hWordTy )
    607 {
    608    IRDirty*   di;
    609    Int        i;
    610    IRSB*      sbOut;
    611    Char       fnname[100];
    612    IRType     type;
    613    IRTypeEnv* tyenv = sbIn->tyenv;
    614    Addr       iaddr = 0, dst;
    615    UInt       ilen = 0;
    616    Bool       condition_inverted = False;
    617 
    618    if (gWordTy != hWordTy) {
    619       /* We don't currently support this case. */
    620       VG_(tool_panic)("host/guest word size mismatch");
    621    }
    622 
    623    /* Set up SB */
    624    sbOut = deepCopyIRSBExceptStmts(sbIn);
    625 
    626    // Copy verbatim any IR preamble preceding the first IMark
    627    i = 0;
    628    while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
    629       addStmtToIRSB( sbOut, sbIn->stmts[i] );
    630       i++;
    631    }
    632 
    633    if (clo_basic_counts) {
    634       /* Count this superblock. */
    635       di = unsafeIRDirty_0_N( 0, "add_one_SB_entered",
    636                                  VG_(fnptr_to_fnentry)( &add_one_SB_entered ),
    637                                  mkIRExprVec_0() );
    638       addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
    639    }
    640 
    641    if (clo_trace_sbs) {
    642       /* Print this superblock's address. */
    643       di = unsafeIRDirty_0_N(
    644               0, "trace_superblock",
    645               VG_(fnptr_to_fnentry)( &trace_superblock ),
    646               mkIRExprVec_1( mkIRExpr_HWord( vge->base[0] ) )
    647            );
    648       addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
    649    }
    650 
    651    if (clo_trace_mem) {
    652       events_used = 0;
    653    }
    654 
    655    for (/*use current i*/; i < sbIn->stmts_used; i++) {
    656       IRStmt* st = sbIn->stmts[i];
    657       if (!st || st->tag == Ist_NoOp) continue;
    658 
    659       if (clo_basic_counts) {
    660          /* Count one VEX statement. */
    661          di = unsafeIRDirty_0_N( 0, "add_one_IRStmt",
    662                                     VG_(fnptr_to_fnentry)( &add_one_IRStmt ),
    663                                     mkIRExprVec_0() );
    664          addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
    665       }
    666 
    667       switch (st->tag) {
    668          case Ist_NoOp:
    669          case Ist_AbiHint:
    670          case Ist_Put:
    671          case Ist_PutI:
    672          case Ist_MBE:
    673             addStmtToIRSB( sbOut, st );
    674             break;
    675 
    676          case Ist_IMark:
    677             if (clo_basic_counts) {
    678                /* Needed to be able to check for inverted condition in Ist_Exit */
    679                iaddr = st->Ist.IMark.addr;
    680                ilen  = st->Ist.IMark.len;
    681 
    682                /* Count guest instruction. */
    683                di = unsafeIRDirty_0_N( 0, "add_one_guest_instr",
    684                                           VG_(fnptr_to_fnentry)( &add_one_guest_instr ),
    685                                           mkIRExprVec_0() );
    686                addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
    687 
    688                /* An unconditional branch to a known destination in the
    689                 * guest's instructions can be represented, in the IRSB to
    690                 * instrument, by the VEX statements that are the
    691                 * translation of that known destination. This feature is
    692                 * called 'SB chasing' and can be influenced by command
    693                 * line option --vex-guest-chase-thresh.
    694                 *
    695                 * To get an accurate count of the calls to a specific
    696                 * function, taking SB chasing into account, we need to
    697                 * check for each guest instruction (Ist_IMark) if it is
    698                 * the entry point of a function.
    699                 */
    700                tl_assert(clo_fnname);
    701                tl_assert(clo_fnname[0]);
    702                if (VG_(get_fnname_if_entry)(st->Ist.IMark.addr,
    703                                             fnname, sizeof(fnname))
    704                    && 0 == VG_(strcmp)(fnname, clo_fnname)) {
    705                   di = unsafeIRDirty_0_N(
    706                           0, "add_one_func_call",
    707                              VG_(fnptr_to_fnentry)( &add_one_func_call ),
    708                              mkIRExprVec_0() );
    709                   addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
    710                }
    711             }
    712             if (clo_trace_mem) {
    713                // WARNING: do not remove this function call, even if you
    714                // aren't interested in instruction reads.  See the comment
    715                // above the function itself for more detail.
    716                addEvent_Ir( sbOut, mkIRExpr_HWord( (HWord)st->Ist.IMark.addr ),
    717                             st->Ist.IMark.len );
    718             }
    719             addStmtToIRSB( sbOut, st );
    720             break;
    721 
    722          case Ist_WrTmp:
    723             // Add a call to trace_load() if --trace-mem=yes.
    724             if (clo_trace_mem) {
    725                IRExpr* data = st->Ist.WrTmp.data;
    726                if (data->tag == Iex_Load) {
    727                   addEvent_Dr( sbOut, data->Iex.Load.addr,
    728                                sizeofIRType(data->Iex.Load.ty) );
    729                }
    730             }
    731             if (clo_detailed_counts) {
    732                IRExpr* expr = st->Ist.WrTmp.data;
    733                type = typeOfIRExpr(sbOut->tyenv, expr);
    734                tl_assert(type != Ity_INVALID);
    735                switch (expr->tag) {
    736                   case Iex_Load:
    737                      instrument_detail( sbOut, OpLoad, type );
    738                      break;
    739                   case Iex_Unop:
    740                   case Iex_Binop:
    741                   case Iex_Triop:
    742                   case Iex_Qop:
    743                   case Iex_Mux0X:
    744                      instrument_detail( sbOut, OpAlu, type );
    745                      break;
    746                   default:
    747                      break;
    748                }
    749             }
    750             addStmtToIRSB( sbOut, st );
    751             break;
    752 
    753          case Ist_Store:
    754             if (clo_trace_mem) {
    755                IRExpr* data  = st->Ist.Store.data;
    756                addEvent_Dw( sbOut, st->Ist.Store.addr,
    757                             sizeofIRType(typeOfIRExpr(tyenv, data)) );
    758             }
    759             if (clo_detailed_counts) {
    760                type = typeOfIRExpr(sbOut->tyenv, st->Ist.Store.data);
    761                tl_assert(type != Ity_INVALID);
    762                instrument_detail( sbOut, OpStore, type );
    763             }
    764             addStmtToIRSB( sbOut, st );
    765             break;
    766 
    767          case Ist_Dirty: {
    768             if (clo_trace_mem) {
    769                Int      dsize;
    770                IRDirty* d = st->Ist.Dirty.details;
    771                if (d->mFx != Ifx_None) {
    772                   // This dirty helper accesses memory.  Collect the details.
    773                   tl_assert(d->mAddr != NULL);
    774                   tl_assert(d->mSize != 0);
    775                   dsize = d->mSize;
    776                   if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
    777                      addEvent_Dr( sbOut, d->mAddr, dsize );
    778                   if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
    779                      addEvent_Dw( sbOut, d->mAddr, dsize );
    780                } else {
    781                   tl_assert(d->mAddr == NULL);
    782                   tl_assert(d->mSize == 0);
    783                }
    784             }
    785             addStmtToIRSB( sbOut, st );
    786             break;
    787          }
    788 
    789          case Ist_CAS: {
    790             /* We treat it as a read and a write of the location.  I
    791                think that is the same behaviour as it was before IRCAS
    792                was introduced, since prior to that point, the Vex
    793                front ends would translate a lock-prefixed instruction
    794                into a (normal) read followed by a (normal) write. */
    795             Int    dataSize;
    796             IRType dataTy;
    797             IRCAS* cas = st->Ist.CAS.details;
    798             tl_assert(cas->addr != NULL);
    799             tl_assert(cas->dataLo != NULL);
    800             dataTy   = typeOfIRExpr(tyenv, cas->dataLo);
    801             dataSize = sizeofIRType(dataTy);
    802             if (cas->dataHi != NULL)
    803                dataSize *= 2; /* since it's a doubleword-CAS */
    804             if (clo_trace_mem) {
    805                addEvent_Dr( sbOut, cas->addr, dataSize );
    806                addEvent_Dw( sbOut, cas->addr, dataSize );
    807             }
    808             if (clo_detailed_counts) {
    809                instrument_detail( sbOut, OpLoad, dataTy );
    810                if (cas->dataHi != NULL) /* dcas */
    811                   instrument_detail( sbOut, OpLoad, dataTy );
    812                instrument_detail( sbOut, OpStore, dataTy );
    813                if (cas->dataHi != NULL) /* dcas */
    814                   instrument_detail( sbOut, OpStore, dataTy );
    815             }
    816             addStmtToIRSB( sbOut, st );
    817             break;
    818          }
    819 
    820          case Ist_LLSC: {
    821             IRType dataTy;
    822             if (st->Ist.LLSC.storedata == NULL) {
    823                /* LL */
    824                dataTy = typeOfIRTemp(tyenv, st->Ist.LLSC.result);
    825                if (clo_trace_mem)
    826                   addEvent_Dr( sbOut, st->Ist.LLSC.addr,
    827                                       sizeofIRType(dataTy) );
    828                if (clo_detailed_counts)
    829                   instrument_detail( sbOut, OpLoad, dataTy );
    830             } else {
    831                /* SC */
    832                dataTy = typeOfIRExpr(tyenv, st->Ist.LLSC.storedata);
    833                if (clo_trace_mem)
    834                   addEvent_Dw( sbOut, st->Ist.LLSC.addr,
    835                                       sizeofIRType(dataTy) );
    836                if (clo_detailed_counts)
    837                   instrument_detail( sbOut, OpStore, dataTy );
    838             }
    839             addStmtToIRSB( sbOut, st );
    840             break;
    841          }
    842 
    843          case Ist_Exit:
    844             if (clo_basic_counts) {
    845                // The condition of a branch was inverted by VEX if a taken
    846                // branch is in fact a fall through according to client address
    847                tl_assert(iaddr != 0);
    848                dst = (sizeof(Addr) == 4) ? st->Ist.Exit.dst->Ico.U32 :
    849                                            st->Ist.Exit.dst->Ico.U64;
    850                condition_inverted = (dst == iaddr + ilen);
    851 
    852                /* Count Jcc */
    853                if (!condition_inverted)
    854                   di = unsafeIRDirty_0_N( 0, "add_one_Jcc",
    855                                           VG_(fnptr_to_fnentry)( &add_one_Jcc ),
    856                                           mkIRExprVec_0() );
    857                else
    858                   di = unsafeIRDirty_0_N( 0, "add_one_inverted_Jcc",
    859                                           VG_(fnptr_to_fnentry)(
    860                                              &add_one_inverted_Jcc ),
    861                                           mkIRExprVec_0() );
    862 
    863                addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
    864             }
    865             if (clo_trace_mem) {
    866                flushEvents(sbOut);
    867             }
    868 
    869             addStmtToIRSB( sbOut, st );      // Original statement
    870 
    871             if (clo_basic_counts) {
    872                /* Count non-taken Jcc */
    873                if (!condition_inverted)
    874                   di = unsafeIRDirty_0_N( 0, "add_one_Jcc_untaken",
    875                                           VG_(fnptr_to_fnentry)(
    876                                              &add_one_Jcc_untaken ),
    877                                           mkIRExprVec_0() );
    878                else
    879                   di = unsafeIRDirty_0_N( 0, "add_one_inverted_Jcc_untaken",
    880                                           VG_(fnptr_to_fnentry)(
    881                                              &add_one_inverted_Jcc_untaken ),
    882                                           mkIRExprVec_0() );
    883 
    884                addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
    885             }
    886             break;
    887 
    888          default:
    889             tl_assert(0);
    890       }
    891    }
    892 
    893    if (clo_basic_counts) {
    894       /* Count this basic block. */
    895       di = unsafeIRDirty_0_N( 0, "add_one_SB_completed",
    896                                  VG_(fnptr_to_fnentry)( &add_one_SB_completed ),
    897                                  mkIRExprVec_0() );
    898       addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
    899    }
    900 
    901    if (clo_trace_mem) {
    902       /* At the end of the sbIn.  Flush outstandings. */
    903       flushEvents(sbOut);
    904    }
    905 
    906    return sbOut;
    907 }
    908 
    909 static void lk_fini(Int exitcode)
    910 {
    911    char percentify_buf[5]; /* Two digits, '%' and 0. */
    912    const int percentify_size = sizeof(percentify_buf) - 1;
    913    const int percentify_decs = 0;
    914 
    915    tl_assert(clo_fnname);
    916    tl_assert(clo_fnname[0]);
    917 
    918    if (clo_basic_counts) {
    919       ULong total_Jccs = n_Jccs + n_IJccs;
    920       ULong taken_Jccs = (n_Jccs - n_Jccs_untaken) + n_IJccs_untaken;
    921 
    922       VG_(umsg)("Counted %'llu call%s to %s()\n",
    923                 n_func_calls, ( n_func_calls==1 ? "" : "s" ), clo_fnname);
    924 
    925       VG_(umsg)("\n");
    926       VG_(umsg)("Jccs:\n");
    927       VG_(umsg)("  total:         %'llu\n", total_Jccs);
    928       VG_(percentify)(taken_Jccs, (total_Jccs ? total_Jccs : 1),
    929          percentify_decs, percentify_size, percentify_buf);
    930       VG_(umsg)("  taken:         %'llu (%s)\n",
    931          taken_Jccs, percentify_buf);
    932 
    933       VG_(umsg)("\n");
    934       VG_(umsg)("Executed:\n");
    935       VG_(umsg)("  SBs entered:   %'llu\n", n_SBs_entered);
    936       VG_(umsg)("  SBs completed: %'llu\n", n_SBs_completed);
    937       VG_(umsg)("  guest instrs:  %'llu\n", n_guest_instrs);
    938       VG_(umsg)("  IRStmts:       %'llu\n", n_IRStmts);
    939 
    940       VG_(umsg)("\n");
    941       VG_(umsg)("Ratios:\n");
    942       tl_assert(n_SBs_entered); // Paranoia time.
    943       VG_(umsg)("  guest instrs : SB entered  = %'llu : 10\n",
    944          10 * n_guest_instrs / n_SBs_entered);
    945       VG_(umsg)("       IRStmts : SB entered  = %'llu : 10\n",
    946          10 * n_IRStmts / n_SBs_entered);
    947       tl_assert(n_guest_instrs); // Paranoia time.
    948       VG_(umsg)("       IRStmts : guest instr = %'llu : 10\n",
    949          10 * n_IRStmts / n_guest_instrs);
    950    }
    951 
    952    if (clo_detailed_counts) {
    953       VG_(umsg)("\n");
    954       VG_(umsg)("IR-level counts by type:\n");
    955       print_details();
    956    }
    957 
    958    if (clo_basic_counts) {
    959       VG_(umsg)("\n");
    960       VG_(umsg)("Exit code:       %d\n", exitcode);
    961    }
    962 }
    963 
    964 static void lk_pre_clo_init(void)
    965 {
    966    VG_(details_name)            ("Lackey");
    967    VG_(details_version)         (NULL);
    968    VG_(details_description)     ("an example Valgrind tool");
    969    VG_(details_copyright_author)(
    970       "Copyright (C) 2002-2011, and GNU GPL'd, by Nicholas Nethercote.");
    971    VG_(details_bug_reports_to)  (VG_BUGS_TO);
    972    VG_(details_avg_translation_sizeB) ( 200 );
    973 
    974    VG_(basic_tool_funcs)          (lk_post_clo_init,
    975                                    lk_instrument,
    976                                    lk_fini);
    977    VG_(needs_command_line_options)(lk_process_cmd_line_option,
    978                                    lk_print_usage,
    979                                    lk_print_debug_usage);
    980 }
    981 
    982 VG_DETERMINE_INTERFACE_VERSION(lk_pre_clo_init)
    983 
    984 /*--------------------------------------------------------------------*/
    985 /*--- end                                                lk_main.c ---*/
    986 /*--------------------------------------------------------------------*/
    987