Home | History | Annotate | Download | only in lackey
      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- An example Valgrind tool.                          lk_main.c ---*/
      4 /*--------------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Lackey, an example Valgrind tool that does
      8    some simple program measurement and tracing.
      9 
     10    Copyright (C) 2002-2013 Nicholas Nethercote
     11       njn (at) valgrind.org
     12 
     13    This program is free software; you can redistribute it and/or
     14    modify it under the terms of the GNU General Public License as
     15    published by the Free Software Foundation; either version 2 of the
     16    License, or (at your option) any later version.
     17 
     18    This program is distributed in the hope that it will be useful, but
     19    WITHOUT ANY WARRANTY; without even the implied warranty of
     20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21    General Public License for more details.
     22 
     23    You should have received a copy of the GNU General Public License
     24    along with this program; if not, write to the Free Software
     25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     26    02111-1307, USA.
     27 
     28    The GNU General Public License is contained in the file COPYING.
     29 */
     30 
     31 // This tool shows how to do some basic instrumentation.
     32 //
     33 // There are four kinds of instrumentation it can do.  They can be turned
     34 // on/off independently with command line options:
     35 //
     36 // * --basic-counts   : do basic counts, eg. number of instructions
     37 //                      executed, jumps executed, etc.
     38 // * --detailed-counts: do more detailed counts:  number of loads, stores
     39 //                      and ALU operations of different sizes.
     40 // * --trace-mem=yes:   trace all (data) memory accesses.
     41 // * --trace-superblocks=yes:
     42 //                      trace all superblock entries.  Mostly of interest
     43 //                      to the Valgrind developers.
     44 //
     45 // The code for each kind of instrumentation is guarded by a clo_* variable:
     46 // clo_basic_counts, clo_detailed_counts, clo_trace_mem and clo_trace_sbs.
     47 //
     48 // If you want to modify any of the instrumentation code, look for the code
     49 // that is guarded by the relevant clo_* variable (eg. clo_trace_mem)
     50 // If you're not interested in the other kinds of instrumentation you can
     51 // remove them.  If you want to do more complex modifications, please read
     52 // VEX/pub/libvex_ir.h to understand the intermediate representation.
     53 //
     54 //
     55 // Specific Details about --trace-mem=yes
     56 // --------------------------------------
     57 // Lackey's --trace-mem code is a good starting point for building Valgrind
     58 // tools that act on memory loads and stores.  It also could be used as is,
     59 // with its output used as input to a post-mortem processing step.  However,
     60 // because memory traces can be very large, online analysis is generally
     61 // better.
     62 //
     63 // It prints memory data access traces that look like this:
     64 //
     65 //   I  0023C790,2  # instruction read at 0x0023C790 of size 2
     66 //   I  0023C792,5
     67 //    S BE80199C,4  # data store at 0xBE80199C of size 4
     68 //   I  0025242B,3
     69 //    L BE801950,4  # data load at 0xBE801950 of size 4
     70 //   I  0023D476,7
     71 //    M 0025747C,1  # data modify at 0x0025747C of size 1
     72 //   I  0023DC20,2
     73 //    L 00254962,1
     74 //    L BE801FB3,1
     75 //   I  00252305,1
     76 //    L 00254AEB,1
     77 //    S 00257998,1
     78 //
     79 // Every instruction executed has an "instr" event representing it.
     80 // Instructions that do memory accesses are followed by one or more "load",
     81 // "store" or "modify" events.  Some instructions do more than one load or
     82 // store, as in the last two examples in the above trace.
     83 //
     84 // Here are some examples of x86 instructions that do different combinations
     85 // of loads, stores, and modifies.
     86 //
     87 //    Instruction          Memory accesses                  Event sequence
     88 //    -----------          ---------------                  --------------
     89 //    add %eax, %ebx       No loads or stores               instr
     90 //
     91 //    movl (%eax), %ebx    loads (%eax)                     instr, load
     92 //
     93 //    movl %eax, (%ebx)    stores (%ebx)                    instr, store
     94 //
     95 //    incl (%ecx)          modifies (%ecx)                  instr, modify
     96 //
     97 //    cmpsb                loads (%esi), loads(%edi)        instr, load, load
     98 //
     99 //    call*l (%edx)        loads (%edx), stores -4(%esp)    instr, load, store
    100 //    pushl (%edx)         loads (%edx), stores -4(%esp)    instr, load, store
    101 //    movsw                loads (%esi), stores (%edi)      instr, load, store
    102 //
    103 // Instructions using x86 "rep" prefixes are traced as if they are repeated
    104 // N times.
    105 //
    106 // Lackey with --trace-mem gives good traces, but they are not perfect, for
    107 // the following reasons:
    108 //
    109 // - It does not trace into the OS kernel, so system calls and other kernel
    110 //   operations (eg. some scheduling and signal handling code) are ignored.
    111 //
    112 // - It could model loads and stores done at the system call boundary using
    113 //   the pre_mem_read/post_mem_write events.  For example, if you call
    114 //   fstat() you know that the passed in buffer has been written.  But it
    115 //   currently does not do this.
    116 //
    117 // - Valgrind replaces some code (not much) with its own, notably parts of
    118 //   code for scheduling operations and signal handling.  This code is not
    119 //   traced.
    120 //
    121 // - There is no consideration of virtual-to-physical address mapping.
    122 //   This may not matter for many purposes.
    123 //
    124 // - Valgrind modifies the instruction stream in some very minor ways.  For
    125 //   example, on x86 the bts, btc, btr instructions are incorrectly
    126 //   considered to always touch memory (this is a consequence of these
    127 //   instructions being very difficult to simulate).
    128 //
    129 // - Valgrind tools layout memory differently to normal programs, so the
    130 //   addresses you get will not be typical.  Thus Lackey (and all Valgrind
    131 //   tools) is suitable for getting relative memory traces -- eg. if you
    132 //   want to analyse locality of memory accesses -- but is not good if
    133 //   absolute addresses are important.
    134 //
    135 // Despite all these warnings, Lackey's results should be good enough for a
    136 // wide range of purposes.  For example, Cachegrind shares all the above
    137 // shortcomings and it is still useful.
    138 //
    139 // For further inspiration, you should look at cachegrind/cg_main.c which
    140 // uses the same basic technique for tracing memory accesses, but also groups
    141 // events together for processing into twos and threes so that fewer C calls
    142 // are made and things run faster.
    143 //
    144 // Specific Details about --trace-superblocks=yes
    145 // ----------------------------------------------
    146 // Valgrind splits code up into single entry, multiple exit blocks
    147 // known as superblocks.  By itself, --trace-superblocks=yes just
    148 // prints a message as each superblock is run:
    149 //
    150 //  SB 04013170
    151 //  SB 04013177
    152 //  SB 04013173
    153 //  SB 04013177
    154 //
    155 // The hex number is the address of the first instruction in the
    156 // superblock.  You can see the relationship more obviously if you use
    157 // --trace-superblocks=yes and --trace-mem=yes together.  Then a "SB"
    158 // message at address X is immediately followed by an "instr:" message
    159 // for that address, as the first instruction in the block is
    160 // executed, for example:
    161 //
    162 //  SB 04014073
    163 //  I  04014073,3
    164 //   L 7FEFFF7F8,8
    165 //  I  04014076,4
    166 //  I  0401407A,3
    167 //  I  0401407D,3
    168 //  I  04014080,3
    169 //  I  04014083,6
    170 
    171 
    172 #include "pub_tool_basics.h"
    173 #include "pub_tool_tooliface.h"
    174 #include "pub_tool_libcassert.h"
    175 #include "pub_tool_libcprint.h"
    176 #include "pub_tool_debuginfo.h"
    177 #include "pub_tool_libcbase.h"
    178 #include "pub_tool_options.h"
    179 #include "pub_tool_machine.h"     // VG_(fnptr_to_fnentry)
    180 
    181 /*------------------------------------------------------------*/
    182 /*--- Command line options                                 ---*/
    183 /*------------------------------------------------------------*/
    184 
    185 /* Command line options controlling instrumentation kinds, as described at
    186  * the top of this file. */
    187 static Bool clo_basic_counts    = True;
    188 static Bool clo_detailed_counts = False;
    189 static Bool clo_trace_mem       = False;
    190 static Bool clo_trace_sbs       = False;
    191 
    192 /* The name of the function of which the number of calls (under
    193  * --basic-counts=yes) is to be counted, with default. Override with command
    194  * line option --fnname. */
    195 static const HChar* clo_fnname = "main";
    196 
/* Handle one Lackey-specific command line option.  Returns True if
   'arg' was recognised and consumed, False so the core can complain
   about an unknown option.  Note: the VG_STR_CLO / VG_BOOL_CLO macros
   expand to a fully parenthesised match-and-assign expression, which
   is why the "if VG_..." spelling needs no extra parentheses. */
static Bool lk_process_cmd_line_option(const HChar* arg)
{
   if VG_STR_CLO(arg, "--fnname", clo_fnname) {}
   else if VG_BOOL_CLO(arg, "--basic-counts",      clo_basic_counts) {}
   else if VG_BOOL_CLO(arg, "--detailed-counts",   clo_detailed_counts) {}
   else if VG_BOOL_CLO(arg, "--trace-mem",         clo_trace_mem) {}
   else if VG_BOOL_CLO(arg, "--trace-superblocks", clo_trace_sbs) {}
   else
      return False;

   /* --fnname must never end up NULL or empty. */
   tl_assert(clo_fnname);
   tl_assert(clo_fnname[0]);
   return True;
}
    211 
    212 static void lk_print_usage(void)
    213 {
    214    VG_(printf)(
    215 "    --basic-counts=no|yes     count instructions, jumps, etc. [yes]\n"
    216 "    --detailed-counts=no|yes  count loads, stores and alu ops [no]\n"
    217 "    --trace-mem=no|yes        trace all loads and stores [no]\n"
    218 "    --trace-superblocks=no|yes  trace all superblock entries [no]\n"
    219 "    --fnname=<name>           count calls to <name> (only used if\n"
    220 "                              --basic-count=yes)  [main]\n"
    221    );
    222 }
    223 
/* Print the tool-specific part of --help-debug output.  Lackey has no
   debugging options. */
static void lk_print_debug_usage(void)
{
   VG_(printf)(
"    (none)\n"
   );
}
    230 
    231 /*------------------------------------------------------------*/
    232 /*--- Stuff for --basic-counts                             ---*/
    233 /*------------------------------------------------------------*/
    234 
    235 /* Nb: use ULongs because the numbers can get very big */
    236 static ULong n_func_calls    = 0;
    237 static ULong n_SBs_entered   = 0;
    238 static ULong n_SBs_completed = 0;
    239 static ULong n_IRStmts       = 0;
    240 static ULong n_guest_instrs  = 0;
    241 static ULong n_Jccs          = 0;
    242 static ULong n_Jccs_untaken  = 0;
    243 static ULong n_IJccs         = 0;
    244 static ULong n_IJccs_untaken = 0;
    245 
    246 static void add_one_func_call(void)
    247 {
    248    n_func_calls++;
    249 }
    250 
    251 static void add_one_SB_entered(void)
    252 {
    253    n_SBs_entered++;
    254 }
    255 
    256 static void add_one_SB_completed(void)
    257 {
    258    n_SBs_completed++;
    259 }
    260 
    261 static void add_one_IRStmt(void)
    262 {
    263    n_IRStmts++;
    264 }
    265 
    266 static void add_one_guest_instr(void)
    267 {
    268    n_guest_instrs++;
    269 }
    270 
    271 static void add_one_Jcc(void)
    272 {
    273    n_Jccs++;
    274 }
    275 
    276 static void add_one_Jcc_untaken(void)
    277 {
    278    n_Jccs_untaken++;
    279 }
    280 
    281 static void add_one_inverted_Jcc(void)
    282 {
    283    n_IJccs++;
    284 }
    285 
    286 static void add_one_inverted_Jcc_untaken(void)
    287 {
    288    n_IJccs_untaken++;
    289 }
    290 
    291 /*------------------------------------------------------------*/
    292 /*--- Stuff for --detailed-counts                          ---*/
    293 /*------------------------------------------------------------*/
    294 
    295 typedef
    296    IRExpr
    297    IRAtom;
    298 
    299 /* --- Operations --- */
    300 
    301 typedef enum { OpLoad=0, OpStore=1, OpAlu=2 } Op;
    302 
    303 #define N_OPS 3
    304 
    305 
    306 /* --- Types --- */
    307 
    308 #define N_TYPES 11
    309 
    310 static Int type2index ( IRType ty )
    311 {
    312    switch (ty) {
    313       case Ity_I1:      return 0;
    314       case Ity_I8:      return 1;
    315       case Ity_I16:     return 2;
    316       case Ity_I32:     return 3;
    317       case Ity_I64:     return 4;
    318       case Ity_I128:    return 5;
    319       case Ity_F32:     return 6;
    320       case Ity_F64:     return 7;
    321       case Ity_F128:    return 8;
    322       case Ity_V128:    return 9;
    323       case Ity_V256:    return 10;
    324       default: tl_assert(0);
    325    }
    326 }
    327 
    328 static const HChar* nameOfTypeIndex ( Int i )
    329 {
    330    switch (i) {
    331       case 0: return "I1";   break;
    332       case 1: return "I8";   break;
    333       case 2: return "I16";  break;
    334       case 3: return "I32";  break;
    335       case 4: return "I64";  break;
    336       case 5: return "I128"; break;
    337       case 6: return "F32";  break;
    338       case 7: return "F64";  break;
    339       case 8: return "F128";  break;
    340       case 9: return "V128"; break;
    341       case 10: return "V256"; break;
    342       default: tl_assert(0);
    343    }
    344 }
    345 
    346 
    347 /* --- Counts --- */
    348 
    349 static ULong detailCounts[N_OPS][N_TYPES];
    350 
    351 /* The helper that is called from the instrumented code. */
    352 static VG_REGPARM(1)
    353 void increment_detail(ULong* detail)
    354 {
    355    (*detail)++;
    356 }
    357 
    358 /* A helper that adds the instrumentation for a detail.  guard ::
    359    Ity_I1 is the guarding condition for the event.  If NULL it is
    360    assumed to mean "always True". */
    361 static void instrument_detail(IRSB* sb, Op op, IRType type, IRAtom* guard)
    362 {
    363    IRDirty* di;
    364    IRExpr** argv;
    365    const UInt typeIx = type2index(type);
    366 
    367    tl_assert(op < N_OPS);
    368    tl_assert(typeIx < N_TYPES);
    369 
    370    argv = mkIRExprVec_1( mkIRExpr_HWord( (HWord)&detailCounts[op][typeIx] ) );
    371    di = unsafeIRDirty_0_N( 1, "increment_detail",
    372                               VG_(fnptr_to_fnentry)( &increment_detail ),
    373                               argv);
    374    if (guard) di->guard = guard;
    375    addStmtToIRSB( sb, IRStmt_Dirty(di) );
    376 }
    377 
    378 /* Summarize and print the details. */
    379 static void print_details ( void )
    380 {
    381    Int typeIx;
    382    VG_(umsg)("   Type        Loads       Stores       AluOps\n");
    383    VG_(umsg)("   -------------------------------------------\n");
    384    for (typeIx = 0; typeIx < N_TYPES; typeIx++) {
    385       VG_(umsg)("   %4s %'12llu %'12llu %'12llu\n",
    386                 nameOfTypeIndex( typeIx ),
    387                 detailCounts[OpLoad ][typeIx],
    388                 detailCounts[OpStore][typeIx],
    389                 detailCounts[OpAlu  ][typeIx]
    390       );
    391    }
    392 }
    393 
    394 
    395 /*------------------------------------------------------------*/
    396 /*--- Stuff for --trace-mem                                ---*/
    397 /*------------------------------------------------------------*/
    398 
    399 #define MAX_DSIZE    512
    400 
    401 typedef
    402    enum { Event_Ir, Event_Dr, Event_Dw, Event_Dm }
    403    EventKind;
    404 
    405 typedef
    406    struct {
    407       EventKind  ekind;
    408       IRAtom*    addr;
    409       Int        size;
    410       IRAtom*    guard; /* :: Ity_I1, or NULL=="always True" */
    411    }
    412    Event;
    413 
    414 /* Up to this many unnotified events are allowed.  Must be at least two,
    415    so that reads and writes to the same address can be merged into a modify.
    416    Beyond that, larger numbers just potentially induce more spilling due to
    417    extending live ranges of address temporaries. */
    418 #define N_EVENTS 4
    419 
    420 /* Maintain an ordered list of memory events which are outstanding, in
    421    the sense that no IR has yet been generated to do the relevant
    422    helper calls.  The SB is scanned top to bottom and memory events
    423    are added to the end of the list, merging with the most recent
    424    notified event where possible (Dw immediately following Dr and
    425    having the same size and EA can be merged).
    426 
    427    This merging is done so that for architectures which have
    428    load-op-store instructions (x86, amd64), the instr is treated as if
    429    it makes just one memory reference (a modify), rather than two (a
    430    read followed by a write at the same address).
    431 
    432    At various points the list will need to be flushed, that is, IR
    433    generated from it.  That must happen before any possible exit from
    434    the block (the end, or an IRStmt_Exit).  Flushing also takes place
    435    when there is no space to add a new event, and before entering a
    436    RMW (read-modify-write) section on processors supporting LL/SC.
    437 
    438    If we require the simulation statistics to be up to date with
    439    respect to possible memory exceptions, then the list would have to
    440    be flushed before each memory reference.  That's a pain so we don't
    441    bother.
    442 
    443    Flushing the list consists of walking it start to end and emitting
    444    instrumentation IR for each event, in the order in which they
    445    appear. */
    446 
    447 static Event events[N_EVENTS];
    448 static Int   events_used = 0;
    449 
    450 
/* Dirty helper for --trace-mem: print an instruction-fetch event. */
static VG_REGPARM(2) void trace_instr(Addr addr, SizeT size)
{
   VG_(printf)("I  %08lx,%lu\n", addr, size);
}
    455 
/* Dirty helper for --trace-mem: print a data-load event. */
static VG_REGPARM(2) void trace_load(Addr addr, SizeT size)
{
   VG_(printf)(" L %08lx,%lu\n", addr, size);
}
    460 
/* Dirty helper for --trace-mem: print a data-store event. */
static VG_REGPARM(2) void trace_store(Addr addr, SizeT size)
{
   VG_(printf)(" S %08lx,%lu\n", addr, size);
}
    465 
/* Dirty helper for --trace-mem: print a data-modify (read-then-write
   at the same address) event. */
static VG_REGPARM(2) void trace_modify(Addr addr, SizeT size)
{
   VG_(printf)(" M %08lx,%lu\n", addr, size);
}
    470 
    471 
/* Flush the pending-event queue: emit one dirty call into 'sb' for
   each queued event, in queue order, then empty the queue.  See the
   big comment above 'events' for when this must be called. */
static void flushEvents(IRSB* sb)
{
   Int        i;
   const HChar* helperName;
   void*      helperAddr;
   IRExpr**   argv;
   IRDirty*   di;
   Event*     ev;

   for (i = 0; i < events_used; i++) {

      ev = &events[i];

      // Decide on helper fn to call and args to pass it.
      switch (ev->ekind) {
         case Event_Ir: helperName = "trace_instr";
                        helperAddr =  trace_instr;  break;

         case Event_Dr: helperName = "trace_load";
                        helperAddr =  trace_load;   break;

         case Event_Dw: helperName = "trace_store";
                        helperAddr =  trace_store;  break;

         case Event_Dm: helperName = "trace_modify";
                        helperAddr =  trace_modify; break;
         default:
            tl_assert(0);
      }

      // Add the helper.  All four helpers take (addr, size).
      argv = mkIRExprVec_2( ev->addr, mkIRExpr_HWord( ev->size ) );
      di   = unsafeIRDirty_0_N( /*regparms*/2,
                                helperName, VG_(fnptr_to_fnentry)( helperAddr ),
                                argv );
      // A guarded event becomes a guarded call; unguarded events keep
      // the dirty call's default always-true guard.
      if (ev->guard) {
         di->guard = ev->guard;
      }
      addStmtToIRSB( sb, IRStmt_Dirty(di) );
   }

   events_used = 0;
}
    515 
    516 // WARNING:  If you aren't interested in instruction reads, you can omit the
    517 // code that adds calls to trace_instr() in flushEvents().  However, you
    518 // must still call this function, addEvent_Ir() -- it is necessary to add
    519 // the Ir events to the events list so that merging of paired load/store
    520 // events into modify events works correctly.
    521 static void addEvent_Ir ( IRSB* sb, IRAtom* iaddr, UInt isize )
    522 {
    523    Event* evt;
    524    tl_assert(clo_trace_mem);
    525    tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB)
    526             || VG_CLREQ_SZB == isize );
    527    if (events_used == N_EVENTS)
    528       flushEvents(sb);
    529    tl_assert(events_used >= 0 && events_used < N_EVENTS);
    530    evt = &events[events_used];
    531    evt->ekind = Event_Ir;
    532    evt->addr  = iaddr;
    533    evt->size  = isize;
    534    evt->guard = NULL;
    535    events_used++;
    536 }
    537 
    538 /* Add a guarded read event. */
    539 static
    540 void addEvent_Dr_guarded ( IRSB* sb, IRAtom* daddr, Int dsize, IRAtom* guard )
    541 {
    542    Event* evt;
    543    tl_assert(clo_trace_mem);
    544    tl_assert(isIRAtom(daddr));
    545    tl_assert(dsize >= 1 && dsize <= MAX_DSIZE);
    546    if (events_used == N_EVENTS)
    547       flushEvents(sb);
    548    tl_assert(events_used >= 0 && events_used < N_EVENTS);
    549    evt = &events[events_used];
    550    evt->ekind = Event_Dr;
    551    evt->addr  = daddr;
    552    evt->size  = dsize;
    553    evt->guard = guard;
    554    events_used++;
    555 }
    556 
    557 /* Add an ordinary read event, by adding a guarded read event with an
    558    always-true guard. */
    559 static
    560 void addEvent_Dr ( IRSB* sb, IRAtom* daddr, Int dsize )
    561 {
    562    addEvent_Dr_guarded(sb, daddr, dsize, NULL);
    563 }
    564 
    565 /* Add a guarded write event. */
    566 static
    567 void addEvent_Dw_guarded ( IRSB* sb, IRAtom* daddr, Int dsize, IRAtom* guard )
    568 {
    569    Event* evt;
    570    tl_assert(clo_trace_mem);
    571    tl_assert(isIRAtom(daddr));
    572    tl_assert(dsize >= 1 && dsize <= MAX_DSIZE);
    573    if (events_used == N_EVENTS)
    574       flushEvents(sb);
    575    tl_assert(events_used >= 0 && events_used < N_EVENTS);
    576    evt = &events[events_used];
    577    evt->ekind = Event_Dw;
    578    evt->addr  = daddr;
    579    evt->size  = dsize;
    580    evt->guard = guard;
    581    events_used++;
    582 }
    583 
    584 /* Add an ordinary write event.  Try to merge it with an immediately
    585    preceding ordinary read event of the same size to the same
    586    address. */
    587 static
    588 void addEvent_Dw ( IRSB* sb, IRAtom* daddr, Int dsize )
    589 {
    590    Event* lastEvt;
    591    Event* evt;
    592    tl_assert(clo_trace_mem);
    593    tl_assert(isIRAtom(daddr));
    594    tl_assert(dsize >= 1 && dsize <= MAX_DSIZE);
    595 
    596    // Is it possible to merge this write with the preceding read?
    597    lastEvt = &events[events_used-1];
    598    if (events_used > 0
    599        && lastEvt->ekind == Event_Dr
    600        && lastEvt->size  == dsize
    601        && lastEvt->guard == NULL
    602        && eqIRAtom(lastEvt->addr, daddr))
    603    {
    604       lastEvt->ekind = Event_Dm;
    605       return;
    606    }
    607 
    608    // No.  Add as normal.
    609    if (events_used == N_EVENTS)
    610       flushEvents(sb);
    611    tl_assert(events_used >= 0 && events_used < N_EVENTS);
    612    evt = &events[events_used];
    613    evt->ekind = Event_Dw;
    614    evt->size  = dsize;
    615    evt->addr  = daddr;
    616    evt->guard = NULL;
    617    events_used++;
    618 }
    619 
    620 
    621 /*------------------------------------------------------------*/
    622 /*--- Stuff for --trace-superblocks                        ---*/
    623 /*------------------------------------------------------------*/
    624 
/* Dirty helper for --trace-superblocks: print the address of the
   superblock being entered. */
static void trace_superblock(Addr addr)
{
   VG_(printf)("SB %08lx\n", addr);
}
    629 
    630 
    631 /*------------------------------------------------------------*/
    632 /*--- Basic tool functions                                 ---*/
    633 /*------------------------------------------------------------*/
    634 
    635 static void lk_post_clo_init(void)
    636 {
    637    Int op, tyIx;
    638 
    639    if (clo_detailed_counts) {
    640       for (op = 0; op < N_OPS; op++)
    641          for (tyIx = 0; tyIx < N_TYPES; tyIx++)
    642             detailCounts[op][tyIx] = 0;
    643    }
    644 }
    645 
    646 static
    647 IRSB* lk_instrument ( VgCallbackClosure* closure,
    648                       IRSB* sbIn,
    649                       VexGuestLayout* layout,
    650                       VexGuestExtents* vge,
    651                       VexArchInfo* archinfo_host,
    652                       IRType gWordTy, IRType hWordTy )
    653 {
    654    IRDirty*   di;
    655    Int        i;
    656    IRSB*      sbOut;
    657    HChar      fnname[100];
    658    IRTypeEnv* tyenv = sbIn->tyenv;
    659    Addr       iaddr = 0, dst;
    660    UInt       ilen = 0;
    661    Bool       condition_inverted = False;
    662 
    663    if (gWordTy != hWordTy) {
    664       /* We don't currently support this case. */
    665       VG_(tool_panic)("host/guest word size mismatch");
    666    }
    667 
    668    /* Set up SB */
    669    sbOut = deepCopyIRSBExceptStmts(sbIn);
    670 
    671    // Copy verbatim any IR preamble preceding the first IMark
    672    i = 0;
    673    while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
    674       addStmtToIRSB( sbOut, sbIn->stmts[i] );
    675       i++;
    676    }
    677 
    678    if (clo_basic_counts) {
    679       /* Count this superblock. */
    680       di = unsafeIRDirty_0_N( 0, "add_one_SB_entered",
    681                                  VG_(fnptr_to_fnentry)( &add_one_SB_entered ),
    682                                  mkIRExprVec_0() );
    683       addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
    684    }
    685 
    686    if (clo_trace_sbs) {
    687       /* Print this superblock's address. */
    688       di = unsafeIRDirty_0_N(
    689               0, "trace_superblock",
    690               VG_(fnptr_to_fnentry)( &trace_superblock ),
    691               mkIRExprVec_1( mkIRExpr_HWord( vge->base[0] ) )
    692            );
    693       addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
    694    }
    695 
    696    if (clo_trace_mem) {
    697       events_used = 0;
    698    }
    699 
    700    for (/*use current i*/; i < sbIn->stmts_used; i++) {
    701       IRStmt* st = sbIn->stmts[i];
    702       if (!st || st->tag == Ist_NoOp) continue;
    703 
    704       if (clo_basic_counts) {
    705          /* Count one VEX statement. */
    706          di = unsafeIRDirty_0_N( 0, "add_one_IRStmt",
    707                                     VG_(fnptr_to_fnentry)( &add_one_IRStmt ),
    708                                     mkIRExprVec_0() );
    709          addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
    710       }
    711 
    712       switch (st->tag) {
    713          case Ist_NoOp:
    714          case Ist_AbiHint:
    715          case Ist_Put:
    716          case Ist_PutI:
    717          case Ist_MBE:
    718             addStmtToIRSB( sbOut, st );
    719             break;
    720 
    721          case Ist_IMark:
    722             if (clo_basic_counts) {
    723                /* Needed to be able to check for inverted condition in Ist_Exit */
    724                iaddr = st->Ist.IMark.addr;
    725                ilen  = st->Ist.IMark.len;
    726 
    727                /* Count guest instruction. */
    728                di = unsafeIRDirty_0_N( 0, "add_one_guest_instr",
    729                                           VG_(fnptr_to_fnentry)( &add_one_guest_instr ),
    730                                           mkIRExprVec_0() );
    731                addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
    732 
    733                /* An unconditional branch to a known destination in the
    734                 * guest's instructions can be represented, in the IRSB to
    735                 * instrument, by the VEX statements that are the
    736                 * translation of that known destination. This feature is
    737                 * called 'SB chasing' and can be influenced by command
    738                 * line option --vex-guest-chase-thresh.
    739                 *
    740                 * To get an accurate count of the calls to a specific
    741                 * function, taking SB chasing into account, we need to
    742                 * check for each guest instruction (Ist_IMark) if it is
    743                 * the entry point of a function.
    744                 */
    745                tl_assert(clo_fnname);
    746                tl_assert(clo_fnname[0]);
    747                if (VG_(get_fnname_if_entry)(st->Ist.IMark.addr,
    748                                             fnname, sizeof(fnname))
    749                    && 0 == VG_(strcmp)(fnname, clo_fnname)) {
    750                   di = unsafeIRDirty_0_N(
    751                           0, "add_one_func_call",
    752                              VG_(fnptr_to_fnentry)( &add_one_func_call ),
    753                              mkIRExprVec_0() );
    754                   addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
    755                }
    756             }
    757             if (clo_trace_mem) {
    758                // WARNING: do not remove this function call, even if you
    759                // aren't interested in instruction reads.  See the comment
    760                // above the function itself for more detail.
    761                addEvent_Ir( sbOut, mkIRExpr_HWord( (HWord)st->Ist.IMark.addr ),
    762                             st->Ist.IMark.len );
    763             }
    764             addStmtToIRSB( sbOut, st );
    765             break;
    766 
    767          case Ist_WrTmp:
    768             // Add a call to trace_load() if --trace-mem=yes.
    769             if (clo_trace_mem) {
    770                IRExpr* data = st->Ist.WrTmp.data;
    771                if (data->tag == Iex_Load) {
    772                   addEvent_Dr( sbOut, data->Iex.Load.addr,
    773                                sizeofIRType(data->Iex.Load.ty) );
    774                }
    775             }
    776             if (clo_detailed_counts) {
    777                IRExpr* expr = st->Ist.WrTmp.data;
    778                IRType  type = typeOfIRExpr(sbOut->tyenv, expr);
    779                tl_assert(type != Ity_INVALID);
    780                switch (expr->tag) {
    781                   case Iex_Load:
    782                     instrument_detail( sbOut, OpLoad, type, NULL/*guard*/ );
    783                      break;
    784                   case Iex_Unop:
    785                   case Iex_Binop:
    786                   case Iex_Triop:
    787                   case Iex_Qop:
    788                   case Iex_ITE:
    789                      instrument_detail( sbOut, OpAlu, type, NULL/*guard*/ );
    790                      break;
    791                   default:
    792                      break;
    793                }
    794             }
    795             addStmtToIRSB( sbOut, st );
    796             break;
    797 
    798          case Ist_Store: {
    799             IRExpr* data = st->Ist.Store.data;
    800             IRType  type = typeOfIRExpr(tyenv, data);
    801             tl_assert(type != Ity_INVALID);
    802             if (clo_trace_mem) {
    803                addEvent_Dw( sbOut, st->Ist.Store.addr,
    804                             sizeofIRType(type) );
    805             }
    806             if (clo_detailed_counts) {
    807                instrument_detail( sbOut, OpStore, type, NULL/*guard*/ );
    808             }
    809             addStmtToIRSB( sbOut, st );
    810             break;
    811          }
    812 
    813          case Ist_StoreG: {
    814             IRStoreG* sg   = st->Ist.StoreG.details;
    815             IRExpr*   data = sg->data;
    816             IRType    type = typeOfIRExpr(tyenv, data);
    817             tl_assert(type != Ity_INVALID);
    818             if (clo_trace_mem) {
    819                addEvent_Dw_guarded( sbOut, sg->addr,
    820                                     sizeofIRType(type), sg->guard );
    821             }
    822             if (clo_detailed_counts) {
    823                instrument_detail( sbOut, OpStore, type, sg->guard );
    824             }
    825             addStmtToIRSB( sbOut, st );
    826             break;
    827          }
    828 
    829          case Ist_LoadG: {
    830             IRLoadG* lg       = st->Ist.LoadG.details;
    831             IRType   type     = Ity_INVALID; /* loaded type */
    832             IRType   typeWide = Ity_INVALID; /* after implicit widening */
    833             typeOfIRLoadGOp(lg->cvt, &typeWide, &type);
    834             tl_assert(type != Ity_INVALID);
    835             if (clo_trace_mem) {
    836                addEvent_Dr_guarded( sbOut, lg->addr,
    837                                     sizeofIRType(type), lg->guard );
    838             }
    839             if (clo_detailed_counts) {
    840                instrument_detail( sbOut, OpLoad, type, lg->guard );
    841             }
    842             addStmtToIRSB( sbOut, st );
    843             break;
    844          }
    845 
    846          case Ist_Dirty: {
    847             if (clo_trace_mem) {
    848                Int      dsize;
    849                IRDirty* d = st->Ist.Dirty.details;
    850                if (d->mFx != Ifx_None) {
    851                   // This dirty helper accesses memory.  Collect the details.
    852                   tl_assert(d->mAddr != NULL);
    853                   tl_assert(d->mSize != 0);
    854                   dsize = d->mSize;
    855                   if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
    856                      addEvent_Dr( sbOut, d->mAddr, dsize );
    857                   if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
    858                      addEvent_Dw( sbOut, d->mAddr, dsize );
    859                } else {
    860                   tl_assert(d->mAddr == NULL);
    861                   tl_assert(d->mSize == 0);
    862                }
    863             }
    864             addStmtToIRSB( sbOut, st );
    865             break;
    866          }
    867 
    868          case Ist_CAS: {
    869             /* We treat it as a read and a write of the location.  I
    870                think that is the same behaviour as it was before IRCAS
    871                was introduced, since prior to that point, the Vex
    872                front ends would translate a lock-prefixed instruction
    873                into a (normal) read followed by a (normal) write. */
    874             Int    dataSize;
    875             IRType dataTy;
    876             IRCAS* cas = st->Ist.CAS.details;
    877             tl_assert(cas->addr != NULL);
    878             tl_assert(cas->dataLo != NULL);
    879             dataTy   = typeOfIRExpr(tyenv, cas->dataLo);
    880             dataSize = sizeofIRType(dataTy);
    881             if (cas->dataHi != NULL)
    882                dataSize *= 2; /* since it's a doubleword-CAS */
    883             if (clo_trace_mem) {
    884                addEvent_Dr( sbOut, cas->addr, dataSize );
    885                addEvent_Dw( sbOut, cas->addr, dataSize );
    886             }
    887             if (clo_detailed_counts) {
    888                instrument_detail( sbOut, OpLoad, dataTy, NULL/*guard*/ );
    889                if (cas->dataHi != NULL) /* dcas */
    890                   instrument_detail( sbOut, OpLoad, dataTy, NULL/*guard*/ );
    891                instrument_detail( sbOut, OpStore, dataTy, NULL/*guard*/ );
    892                if (cas->dataHi != NULL) /* dcas */
    893                   instrument_detail( sbOut, OpStore, dataTy, NULL/*guard*/ );
    894             }
    895             addStmtToIRSB( sbOut, st );
    896             break;
    897          }
    898 
    899          case Ist_LLSC: {
    900             IRType dataTy;
    901             if (st->Ist.LLSC.storedata == NULL) {
    902                /* LL */
    903                dataTy = typeOfIRTemp(tyenv, st->Ist.LLSC.result);
    904                if (clo_trace_mem) {
    905                   addEvent_Dr( sbOut, st->Ist.LLSC.addr,
    906                                       sizeofIRType(dataTy) );
    907                   /* flush events before LL, helps SC to succeed */
    908                   flushEvents(sbOut);
    909 	       }
    910                if (clo_detailed_counts)
    911                   instrument_detail( sbOut, OpLoad, dataTy, NULL/*guard*/ );
    912             } else {
    913                /* SC */
    914                dataTy = typeOfIRExpr(tyenv, st->Ist.LLSC.storedata);
    915                if (clo_trace_mem)
    916                   addEvent_Dw( sbOut, st->Ist.LLSC.addr,
    917                                       sizeofIRType(dataTy) );
    918                if (clo_detailed_counts)
    919                   instrument_detail( sbOut, OpStore, dataTy, NULL/*guard*/ );
    920             }
    921             addStmtToIRSB( sbOut, st );
    922             break;
    923          }
    924 
    925          case Ist_Exit:
    926             if (clo_basic_counts) {
    927                // The condition of a branch was inverted by VEX if a taken
               // branch is in fact a fall through according to client address
    929                tl_assert(iaddr != 0);
    930                dst = (sizeof(Addr) == 4) ? st->Ist.Exit.dst->Ico.U32 :
    931                                            st->Ist.Exit.dst->Ico.U64;
    932                condition_inverted = (dst == iaddr + ilen);
    933 
    934                /* Count Jcc */
    935                if (!condition_inverted)
    936                   di = unsafeIRDirty_0_N( 0, "add_one_Jcc",
    937                                           VG_(fnptr_to_fnentry)( &add_one_Jcc ),
    938                                           mkIRExprVec_0() );
    939                else
    940                   di = unsafeIRDirty_0_N( 0, "add_one_inverted_Jcc",
    941                                           VG_(fnptr_to_fnentry)(
    942                                              &add_one_inverted_Jcc ),
    943                                           mkIRExprVec_0() );
    944 
    945                addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
    946             }
    947             if (clo_trace_mem) {
    948                flushEvents(sbOut);
    949             }
    950 
    951             addStmtToIRSB( sbOut, st );      // Original statement
    952 
    953             if (clo_basic_counts) {
    954                /* Count non-taken Jcc */
    955                if (!condition_inverted)
    956                   di = unsafeIRDirty_0_N( 0, "add_one_Jcc_untaken",
    957                                           VG_(fnptr_to_fnentry)(
    958                                              &add_one_Jcc_untaken ),
    959                                           mkIRExprVec_0() );
    960                else
    961                   di = unsafeIRDirty_0_N( 0, "add_one_inverted_Jcc_untaken",
    962                                           VG_(fnptr_to_fnentry)(
    963                                              &add_one_inverted_Jcc_untaken ),
    964                                           mkIRExprVec_0() );
    965 
    966                addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
    967             }
    968             break;
    969 
    970          default:
    971             ppIRStmt(st);
    972             tl_assert(0);
    973       }
    974    }
    975 
    976    if (clo_basic_counts) {
    977       /* Count this basic block. */
    978       di = unsafeIRDirty_0_N( 0, "add_one_SB_completed",
    979                                  VG_(fnptr_to_fnentry)( &add_one_SB_completed ),
    980                                  mkIRExprVec_0() );
    981       addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
    982    }
    983 
    984    if (clo_trace_mem) {
    985       /* At the end of the sbIn.  Flush outstandings. */
    986       flushEvents(sbOut);
    987    }
    988 
    989    return sbOut;
    990 }
    991 
    992 static void lk_fini(Int exitcode)
    993 {
    994    HChar percentify_buf[5]; /* Two digits, '%' and 0. */
    995    const int percentify_size = sizeof(percentify_buf) - 1;
    996    const int percentify_decs = 0;
    997 
    998    tl_assert(clo_fnname);
    999    tl_assert(clo_fnname[0]);
   1000 
   1001    if (clo_basic_counts) {
   1002       ULong total_Jccs = n_Jccs + n_IJccs;
   1003       ULong taken_Jccs = (n_Jccs - n_Jccs_untaken) + n_IJccs_untaken;
   1004 
   1005       VG_(umsg)("Counted %'llu call%s to %s()\n",
   1006                 n_func_calls, ( n_func_calls==1 ? "" : "s" ), clo_fnname);
   1007 
   1008       VG_(umsg)("\n");
   1009       VG_(umsg)("Jccs:\n");
   1010       VG_(umsg)("  total:         %'llu\n", total_Jccs);
   1011       VG_(percentify)(taken_Jccs, (total_Jccs ? total_Jccs : 1),
   1012          percentify_decs, percentify_size, percentify_buf);
   1013       VG_(umsg)("  taken:         %'llu (%s)\n",
   1014          taken_Jccs, percentify_buf);
   1015 
   1016       VG_(umsg)("\n");
   1017       VG_(umsg)("Executed:\n");
   1018       VG_(umsg)("  SBs entered:   %'llu\n", n_SBs_entered);
   1019       VG_(umsg)("  SBs completed: %'llu\n", n_SBs_completed);
   1020       VG_(umsg)("  guest instrs:  %'llu\n", n_guest_instrs);
   1021       VG_(umsg)("  IRStmts:       %'llu\n", n_IRStmts);
   1022 
   1023       VG_(umsg)("\n");
   1024       VG_(umsg)("Ratios:\n");
   1025       tl_assert(n_SBs_entered); // Paranoia time.
   1026       VG_(umsg)("  guest instrs : SB entered  = %'llu : 10\n",
   1027          10 * n_guest_instrs / n_SBs_entered);
   1028       VG_(umsg)("       IRStmts : SB entered  = %'llu : 10\n",
   1029          10 * n_IRStmts / n_SBs_entered);
   1030       tl_assert(n_guest_instrs); // Paranoia time.
   1031       VG_(umsg)("       IRStmts : guest instr = %'llu : 10\n",
   1032          10 * n_IRStmts / n_guest_instrs);
   1033    }
   1034 
   1035    if (clo_detailed_counts) {
   1036       VG_(umsg)("\n");
   1037       VG_(umsg)("IR-level counts by type:\n");
   1038       print_details();
   1039    }
   1040 
   1041    if (clo_basic_counts) {
   1042       VG_(umsg)("\n");
   1043       VG_(umsg)("Exit code:       %d\n", exitcode);
   1044    }
   1045 }
   1046 
   1047 static void lk_pre_clo_init(void)
   1048 {
   1049    VG_(details_name)            ("Lackey");
   1050    VG_(details_version)         (NULL);
   1051    VG_(details_description)     ("an example Valgrind tool");
   1052    VG_(details_copyright_author)(
   1053       "Copyright (C) 2002-2013, and GNU GPL'd, by Nicholas Nethercote.");
   1054    VG_(details_bug_reports_to)  (VG_BUGS_TO);
   1055    VG_(details_avg_translation_sizeB) ( 200 );
   1056 
   1057    VG_(basic_tool_funcs)          (lk_post_clo_init,
   1058                                    lk_instrument,
   1059                                    lk_fini);
   1060    VG_(needs_command_line_options)(lk_process_cmd_line_option,
   1061                                    lk_print_usage,
   1062                                    lk_print_debug_usage);
   1063 }
   1064 
/* Tool entry point: records which core/tool interface version this tool
   was built against and names its pre-CLO initialisation function. */
VG_DETERMINE_INTERFACE_VERSION(lk_pre_clo_init)
   1066 
   1067 /*--------------------------------------------------------------------*/
   1068 /*--- end                                                lk_main.c ---*/
   1069 /*--------------------------------------------------------------------*/
   1070