Home | History | Annotate | Download | only in lackey
      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- An example Valgrind tool.                          lk_main.c ---*/
      4 /*--------------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Lackey, an example Valgrind tool that does
      8    some simple program measurement and tracing.
      9 
     10    Copyright (C) 2002-2017 Nicholas Nethercote
     11       njn (at) valgrind.org
     12 
     13    This program is free software; you can redistribute it and/or
     14    modify it under the terms of the GNU General Public License as
     15    published by the Free Software Foundation; either version 2 of the
     16    License, or (at your option) any later version.
     17 
     18    This program is distributed in the hope that it will be useful, but
     19    WITHOUT ANY WARRANTY; without even the implied warranty of
     20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21    General Public License for more details.
     22 
     23    You should have received a copy of the GNU General Public License
     24    along with this program; if not, write to the Free Software
     25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     26    02111-1307, USA.
     27 
     28    The GNU General Public License is contained in the file COPYING.
     29 */
     30 
     31 // This tool shows how to do some basic instrumentation.
     32 //
     33 // There are four kinds of instrumentation it can do.  They can be turned
     34 // on/off independently with command line options:
     35 //
     36 // * --basic-counts   : do basic counts, eg. number of instructions
     37 //                      executed, jumps executed, etc.
     38 // * --detailed-counts: do more detailed counts:  number of loads, stores
     39 //                      and ALU operations of different sizes.
     40 // * --trace-mem=yes:   trace all (data) memory accesses.
     41 // * --trace-superblocks=yes:
     42 //                      trace all superblock entries.  Mostly of interest
     43 //                      to the Valgrind developers.
     44 //
     45 // The code for each kind of instrumentation is guarded by a clo_* variable:
     46 // clo_basic_counts, clo_detailed_counts, clo_trace_mem and clo_trace_sbs.
     47 //
     48 // If you want to modify any of the instrumentation code, look for the code
     49 // that is guarded by the relevant clo_* variable (eg. clo_trace_mem)
     50 // If you're not interested in the other kinds of instrumentation you can
     51 // remove them.  If you want to do more complex modifications, please read
     52 // VEX/pub/libvex_ir.h to understand the intermediate representation.
     53 //
     54 //
     55 // Specific Details about --trace-mem=yes
     56 // --------------------------------------
     57 // Lackey's --trace-mem code is a good starting point for building Valgrind
     58 // tools that act on memory loads and stores.  It also could be used as is,
     59 // with its output used as input to a post-mortem processing step.  However,
     60 // because memory traces can be very large, online analysis is generally
     61 // better.
     62 //
     63 // It prints memory data access traces that look like this:
     64 //
     65 //   I  0023C790,2  # instruction read at 0x0023C790 of size 2
     66 //   I  0023C792,5
     67 //    S BE80199C,4  # data store at 0xBE80199C of size 4
     68 //   I  0025242B,3
     69 //    L BE801950,4  # data load at 0xBE801950 of size 4
     70 //   I  0023D476,7
     71 //    M 0025747C,1  # data modify at 0x0025747C of size 1
     72 //   I  0023DC20,2
     73 //    L 00254962,1
     74 //    L BE801FB3,1
     75 //   I  00252305,1
     76 //    L 00254AEB,1
     77 //    S 00257998,1
     78 //
     79 // Every instruction executed has an "instr" event representing it.
     80 // Instructions that do memory accesses are followed by one or more "load",
     81 // "store" or "modify" events.  Some instructions do more than one load or
     82 // store, as in the last two examples in the above trace.
     83 //
     84 // Here are some examples of x86 instructions that do different combinations
     85 // of loads, stores, and modifies.
     86 //
     87 //    Instruction          Memory accesses                  Event sequence
     88 //    -----------          ---------------                  --------------
     89 //    add %eax, %ebx       No loads or stores               instr
     90 //
     91 //    movl (%eax), %ebx    loads (%eax)                     instr, load
     92 //
     93 //    movl %eax, (%ebx)    stores (%ebx)                    instr, store
     94 //
     95 //    incl (%ecx)          modifies (%ecx)                  instr, modify
     96 //
     97 //    cmpsb                loads (%esi), loads(%edi)        instr, load, load
     98 //
     99 //    call*l (%edx)        loads (%edx), stores -4(%esp)    instr, load, store
    100 //    pushl (%edx)         loads (%edx), stores -4(%esp)    instr, load, store
    101 //    movsw                loads (%esi), stores (%edi)      instr, load, store
    102 //
    103 // Instructions using x86 "rep" prefixes are traced as if they are repeated
    104 // N times.
    105 //
    106 // Lackey with --trace-mem gives good traces, but they are not perfect, for
    107 // the following reasons:
    108 //
    109 // - It does not trace into the OS kernel, so system calls and other kernel
    110 //   operations (eg. some scheduling and signal handling code) are ignored.
    111 //
    112 // - It could model loads and stores done at the system call boundary using
    113 //   the pre_mem_read/post_mem_write events.  For example, if you call
    114 //   fstat() you know that the passed in buffer has been written.  But it
    115 //   currently does not do this.
    116 //
    117 // - Valgrind replaces some code (not much) with its own, notably parts of
    118 //   code for scheduling operations and signal handling.  This code is not
    119 //   traced.
    120 //
    121 // - There is no consideration of virtual-to-physical address mapping.
    122 //   This may not matter for many purposes.
    123 //
    124 // - Valgrind modifies the instruction stream in some very minor ways.  For
    125 //   example, on x86 the bts, btc, btr instructions are incorrectly
    126 //   considered to always touch memory (this is a consequence of these
    127 //   instructions being very difficult to simulate).
    128 //
    129 // - Valgrind tools layout memory differently to normal programs, so the
    130 //   addresses you get will not be typical.  Thus Lackey (and all Valgrind
    131 //   tools) is suitable for getting relative memory traces -- eg. if you
    132 //   want to analyse locality of memory accesses -- but is not good if
    133 //   absolute addresses are important.
    134 //
    135 // Despite all these warnings, Lackey's results should be good enough for a
    136 // wide range of purposes.  For example, Cachegrind shares all the above
    137 // shortcomings and it is still useful.
    138 //
    139 // For further inspiration, you should look at cachegrind/cg_main.c which
    140 // uses the same basic technique for tracing memory accesses, but also groups
    141 // events together for processing into twos and threes so that fewer C calls
    142 // are made and things run faster.
    143 //
    144 // Specific Details about --trace-superblocks=yes
    145 // ----------------------------------------------
    146 // Valgrind splits code up into single entry, multiple exit blocks
    147 // known as superblocks.  By itself, --trace-superblocks=yes just
    148 // prints a message as each superblock is run:
    149 //
    150 //  SB 04013170
    151 //  SB 04013177
    152 //  SB 04013173
    153 //  SB 04013177
    154 //
    155 // The hex number is the address of the first instruction in the
    156 // superblock.  You can see the relationship more obviously if you use
    157 // --trace-superblocks=yes and --trace-mem=yes together.  Then a "SB"
    158 // message at address X is immediately followed by an "instr:" message
    159 // for that address, as the first instruction in the block is
    160 // executed, for example:
    161 //
    162 //  SB 04014073
    163 //  I  04014073,3
    164 //   L 7FEFFF7F8,8
    165 //  I  04014076,4
    166 //  I  0401407A,3
    167 //  I  0401407D,3
    168 //  I  04014080,3
    169 //  I  04014083,6
    170 
    171 
    172 #include "pub_tool_basics.h"
    173 #include "pub_tool_tooliface.h"
    174 #include "pub_tool_libcassert.h"
    175 #include "pub_tool_libcprint.h"
    176 #include "pub_tool_debuginfo.h"
    177 #include "pub_tool_libcbase.h"
    178 #include "pub_tool_options.h"
    179 #include "pub_tool_machine.h"     // VG_(fnptr_to_fnentry)
    180 
    181 /*------------------------------------------------------------*/
    182 /*--- Command line options                                 ---*/
    183 /*------------------------------------------------------------*/
    184 
    185 /* Command line options controlling instrumentation kinds, as described at
    186  * the top of this file. */
    187 static Bool clo_basic_counts    = True;
    188 static Bool clo_detailed_counts = False;
    189 static Bool clo_trace_mem       = False;
    190 static Bool clo_trace_sbs       = False;
    191 
    192 /* The name of the function of which the number of calls (under
    193  * --basic-counts=yes) is to be counted, with default. Override with command
    194  * line option --fnname. */
    195 static const HChar* clo_fnname = "main";
    196 
    197 static Bool lk_process_cmd_line_option(const HChar* arg)
    198 {
    199    if VG_STR_CLO(arg, "--fnname", clo_fnname) {}
    200    else if VG_BOOL_CLO(arg, "--basic-counts",      clo_basic_counts) {}
    201    else if VG_BOOL_CLO(arg, "--detailed-counts",   clo_detailed_counts) {}
    202    else if VG_BOOL_CLO(arg, "--trace-mem",         clo_trace_mem) {}
    203    else if VG_BOOL_CLO(arg, "--trace-superblocks", clo_trace_sbs) {}
    204    else
    205       return False;
    206 
    207    tl_assert(clo_fnname);
    208    tl_assert(clo_fnname[0]);
    209    return True;
    210 }
    211 
    212 static void lk_print_usage(void)
    213 {
    214    VG_(printf)(
    215 "    --basic-counts=no|yes     count instructions, jumps, etc. [yes]\n"
    216 "    --detailed-counts=no|yes  count loads, stores and alu ops [no]\n"
    217 "    --trace-mem=no|yes        trace all loads and stores [no]\n"
    218 "    --trace-superblocks=no|yes  trace all superblock entries [no]\n"
    219 "    --fnname=<name>           count calls to <name> (only used if\n"
    220 "                              --basic-count=yes)  [main]\n"
    221    );
    222 }
    223 
    224 static void lk_print_debug_usage(void)
    225 {
    226    VG_(printf)(
    227 "    (none)\n"
    228    );
    229 }
    230 
    231 /*------------------------------------------------------------*/
    232 /*--- Stuff for --basic-counts                             ---*/
    233 /*------------------------------------------------------------*/
    234 
    235 /* Nb: use ULongs because the numbers can get very big */
    236 static ULong n_func_calls    = 0;
    237 static ULong n_SBs_entered   = 0;
    238 static ULong n_SBs_completed = 0;
    239 static ULong n_IRStmts       = 0;
    240 static ULong n_guest_instrs  = 0;
    241 static ULong n_Jccs          = 0;
    242 static ULong n_Jccs_untaken  = 0;
    243 static ULong n_IJccs         = 0;
    244 static ULong n_IJccs_untaken = 0;
    245 
    246 static void add_one_func_call(void)
    247 {
    248    n_func_calls++;
    249 }
    250 
    251 static void add_one_SB_entered(void)
    252 {
    253    n_SBs_entered++;
    254 }
    255 
    256 static void add_one_SB_completed(void)
    257 {
    258    n_SBs_completed++;
    259 }
    260 
    261 static void add_one_IRStmt(void)
    262 {
    263    n_IRStmts++;
    264 }
    265 
    266 static void add_one_guest_instr(void)
    267 {
    268    n_guest_instrs++;
    269 }
    270 
    271 static void add_one_Jcc(void)
    272 {
    273    n_Jccs++;
    274 }
    275 
    276 static void add_one_Jcc_untaken(void)
    277 {
    278    n_Jccs_untaken++;
    279 }
    280 
    281 static void add_one_inverted_Jcc(void)
    282 {
    283    n_IJccs++;
    284 }
    285 
    286 static void add_one_inverted_Jcc_untaken(void)
    287 {
    288    n_IJccs_untaken++;
    289 }
    290 
    291 /*------------------------------------------------------------*/
    292 /*--- Stuff for --detailed-counts                          ---*/
    293 /*------------------------------------------------------------*/
    294 
    295 typedef
    296    IRExpr
    297    IRAtom;
    298 
    299 /* --- Operations --- */
    300 
    301 typedef enum { OpLoad=0, OpStore=1, OpAlu=2 } Op;
    302 
    303 #define N_OPS 3
    304 
    305 
    306 /* --- Types --- */
    307 
    308 #define N_TYPES 14
    309 
    310 static Int type2index ( IRType ty )
    311 {
    312    switch (ty) {
    313       case Ity_I1:      return 0;
    314       case Ity_I8:      return 1;
    315       case Ity_I16:     return 2;
    316       case Ity_I32:     return 3;
    317       case Ity_I64:     return 4;
    318       case Ity_I128:    return 5;
    319       case Ity_F32:     return 6;
    320       case Ity_F64:     return 7;
    321       case Ity_F128:    return 8;
    322       case Ity_V128:    return 9;
    323       case Ity_V256:    return 10;
    324       case Ity_D32:     return 11;
    325       case Ity_D64:     return 12;
    326       case Ity_D128:    return 13;
    327       default: tl_assert(0);
    328    }
    329 }
    330 
    331 static const HChar* nameOfTypeIndex ( Int i )
    332 {
    333    switch (i) {
    334       case 0: return "I1";   break;
    335       case 1: return "I8";   break;
    336       case 2: return "I16";  break;
    337       case 3: return "I32";  break;
    338       case 4: return "I64";  break;
    339       case 5: return "I128"; break;
    340       case 6: return "F32";  break;
    341       case 7: return "F64";  break;
    342       case 8: return "F128";  break;
    343       case 9: return "V128";  break;
    344       case 10: return "V256"; break;
    345       case 11: return "D32";  break;
    346       case 12: return "D64";  break;
    347       case 13: return "D128"; break;
    348       default: tl_assert(0);
    349    }
    350 }
    351 
    352 
    353 /* --- Counts --- */
    354 
    355 static ULong detailCounts[N_OPS][N_TYPES];
    356 
    357 /* The helper that is called from the instrumented code. */
    358 static VG_REGPARM(1)
    359 void increment_detail(ULong* detail)
    360 {
    361    (*detail)++;
    362 }
    363 
    364 /* A helper that adds the instrumentation for a detail.  guard ::
    365    Ity_I1 is the guarding condition for the event.  If NULL it is
    366    assumed to mean "always True". */
    367 static void instrument_detail(IRSB* sb, Op op, IRType type, IRAtom* guard)
    368 {
    369    IRDirty* di;
    370    IRExpr** argv;
    371    const UInt typeIx = type2index(type);
    372 
    373    tl_assert(op < N_OPS);
    374    tl_assert(typeIx < N_TYPES);
    375 
    376    argv = mkIRExprVec_1( mkIRExpr_HWord( (HWord)&detailCounts[op][typeIx] ) );
    377    di = unsafeIRDirty_0_N( 1, "increment_detail",
    378                               VG_(fnptr_to_fnentry)( &increment_detail ),
    379                               argv);
    380    if (guard) di->guard = guard;
    381    addStmtToIRSB( sb, IRStmt_Dirty(di) );
    382 }
    383 
    384 /* Summarize and print the details. */
    385 static void print_details ( void )
    386 {
    387    Int typeIx;
    388    VG_(umsg)("   Type        Loads       Stores       AluOps\n");
    389    VG_(umsg)("   -------------------------------------------\n");
    390    for (typeIx = 0; typeIx < N_TYPES; typeIx++) {
    391       VG_(umsg)("   %-4s %'12llu %'12llu %'12llu\n",
    392                 nameOfTypeIndex( typeIx ),
    393                 detailCounts[OpLoad ][typeIx],
    394                 detailCounts[OpStore][typeIx],
    395                 detailCounts[OpAlu  ][typeIx]
    396       );
    397    }
    398 }
    399 
    400 
    401 /*------------------------------------------------------------*/
    402 /*--- Stuff for --trace-mem                                ---*/
    403 /*------------------------------------------------------------*/
    404 
    405 #define MAX_DSIZE    512
    406 
    407 typedef
    408    enum { Event_Ir, Event_Dr, Event_Dw, Event_Dm }
    409    EventKind;
    410 
    411 typedef
    412    struct {
    413       EventKind  ekind;
    414       IRAtom*    addr;
    415       Int        size;
    416       IRAtom*    guard; /* :: Ity_I1, or NULL=="always True" */
    417    }
    418    Event;
    419 
    420 /* Up to this many unnotified events are allowed.  Must be at least two,
    421    so that reads and writes to the same address can be merged into a modify.
    422    Beyond that, larger numbers just potentially induce more spilling due to
    423    extending live ranges of address temporaries. */
    424 #define N_EVENTS 4
    425 
    426 /* Maintain an ordered list of memory events which are outstanding, in
    427    the sense that no IR has yet been generated to do the relevant
    428    helper calls.  The SB is scanned top to bottom and memory events
    429    are added to the end of the list, merging with the most recent
    430    notified event where possible (Dw immediately following Dr and
    431    having the same size and EA can be merged).
    432 
    433    This merging is done so that for architectures which have
    434    load-op-store instructions (x86, amd64), the instr is treated as if
    435    it makes just one memory reference (a modify), rather than two (a
    436    read followed by a write at the same address).
    437 
    438    At various points the list will need to be flushed, that is, IR
    439    generated from it.  That must happen before any possible exit from
    440    the block (the end, or an IRStmt_Exit).  Flushing also takes place
    441    when there is no space to add a new event, and before entering a
    442    RMW (read-modify-write) section on processors supporting LL/SC.
    443 
    444    If we require the simulation statistics to be up to date with
    445    respect to possible memory exceptions, then the list would have to
    446    be flushed before each memory reference.  That's a pain so we don't
    447    bother.
    448 
    449    Flushing the list consists of walking it start to end and emitting
    450    instrumentation IR for each event, in the order in which they
    451    appear. */
    452 
    453 static Event events[N_EVENTS];
    454 static Int   events_used = 0;
    455 
    456 
/* Dirty-call target: print an instruction-fetch event ("I addr,size").
   VG_REGPARM(2) asks for both arguments to be passed in registers. */
static VG_REGPARM(2) void trace_instr(Addr addr, SizeT size)
{
   VG_(printf)("I  %08lx,%lu\n", addr, size);
}
    461 
/* Dirty-call target: print a data-load event (" L addr,size"). */
static VG_REGPARM(2) void trace_load(Addr addr, SizeT size)
{
   VG_(printf)(" L %08lx,%lu\n", addr, size);
}
    466 
/* Dirty-call target: print a data-store event (" S addr,size"). */
static VG_REGPARM(2) void trace_store(Addr addr, SizeT size)
{
   VG_(printf)(" S %08lx,%lu\n", addr, size);
}
    471 
/* Dirty-call target: print a data-modify (read+write) event
   (" M addr,size"). */
static VG_REGPARM(2) void trace_modify(Addr addr, SizeT size)
{
   VG_(printf)(" M %08lx,%lu\n", addr, size);
}
    476 
    477 
    478 static void flushEvents(IRSB* sb)
    479 {
    480    Int        i;
    481    const HChar* helperName;
    482    void*      helperAddr;
    483    IRExpr**   argv;
    484    IRDirty*   di;
    485    Event*     ev;
    486 
    487    for (i = 0; i < events_used; i++) {
    488 
    489       ev = &events[i];
    490 
    491       // Decide on helper fn to call and args to pass it.
    492       switch (ev->ekind) {
    493          case Event_Ir: helperName = "trace_instr";
    494                         helperAddr =  trace_instr;  break;
    495 
    496          case Event_Dr: helperName = "trace_load";
    497                         helperAddr =  trace_load;   break;
    498 
    499          case Event_Dw: helperName = "trace_store";
    500                         helperAddr =  trace_store;  break;
    501 
    502          case Event_Dm: helperName = "trace_modify";
    503                         helperAddr =  trace_modify; break;
    504          default:
    505             tl_assert(0);
    506       }
    507 
    508       // Add the helper.
    509       argv = mkIRExprVec_2( ev->addr, mkIRExpr_HWord( ev->size ) );
    510       di   = unsafeIRDirty_0_N( /*regparms*/2,
    511                                 helperName, VG_(fnptr_to_fnentry)( helperAddr ),
    512                                 argv );
    513       if (ev->guard) {
    514          di->guard = ev->guard;
    515       }
    516       addStmtToIRSB( sb, IRStmt_Dirty(di) );
    517    }
    518 
    519    events_used = 0;
    520 }
    521 
    522 // WARNING:  If you aren't interested in instruction reads, you can omit the
    523 // code that adds calls to trace_instr() in flushEvents().  However, you
    524 // must still call this function, addEvent_Ir() -- it is necessary to add
    525 // the Ir events to the events list so that merging of paired load/store
    526 // events into modify events works correctly.
    527 static void addEvent_Ir ( IRSB* sb, IRAtom* iaddr, UInt isize )
    528 {
    529    Event* evt;
    530    tl_assert(clo_trace_mem);
    531    tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB)
    532             || VG_CLREQ_SZB == isize );
    533    if (events_used == N_EVENTS)
    534       flushEvents(sb);
    535    tl_assert(events_used >= 0 && events_used < N_EVENTS);
    536    evt = &events[events_used];
    537    evt->ekind = Event_Ir;
    538    evt->addr  = iaddr;
    539    evt->size  = isize;
    540    evt->guard = NULL;
    541    events_used++;
    542 }
    543 
    544 /* Add a guarded read event. */
    545 static
    546 void addEvent_Dr_guarded ( IRSB* sb, IRAtom* daddr, Int dsize, IRAtom* guard )
    547 {
    548    Event* evt;
    549    tl_assert(clo_trace_mem);
    550    tl_assert(isIRAtom(daddr));
    551    tl_assert(dsize >= 1 && dsize <= MAX_DSIZE);
    552    if (events_used == N_EVENTS)
    553       flushEvents(sb);
    554    tl_assert(events_used >= 0 && events_used < N_EVENTS);
    555    evt = &events[events_used];
    556    evt->ekind = Event_Dr;
    557    evt->addr  = daddr;
    558    evt->size  = dsize;
    559    evt->guard = guard;
    560    events_used++;
    561 }
    562 
    563 /* Add an ordinary read event, by adding a guarded read event with an
    564    always-true guard. */
    565 static
    566 void addEvent_Dr ( IRSB* sb, IRAtom* daddr, Int dsize )
    567 {
    568    addEvent_Dr_guarded(sb, daddr, dsize, NULL);
    569 }
    570 
    571 /* Add a guarded write event. */
    572 static
    573 void addEvent_Dw_guarded ( IRSB* sb, IRAtom* daddr, Int dsize, IRAtom* guard )
    574 {
    575    Event* evt;
    576    tl_assert(clo_trace_mem);
    577    tl_assert(isIRAtom(daddr));
    578    tl_assert(dsize >= 1 && dsize <= MAX_DSIZE);
    579    if (events_used == N_EVENTS)
    580       flushEvents(sb);
    581    tl_assert(events_used >= 0 && events_used < N_EVENTS);
    582    evt = &events[events_used];
    583    evt->ekind = Event_Dw;
    584    evt->addr  = daddr;
    585    evt->size  = dsize;
    586    evt->guard = guard;
    587    events_used++;
    588 }
    589 
    590 /* Add an ordinary write event.  Try to merge it with an immediately
    591    preceding ordinary read event of the same size to the same
    592    address. */
    593 static
    594 void addEvent_Dw ( IRSB* sb, IRAtom* daddr, Int dsize )
    595 {
    596    Event* lastEvt;
    597    Event* evt;
    598    tl_assert(clo_trace_mem);
    599    tl_assert(isIRAtom(daddr));
    600    tl_assert(dsize >= 1 && dsize <= MAX_DSIZE);
    601 
    602    // Is it possible to merge this write with the preceding read?
    603    lastEvt = &events[events_used-1];
    604    if (events_used > 0
    605        && lastEvt->ekind == Event_Dr
    606        && lastEvt->size  == dsize
    607        && lastEvt->guard == NULL
    608        && eqIRAtom(lastEvt->addr, daddr))
    609    {
    610       lastEvt->ekind = Event_Dm;
    611       return;
    612    }
    613 
    614    // No.  Add as normal.
    615    if (events_used == N_EVENTS)
    616       flushEvents(sb);
    617    tl_assert(events_used >= 0 && events_used < N_EVENTS);
    618    evt = &events[events_used];
    619    evt->ekind = Event_Dw;
    620    evt->size  = dsize;
    621    evt->addr  = daddr;
    622    evt->guard = NULL;
    623    events_used++;
    624 }
    625 
    626 
    627 /*------------------------------------------------------------*/
    628 /*--- Stuff for --trace-superblocks                        ---*/
    629 /*------------------------------------------------------------*/
    630 
/* Dirty-call target for --trace-superblocks: print the address of the
   first instruction of the superblock being entered ("SB addr"). */
static void trace_superblock(Addr addr)
{
   VG_(printf)("SB %08lx\n", addr);
}
    635 
    636 
    637 /*------------------------------------------------------------*/
    638 /*--- Basic tool functions                                 ---*/
    639 /*------------------------------------------------------------*/
    640 
    641 static void lk_post_clo_init(void)
    642 {
    643    Int op, tyIx;
    644 
    645    if (clo_detailed_counts) {
    646       for (op = 0; op < N_OPS; op++)
    647          for (tyIx = 0; tyIx < N_TYPES; tyIx++)
    648             detailCounts[op][tyIx] = 0;
    649    }
    650 }
    651 
    652 static
    653 IRSB* lk_instrument ( VgCallbackClosure* closure,
    654                       IRSB* sbIn,
    655                       const VexGuestLayout* layout,
    656                       const VexGuestExtents* vge,
    657                       const VexArchInfo* archinfo_host,
    658                       IRType gWordTy, IRType hWordTy )
    659 {
    660    IRDirty*   di;
    661    Int        i;
    662    IRSB*      sbOut;
    663    IRTypeEnv* tyenv = sbIn->tyenv;
    664    Addr       iaddr = 0, dst;
    665    UInt       ilen = 0;
    666    Bool       condition_inverted = False;
    667 
    668    if (gWordTy != hWordTy) {
    669       /* We don't currently support this case. */
    670       VG_(tool_panic)("host/guest word size mismatch");
    671    }
    672 
    673    /* Set up SB */
    674    sbOut = deepCopyIRSBExceptStmts(sbIn);
    675 
    676    // Copy verbatim any IR preamble preceding the first IMark
    677    i = 0;
    678    while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
    679       addStmtToIRSB( sbOut, sbIn->stmts[i] );
    680       i++;
    681    }
    682 
    683    if (clo_basic_counts) {
    684       /* Count this superblock. */
    685       di = unsafeIRDirty_0_N( 0, "add_one_SB_entered",
    686                                  VG_(fnptr_to_fnentry)( &add_one_SB_entered ),
    687                                  mkIRExprVec_0() );
    688       addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
    689    }
    690 
    691    if (clo_trace_sbs) {
    692       /* Print this superblock's address. */
    693       di = unsafeIRDirty_0_N(
    694               0, "trace_superblock",
    695               VG_(fnptr_to_fnentry)( &trace_superblock ),
    696               mkIRExprVec_1( mkIRExpr_HWord( vge->base[0] ) )
    697            );
    698       addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
    699    }
    700 
    701    if (clo_trace_mem) {
    702       events_used = 0;
    703    }
    704 
    705    for (/*use current i*/; i < sbIn->stmts_used; i++) {
    706       IRStmt* st = sbIn->stmts[i];
    707       if (!st || st->tag == Ist_NoOp) continue;
    708 
    709       if (clo_basic_counts) {
    710          /* Count one VEX statement. */
    711          di = unsafeIRDirty_0_N( 0, "add_one_IRStmt",
    712                                     VG_(fnptr_to_fnentry)( &add_one_IRStmt ),
    713                                     mkIRExprVec_0() );
    714          addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
    715       }
    716 
    717       switch (st->tag) {
    718          case Ist_NoOp:
    719          case Ist_AbiHint:
    720          case Ist_Put:
    721          case Ist_PutI:
    722          case Ist_MBE:
    723             addStmtToIRSB( sbOut, st );
    724             break;
    725 
    726          case Ist_IMark:
    727             if (clo_basic_counts) {
    728                /* Needed to be able to check for inverted condition in Ist_Exit */
    729                iaddr = st->Ist.IMark.addr;
    730                ilen  = st->Ist.IMark.len;
    731 
    732                /* Count guest instruction. */
    733                di = unsafeIRDirty_0_N( 0, "add_one_guest_instr",
    734                                           VG_(fnptr_to_fnentry)( &add_one_guest_instr ),
    735                                           mkIRExprVec_0() );
    736                addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
    737 
    738                /* An unconditional branch to a known destination in the
    739                 * guest's instructions can be represented, in the IRSB to
    740                 * instrument, by the VEX statements that are the
    741                 * translation of that known destination. This feature is
    742                 * called 'SB chasing' and can be influenced by command
    743                 * line option --vex-guest-chase-thresh.
    744                 *
    745                 * To get an accurate count of the calls to a specific
    746                 * function, taking SB chasing into account, we need to
    747                 * check for each guest instruction (Ist_IMark) if it is
    748                 * the entry point of a function.
    749                 */
    750                tl_assert(clo_fnname);
    751                tl_assert(clo_fnname[0]);
    752                const HChar *fnname;
    753                if (VG_(get_fnname_if_entry)(st->Ist.IMark.addr,
    754                                             &fnname)
    755                    && 0 == VG_(strcmp)(fnname, clo_fnname)) {
    756                   di = unsafeIRDirty_0_N(
    757                           0, "add_one_func_call",
    758                              VG_(fnptr_to_fnentry)( &add_one_func_call ),
    759                              mkIRExprVec_0() );
    760                   addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
    761                }
    762             }
    763             if (clo_trace_mem) {
    764                // WARNING: do not remove this function call, even if you
    765                // aren't interested in instruction reads.  See the comment
    766                // above the function itself for more detail.
    767                addEvent_Ir( sbOut, mkIRExpr_HWord( (HWord)st->Ist.IMark.addr ),
    768                             st->Ist.IMark.len );
    769             }
    770             addStmtToIRSB( sbOut, st );
    771             break;
    772 
    773          case Ist_WrTmp:
    774             // Add a call to trace_load() if --trace-mem=yes.
    775             if (clo_trace_mem) {
    776                IRExpr* data = st->Ist.WrTmp.data;
    777                if (data->tag == Iex_Load) {
    778                   addEvent_Dr( sbOut, data->Iex.Load.addr,
    779                                sizeofIRType(data->Iex.Load.ty) );
    780                }
    781             }
    782             if (clo_detailed_counts) {
    783                IRExpr* expr = st->Ist.WrTmp.data;
    784                IRType  type = typeOfIRExpr(sbOut->tyenv, expr);
    785                tl_assert(type != Ity_INVALID);
    786                switch (expr->tag) {
    787                   case Iex_Load:
    788                     instrument_detail( sbOut, OpLoad, type, NULL/*guard*/ );
    789                      break;
    790                   case Iex_Unop:
    791                   case Iex_Binop:
    792                   case Iex_Triop:
    793                   case Iex_Qop:
    794                   case Iex_ITE:
    795                      instrument_detail( sbOut, OpAlu, type, NULL/*guard*/ );
    796                      break;
    797                   default:
    798                      break;
    799                }
    800             }
    801             addStmtToIRSB( sbOut, st );
    802             break;
    803 
    804          case Ist_Store: {
    805             IRExpr* data = st->Ist.Store.data;
    806             IRType  type = typeOfIRExpr(tyenv, data);
    807             tl_assert(type != Ity_INVALID);
    808             if (clo_trace_mem) {
    809                addEvent_Dw( sbOut, st->Ist.Store.addr,
    810                             sizeofIRType(type) );
    811             }
    812             if (clo_detailed_counts) {
    813                instrument_detail( sbOut, OpStore, type, NULL/*guard*/ );
    814             }
    815             addStmtToIRSB( sbOut, st );
    816             break;
    817          }
    818 
    819          case Ist_StoreG: {
    820             IRStoreG* sg   = st->Ist.StoreG.details;
    821             IRExpr*   data = sg->data;
    822             IRType    type = typeOfIRExpr(tyenv, data);
    823             tl_assert(type != Ity_INVALID);
    824             if (clo_trace_mem) {
    825                addEvent_Dw_guarded( sbOut, sg->addr,
    826                                     sizeofIRType(type), sg->guard );
    827             }
    828             if (clo_detailed_counts) {
    829                instrument_detail( sbOut, OpStore, type, sg->guard );
    830             }
    831             addStmtToIRSB( sbOut, st );
    832             break;
    833          }
    834 
    835          case Ist_LoadG: {
    836             IRLoadG* lg       = st->Ist.LoadG.details;
    837             IRType   type     = Ity_INVALID; /* loaded type */
    838             IRType   typeWide = Ity_INVALID; /* after implicit widening */
    839             typeOfIRLoadGOp(lg->cvt, &typeWide, &type);
    840             tl_assert(type != Ity_INVALID);
    841             if (clo_trace_mem) {
    842                addEvent_Dr_guarded( sbOut, lg->addr,
    843                                     sizeofIRType(type), lg->guard );
    844             }
    845             if (clo_detailed_counts) {
    846                instrument_detail( sbOut, OpLoad, type, lg->guard );
    847             }
    848             addStmtToIRSB( sbOut, st );
    849             break;
    850          }
    851 
    852          case Ist_Dirty: {
    853             if (clo_trace_mem) {
    854                Int      dsize;
    855                IRDirty* d = st->Ist.Dirty.details;
    856                if (d->mFx != Ifx_None) {
    857                   // This dirty helper accesses memory.  Collect the details.
    858                   tl_assert(d->mAddr != NULL);
    859                   tl_assert(d->mSize != 0);
    860                   dsize = d->mSize;
    861                   if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
    862                      addEvent_Dr( sbOut, d->mAddr, dsize );
    863                   if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
    864                      addEvent_Dw( sbOut, d->mAddr, dsize );
    865                } else {
    866                   tl_assert(d->mAddr == NULL);
    867                   tl_assert(d->mSize == 0);
    868                }
    869             }
    870             addStmtToIRSB( sbOut, st );
    871             break;
    872          }
    873 
    874          case Ist_CAS: {
    875             /* We treat it as a read and a write of the location.  I
    876                think that is the same behaviour as it was before IRCAS
    877                was introduced, since prior to that point, the Vex
    878                front ends would translate a lock-prefixed instruction
    879                into a (normal) read followed by a (normal) write. */
    880             Int    dataSize;
    881             IRType dataTy;
    882             IRCAS* cas = st->Ist.CAS.details;
    883             tl_assert(cas->addr != NULL);
    884             tl_assert(cas->dataLo != NULL);
    885             dataTy   = typeOfIRExpr(tyenv, cas->dataLo);
    886             dataSize = sizeofIRType(dataTy);
    887             if (cas->dataHi != NULL)
    888                dataSize *= 2; /* since it's a doubleword-CAS */
    889             if (clo_trace_mem) {
    890                addEvent_Dr( sbOut, cas->addr, dataSize );
    891                addEvent_Dw( sbOut, cas->addr, dataSize );
    892             }
    893             if (clo_detailed_counts) {
    894                instrument_detail( sbOut, OpLoad, dataTy, NULL/*guard*/ );
    895                if (cas->dataHi != NULL) /* dcas */
    896                   instrument_detail( sbOut, OpLoad, dataTy, NULL/*guard*/ );
    897                instrument_detail( sbOut, OpStore, dataTy, NULL/*guard*/ );
    898                if (cas->dataHi != NULL) /* dcas */
    899                   instrument_detail( sbOut, OpStore, dataTy, NULL/*guard*/ );
    900             }
    901             addStmtToIRSB( sbOut, st );
    902             break;
    903          }
    904 
    905          case Ist_LLSC: {
    906             IRType dataTy;
    907             if (st->Ist.LLSC.storedata == NULL) {
    908                /* LL */
    909                dataTy = typeOfIRTemp(tyenv, st->Ist.LLSC.result);
    910                if (clo_trace_mem) {
    911                   addEvent_Dr( sbOut, st->Ist.LLSC.addr,
    912                                       sizeofIRType(dataTy) );
    913                   /* flush events before LL, helps SC to succeed */
    914                   flushEvents(sbOut);
    915 	       }
    916                if (clo_detailed_counts)
    917                   instrument_detail( sbOut, OpLoad, dataTy, NULL/*guard*/ );
    918             } else {
    919                /* SC */
    920                dataTy = typeOfIRExpr(tyenv, st->Ist.LLSC.storedata);
    921                if (clo_trace_mem)
    922                   addEvent_Dw( sbOut, st->Ist.LLSC.addr,
    923                                       sizeofIRType(dataTy) );
    924                if (clo_detailed_counts)
    925                   instrument_detail( sbOut, OpStore, dataTy, NULL/*guard*/ );
    926             }
    927             addStmtToIRSB( sbOut, st );
    928             break;
    929          }
    930 
    931          case Ist_Exit:
    932             if (clo_basic_counts) {
    933                // The condition of a branch was inverted by VEX if a taken
    934                // branch is in fact a fall trough according to client address
    935                tl_assert(iaddr != 0);
    936                dst = (sizeof(Addr) == 4) ? st->Ist.Exit.dst->Ico.U32 :
    937                                            st->Ist.Exit.dst->Ico.U64;
    938                condition_inverted = (dst == iaddr + ilen);
    939 
    940                /* Count Jcc */
    941                if (!condition_inverted)
    942                   di = unsafeIRDirty_0_N( 0, "add_one_Jcc",
    943                                           VG_(fnptr_to_fnentry)( &add_one_Jcc ),
    944                                           mkIRExprVec_0() );
    945                else
    946                   di = unsafeIRDirty_0_N( 0, "add_one_inverted_Jcc",
    947                                           VG_(fnptr_to_fnentry)(
    948                                              &add_one_inverted_Jcc ),
    949                                           mkIRExprVec_0() );
    950 
    951                addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
    952             }
    953             if (clo_trace_mem) {
    954                flushEvents(sbOut);
    955             }
    956 
    957             addStmtToIRSB( sbOut, st );      // Original statement
    958 
    959             if (clo_basic_counts) {
    960                /* Count non-taken Jcc */
    961                if (!condition_inverted)
    962                   di = unsafeIRDirty_0_N( 0, "add_one_Jcc_untaken",
    963                                           VG_(fnptr_to_fnentry)(
    964                                              &add_one_Jcc_untaken ),
    965                                           mkIRExprVec_0() );
    966                else
    967                   di = unsafeIRDirty_0_N( 0, "add_one_inverted_Jcc_untaken",
    968                                           VG_(fnptr_to_fnentry)(
    969                                              &add_one_inverted_Jcc_untaken ),
    970                                           mkIRExprVec_0() );
    971 
    972                addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
    973             }
    974             break;
    975 
    976          default:
    977             ppIRStmt(st);
    978             tl_assert(0);
    979       }
    980    }
    981 
    982    if (clo_basic_counts) {
    983       /* Count this basic block. */
    984       di = unsafeIRDirty_0_N( 0, "add_one_SB_completed",
    985                                  VG_(fnptr_to_fnentry)( &add_one_SB_completed ),
    986                                  mkIRExprVec_0() );
    987       addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
    988    }
    989 
    990    if (clo_trace_mem) {
    991       /* At the end of the sbIn.  Flush outstandings. */
    992       flushEvents(sbOut);
    993    }
    994 
    995    return sbOut;
    996 }
    997 
    998 static void lk_fini(Int exitcode)
    999 {
   1000    tl_assert(clo_fnname);
   1001    tl_assert(clo_fnname[0]);
   1002 
   1003    if (clo_basic_counts) {
   1004       ULong total_Jccs = n_Jccs + n_IJccs;
   1005       ULong taken_Jccs = (n_Jccs - n_Jccs_untaken) + n_IJccs_untaken;
   1006 
   1007       VG_(umsg)("Counted %'llu call%s to %s()\n",
   1008                 n_func_calls, ( n_func_calls==1 ? "" : "s" ), clo_fnname);
   1009 
   1010       VG_(umsg)("\n");
   1011       VG_(umsg)("Jccs:\n");
   1012       VG_(umsg)("  total:         %'llu\n", total_Jccs);
   1013       VG_(umsg)("  taken:         %'llu (%.0f%%)\n",
   1014                 taken_Jccs, taken_Jccs * 100.0 / (total_Jccs ? total_Jccs : 1));
   1015 
   1016       VG_(umsg)("\n");
   1017       VG_(umsg)("Executed:\n");
   1018       VG_(umsg)("  SBs entered:   %'llu\n", n_SBs_entered);
   1019       VG_(umsg)("  SBs completed: %'llu\n", n_SBs_completed);
   1020       VG_(umsg)("  guest instrs:  %'llu\n", n_guest_instrs);
   1021       VG_(umsg)("  IRStmts:       %'llu\n", n_IRStmts);
   1022 
   1023       VG_(umsg)("\n");
   1024       VG_(umsg)("Ratios:\n");
   1025       tl_assert(n_SBs_entered); // Paranoia time.
   1026       VG_(umsg)("  guest instrs : SB entered  = %'llu : 10\n",
   1027          10 * n_guest_instrs / n_SBs_entered);
   1028       VG_(umsg)("       IRStmts : SB entered  = %'llu : 10\n",
   1029          10 * n_IRStmts / n_SBs_entered);
   1030       tl_assert(n_guest_instrs); // Paranoia time.
   1031       VG_(umsg)("       IRStmts : guest instr = %'llu : 10\n",
   1032          10 * n_IRStmts / n_guest_instrs);
   1033    }
   1034 
   1035    if (clo_detailed_counts) {
   1036       VG_(umsg)("\n");
   1037       VG_(umsg)("IR-level counts by type:\n");
   1038       print_details();
   1039    }
   1040 
   1041    if (clo_basic_counts) {
   1042       VG_(umsg)("\n");
   1043       VG_(umsg)("Exit code:       %d\n", exitcode);
   1044    }
   1045 }
   1046 
/* Tool initialisation: called by the Valgrind core before command line
   options are processed.  Registers the tool's identity strings and its
   main callbacks (post-option init, instrumenter, finaliser) with the
   core.  NOTE(review): registration call order follows the core's
   expected protocol (details first, then basic funcs, then needs) —
   keep it as-is. */
static void lk_pre_clo_init(void)
{
   VG_(details_name)            ("Lackey");
   VG_(details_version)         (NULL);
   VG_(details_description)     ("an example Valgrind tool");
   VG_(details_copyright_author)(
      "Copyright (C) 2002-2017, and GNU GPL'd, by Nicholas Nethercote.");
   VG_(details_bug_reports_to)  (VG_BUGS_TO);
   /* Hint to the core about average instrumented-SB size, for
      translation-cache sizing. */
   VG_(details_avg_translation_sizeB) ( 200 );

   VG_(basic_tool_funcs)          (lk_post_clo_init,
                                   lk_instrument,
                                   lk_fini);
   /* This tool accepts its own command line options (--basic-counts,
      --detailed-counts, --trace-mem, --trace-superblocks, --fnname). */
   VG_(needs_command_line_options)(lk_process_cmd_line_option,
                                   lk_print_usage,
                                   lk_print_debug_usage);
}
   1064 
/* Mandatory boilerplate: exports lk_pre_clo_init as this tool's entry
   point and records the core/tool interface version it was built
   against. */
VG_DETERMINE_INTERFACE_VERSION(lk_pre_clo_init)
   1066 
   1067 /*--------------------------------------------------------------------*/
   1068 /*--- end                                                lk_main.c ---*/
   1069 /*--------------------------------------------------------------------*/
   1070