Home | History | Annotate | Download | only in cachegrind
      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- Cachegrind: everything but the simulation itself.            ---*/
      4 /*---                                                    cg_main.c ---*/
      5 /*--------------------------------------------------------------------*/
      6 
      7 /*
      8    This file is part of Cachegrind, a Valgrind tool for cache
      9    profiling programs.
     10 
     11    Copyright (C) 2002-2015 Nicholas Nethercote
     12       njn (at) valgrind.org
     13 
     14    This program is free software; you can redistribute it and/or
     15    modify it under the terms of the GNU General Public License as
     16    published by the Free Software Foundation; either version 2 of the
     17    License, or (at your option) any later version.
     18 
     19    This program is distributed in the hope that it will be useful, but
     20    WITHOUT ANY WARRANTY; without even the implied warranty of
     21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     22    General Public License for more details.
     23 
     24    You should have received a copy of the GNU General Public License
     25    along with this program; if not, write to the Free Software
     26    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     27    02111-1307, USA.
     28 
     29    The GNU General Public License is contained in the file COPYING.
     30 */
     31 
     32 #include "pub_tool_basics.h"
     33 #include "pub_tool_debuginfo.h"
     34 #include "pub_tool_libcbase.h"
     35 #include "pub_tool_libcassert.h"
     36 #include "pub_tool_libcfile.h"
     37 #include "pub_tool_libcprint.h"
     38 #include "pub_tool_libcproc.h"
     39 #include "pub_tool_mallocfree.h"
     40 #include "pub_tool_options.h"
     41 #include "pub_tool_oset.h"
     42 #include "pub_tool_tooliface.h"
     43 #include "pub_tool_xarray.h"
     44 #include "pub_tool_clientstate.h"
     45 #include "pub_tool_machine.h"      // VG_(fnptr_to_fnentry)
     46 
     47 #include "cg_arch.h"
     48 #include "cg_sim.c"
     49 #include "cg_branchpred.c"
     50 
     51 /*------------------------------------------------------------*/
     52 /*--- Constants                                            ---*/
     53 /*------------------------------------------------------------*/
     54 
     55 /* Set to 1 for very verbose debugging */
     56 #define DEBUG_CG 0
     57 
     58 /*------------------------------------------------------------*/
     59 /*--- Options                                              ---*/
     60 /*------------------------------------------------------------*/
     61 
     62 static Bool  clo_cache_sim  = True;  /* do cache simulation? */
     63 static Bool  clo_branch_sim = False; /* do branch simulation? */
     64 static const HChar* clo_cachegrind_out_file = "cachegrind.out.%p";
     65 
     66 /*------------------------------------------------------------*/
     67 /*--- Cachesim configuration                               ---*/
     68 /*------------------------------------------------------------*/
     69 
     70 static Int min_line_size = 0; /* min of L1 and LL cache line sizes */
     71 
     72 /*------------------------------------------------------------*/
     73 /*--- Types and Data Structures                            ---*/
     74 /*------------------------------------------------------------*/
     75 
     76 typedef
     77    struct {
     78       ULong a;  /* total # memory accesses of this kind */
     79       ULong m1; /* misses in the first level cache */
     80       ULong mL; /* misses in the second level cache */
     81    }
     82    CacheCC;
     83 
     84 typedef
     85    struct {
     86       ULong b;  /* total # branches of this kind */
     87       ULong mp; /* number of branches mispredicted */
     88    }
     89    BranchCC;
     90 
     91 //------------------------------------------------------------
     92 // Primary data structure #1: CC table
     93 // - Holds the per-source-line hit/miss stats, grouped by file/function/line.
     94 // - an ordered set of CCs.  CC indexing done by file/function/line (as
     95 //   determined from the instrAddr).
     96 // - Traversed for dumping stats at end in file/func/line hierarchy.
     97 
     98 typedef struct {
     99    HChar* file;
    100    const HChar* fn;
    101    Int    line;
    102 }
    103 CodeLoc;
    104 
    105 typedef struct {
    106    CodeLoc  loc; /* Source location that these counts pertain to */
    107    CacheCC  Ir;  /* Insn read counts */
    108    CacheCC  Dr;  /* Data read counts */
    109    CacheCC  Dw;  /* Data write/modify counts */
    110    BranchCC Bc;  /* Conditional branch counts */
    111    BranchCC Bi;  /* Indirect branch counts */
    112 } LineCC;
    113 
    114 // First compare file, then fn, then line.
    115 static Word cmp_CodeLoc_LineCC(const void *vloc, const void *vcc)
    116 {
    117    Word res;
    118    const CodeLoc* a = (const CodeLoc*)vloc;
    119    const CodeLoc* b = &(((const LineCC*)vcc)->loc);
    120 
    121    res = VG_(strcmp)(a->file, b->file);
    122    if (0 != res)
    123       return res;
    124 
    125    res = VG_(strcmp)(a->fn, b->fn);
    126    if (0 != res)
    127       return res;
    128 
    129    return a->line - b->line;
    130 }
    131 
    132 static OSet* CC_table;
    133 
    134 //------------------------------------------------------------
    135 // Primary data structure #2: InstrInfo table
    136 // - Holds the cached info about each instr that is used for simulation.
    137 // - table(SB_start_addr, list(InstrInfo))
    138 // - For each SB, each InstrInfo in the list holds info about the
    139 //   instruction (instrLen, instrAddr, etc), plus a pointer to its line
    140 //   CC.  This node is what's passed to the simulation function.
    141 // - When SBs are discarded the relevant list(instr_details) is freed.
    142 
    143 typedef struct _InstrInfo InstrInfo;
    144 struct _InstrInfo {
    145    Addr    instr_addr;
    146    UChar   instr_len;
    147    LineCC* parent;         // parent line-CC
    148 };
    149 
    150 typedef struct _SB_info SB_info;
    151 struct _SB_info {
    152    Addr      SB_addr;      // key;  MUST BE FIRST
    153    Int       n_instrs;
    154    InstrInfo instrs[0];
    155 };
    156 
    157 static OSet* instrInfoTable;
    158 
    159 //------------------------------------------------------------
    160 // Secondary data structure: string table
    161 // - holds strings, avoiding dups
    162 // - used for filenames and function names, each of which will be
    163 //   pointed to by one or more CCs.
    164 // - it also allows equality checks just by pointer comparison, which
    165 //   is good when printing the output file at the end.
    166 
    167 static OSet* stringTable;
    168 
    169 //------------------------------------------------------------
    170 // Stats
    171 static Int  distinct_files      = 0;
    172 static Int  distinct_fns        = 0;
    173 static Int  distinct_lines      = 0;
    174 static Int  distinct_instrsGen  = 0;
    175 static Int  distinct_instrsNoX  = 0;
    176 
    177 static Int  full_debugs         = 0;
    178 static Int  file_line_debugs    = 0;
    179 static Int  fn_debugs           = 0;
    180 static Int  no_debugs           = 0;
    181 
    182 /*------------------------------------------------------------*/
    183 /*--- String table operations                              ---*/
    184 /*------------------------------------------------------------*/
    185 
    186 static Word stringCmp( const void* key, const void* elem )
    187 {
    188    return VG_(strcmp)(*(const HChar *const *)key, *(const HChar *const *)elem);
    189 }
    190 
    191 // Get a permanent string;  either pull it out of the string table if it's
    192 // been encountered before, or dup it and put it into the string table.
    193 static HChar* get_perm_string(const HChar* s)
    194 {
    195    HChar** s_ptr = VG_(OSetGen_Lookup)(stringTable, &s);
    196    if (s_ptr) {
    197       return *s_ptr;
    198    } else {
    199       HChar** s_node = VG_(OSetGen_AllocNode)(stringTable, sizeof(HChar*));
    200       *s_node = VG_(strdup)("cg.main.gps.1", s);
    201       VG_(OSetGen_Insert)(stringTable, s_node);
    202       return *s_node;
    203    }
    204 }
    205 
    206 /*------------------------------------------------------------*/
    207 /*--- CC table operations                                  ---*/
    208 /*------------------------------------------------------------*/
    209 
    210 static void get_debug_info(Addr instr_addr, const HChar **dir,
    211                            const HChar **file, const HChar **fn, UInt* line)
    212 {
    213    Bool found_file_line = VG_(get_filename_linenum)(
    214                              instr_addr,
    215                              file, dir,
    216                              line
    217                           );
    218    Bool found_fn        = VG_(get_fnname)(instr_addr, fn);
    219 
    220    if (!found_file_line) {
    221       *file = "???";
    222       *line = 0;
    223    }
    224    if (!found_fn) {
    225       *fn = "???";
    226    }
    227 
    228    if (found_file_line) {
    229       if (found_fn) full_debugs++;
    230       else          file_line_debugs++;
    231    } else {
    232       if (found_fn) fn_debugs++;
    233       else          no_debugs++;
    234    }
    235 }
    236 
    237 // Do a three step traversal: by file, then fn, then line.
    238 // Returns a pointer to the line CC, creates a new one if necessary.
    239 static LineCC* get_lineCC(Addr origAddr)
    240 {
    241    const HChar *fn, *file, *dir;
    242    UInt    line;
    243    CodeLoc loc;
    244    LineCC* lineCC;
    245 
    246    get_debug_info(origAddr, &dir, &file, &fn, &line);
    247 
    248    // Form an absolute pathname if a directory is available
    249    HChar absfile[VG_(strlen)(dir) + 1 + VG_(strlen)(file) + 1];
    250 
    251    if (dir[0]) {
    252       VG_(sprintf)(absfile, "%s/%s", dir, file);
    253    } else {
    254       VG_(sprintf)(absfile, "%s", file);
    255    }
    256 
    257    loc.file = absfile;
    258    loc.fn   = fn;
    259    loc.line = line;
    260 
    261    lineCC = VG_(OSetGen_Lookup)(CC_table, &loc);
    262    if (!lineCC) {
    263       // Allocate and zero a new node.
    264       lineCC           = VG_(OSetGen_AllocNode)(CC_table, sizeof(LineCC));
    265       lineCC->loc.file = get_perm_string(loc.file);
    266       lineCC->loc.fn   = get_perm_string(loc.fn);
    267       lineCC->loc.line = loc.line;
    268       lineCC->Ir.a     = 0;
    269       lineCC->Ir.m1    = 0;
    270       lineCC->Ir.mL    = 0;
    271       lineCC->Dr.a     = 0;
    272       lineCC->Dr.m1    = 0;
    273       lineCC->Dr.mL    = 0;
    274       lineCC->Dw.a     = 0;
    275       lineCC->Dw.m1    = 0;
    276       lineCC->Dw.mL    = 0;
    277       lineCC->Bc.b     = 0;
    278       lineCC->Bc.mp    = 0;
    279       lineCC->Bi.b     = 0;
    280       lineCC->Bi.mp    = 0;
    281       VG_(OSetGen_Insert)(CC_table, lineCC);
    282    }
    283 
    284    return lineCC;
    285 }
    286 
    287 /*------------------------------------------------------------*/
    288 /*--- Cache simulation functions                           ---*/
    289 /*------------------------------------------------------------*/
    290 
    291 /* A common case for an instruction read event is that the
    292  * bytes read belong to the same cache line in both L1I and LL
    293  * (if cache line sizes of L1 and LL are the same).
    294  * As this can be detected at instrumentation time, and results
     295  * in faster simulation, special-casing is beneficial.
    296  *
     297  * Abbreviations used in var/function names:
    298  *  IrNoX - instruction read does not cross cache lines
    299  *  IrGen - generic instruction read; not detected as IrNoX
    300  *  Ir    - not known / not important whether it is an IrNoX
    301  */
    302 
    303 // Only used with --cache-sim=no.
    304 static VG_REGPARM(1)
    305 void log_1Ir(InstrInfo* n)
    306 {
    307    n->parent->Ir.a++;
    308 }
    309 
    310 // Only used with --cache-sim=no.
    311 static VG_REGPARM(2)
    312 void log_2Ir(InstrInfo* n, InstrInfo* n2)
    313 {
    314    n->parent->Ir.a++;
    315    n2->parent->Ir.a++;
    316 }
    317 
    318 // Only used with --cache-sim=no.
    319 static VG_REGPARM(3)
    320 void log_3Ir(InstrInfo* n, InstrInfo* n2, InstrInfo* n3)
    321 {
    322    n->parent->Ir.a++;
    323    n2->parent->Ir.a++;
    324    n3->parent->Ir.a++;
    325 }
    326 
    327 // Generic case for instruction reads: may cross cache lines.
    328 // All other Ir handlers expect IrNoX instruction reads.
    329 static VG_REGPARM(1)
    330 void log_1IrGen_0D_cache_access(InstrInfo* n)
    331 {
    332    //VG_(printf)("1IrGen_0D :  CCaddr=0x%010lx,  iaddr=0x%010lx,  isize=%lu\n",
    333    //             n, n->instr_addr, n->instr_len);
    334    cachesim_I1_doref_Gen(n->instr_addr, n->instr_len,
    335 			 &n->parent->Ir.m1, &n->parent->Ir.mL);
    336    n->parent->Ir.a++;
    337 }
    338 
    339 static VG_REGPARM(1)
    340 void log_1IrNoX_0D_cache_access(InstrInfo* n)
    341 {
    342    //VG_(printf)("1IrNoX_0D :  CCaddr=0x%010lx,  iaddr=0x%010lx,  isize=%lu\n",
    343    //             n, n->instr_addr, n->instr_len);
    344    cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
    345 			 &n->parent->Ir.m1, &n->parent->Ir.mL);
    346    n->parent->Ir.a++;
    347 }
    348 
    349 static VG_REGPARM(2)
    350 void log_2IrNoX_0D_cache_access(InstrInfo* n, InstrInfo* n2)
    351 {
    352    //VG_(printf)("2IrNoX_0D : CC1addr=0x%010lx, i1addr=0x%010lx, i1size=%lu\n"
    353    //            "            CC2addr=0x%010lx, i2addr=0x%010lx, i2size=%lu\n",
    354    //            n,  n->instr_addr,  n->instr_len,
    355    //            n2, n2->instr_addr, n2->instr_len);
    356    cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
    357 			 &n->parent->Ir.m1, &n->parent->Ir.mL);
    358    n->parent->Ir.a++;
    359    cachesim_I1_doref_NoX(n2->instr_addr, n2->instr_len,
    360 			 &n2->parent->Ir.m1, &n2->parent->Ir.mL);
    361    n2->parent->Ir.a++;
    362 }
    363 
    364 static VG_REGPARM(3)
    365 void log_3IrNoX_0D_cache_access(InstrInfo* n, InstrInfo* n2, InstrInfo* n3)
    366 {
    367    //VG_(printf)("3IrNoX_0D : CC1addr=0x%010lx, i1addr=0x%010lx, i1size=%lu\n"
    368    //            "            CC2addr=0x%010lx, i2addr=0x%010lx, i2size=%lu\n"
    369    //            "            CC3addr=0x%010lx, i3addr=0x%010lx, i3size=%lu\n",
    370    //            n,  n->instr_addr,  n->instr_len,
    371    //            n2, n2->instr_addr, n2->instr_len,
    372    //            n3, n3->instr_addr, n3->instr_len);
    373    cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
    374 			 &n->parent->Ir.m1, &n->parent->Ir.mL);
    375    n->parent->Ir.a++;
    376    cachesim_I1_doref_NoX(n2->instr_addr, n2->instr_len,
    377 			 &n2->parent->Ir.m1, &n2->parent->Ir.mL);
    378    n2->parent->Ir.a++;
    379    cachesim_I1_doref_NoX(n3->instr_addr, n3->instr_len,
    380 			 &n3->parent->Ir.m1, &n3->parent->Ir.mL);
    381    n3->parent->Ir.a++;
    382 }
    383 
    384 static VG_REGPARM(3)
    385 void log_1IrNoX_1Dr_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
    386 {
    387    //VG_(printf)("1IrNoX_1Dr:  CCaddr=0x%010lx,  iaddr=0x%010lx,  isize=%lu\n"
    388    //            "                               daddr=0x%010lx,  dsize=%lu\n",
    389    //            n, n->instr_addr, n->instr_len, data_addr, data_size);
    390    cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
    391 			 &n->parent->Ir.m1, &n->parent->Ir.mL);
    392    n->parent->Ir.a++;
    393 
    394    cachesim_D1_doref(data_addr, data_size,
    395                      &n->parent->Dr.m1, &n->parent->Dr.mL);
    396    n->parent->Dr.a++;
    397 }
    398 
    399 static VG_REGPARM(3)
    400 void log_1IrNoX_1Dw_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
    401 {
    402    //VG_(printf)("1IrNoX_1Dw:  CCaddr=0x%010lx,  iaddr=0x%010lx,  isize=%lu\n"
    403    //            "                               daddr=0x%010lx,  dsize=%lu\n",
    404    //            n, n->instr_addr, n->instr_len, data_addr, data_size);
    405    cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
    406 			 &n->parent->Ir.m1, &n->parent->Ir.mL);
    407    n->parent->Ir.a++;
    408 
    409    cachesim_D1_doref(data_addr, data_size,
    410                      &n->parent->Dw.m1, &n->parent->Dw.mL);
    411    n->parent->Dw.a++;
    412 }
    413 
    414 /* Note that addEvent_D_guarded assumes that log_0Ir_1Dr_cache_access
    415    and log_0Ir_1Dw_cache_access have exactly the same prototype.  If
    416    you change them, you must change addEvent_D_guarded too. */
    417 static VG_REGPARM(3)
    418 void log_0Ir_1Dr_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
    419 {
    420    //VG_(printf)("0Ir_1Dr:  CCaddr=0x%010lx,  daddr=0x%010lx,  dsize=%lu\n",
    421    //            n, data_addr, data_size);
    422    cachesim_D1_doref(data_addr, data_size,
    423                      &n->parent->Dr.m1, &n->parent->Dr.mL);
    424    n->parent->Dr.a++;
    425 }
    426 
    427 /* See comment on log_0Ir_1Dr_cache_access. */
    428 static VG_REGPARM(3)
    429 void log_0Ir_1Dw_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
    430 {
    431    //VG_(printf)("0Ir_1Dw:  CCaddr=0x%010lx,  daddr=0x%010lx,  dsize=%lu\n",
    432    //            n, data_addr, data_size);
    433    cachesim_D1_doref(data_addr, data_size,
    434                      &n->parent->Dw.m1, &n->parent->Dw.mL);
    435    n->parent->Dw.a++;
    436 }
    437 
    438 /* For branches, we consult two different predictors, one which
    439    predicts taken/untaken for conditional branches, and the other
    440    which predicts the branch target address for indirect branches
    441    (jump-to-register style ones). */
    442 
    443 static VG_REGPARM(2)
    444 void log_cond_branch(InstrInfo* n, Word taken)
    445 {
    446    //VG_(printf)("cbrnch:  CCaddr=0x%010lx,  taken=0x%010lx\n",
    447    //             n, taken);
    448    n->parent->Bc.b++;
    449    n->parent->Bc.mp
    450       += (1 & do_cond_branch_predict(n->instr_addr, taken));
    451 }
    452 
    453 static VG_REGPARM(2)
    454 void log_ind_branch(InstrInfo* n, UWord actual_dst)
    455 {
    456    //VG_(printf)("ibrnch:  CCaddr=0x%010lx,    dst=0x%010lx\n",
    457    //             n, actual_dst);
    458    n->parent->Bi.b++;
    459    n->parent->Bi.mp
    460       += (1 & do_ind_branch_predict(n->instr_addr, actual_dst));
    461 }
    462 
    463 
    464 /*------------------------------------------------------------*/
    465 /*--- Instrumentation types and structures                 ---*/
    466 /*------------------------------------------------------------*/
    467 
    468 /* Maintain an ordered list of memory events which are outstanding, in
    469    the sense that no IR has yet been generated to do the relevant
    470    helper calls.  The BB is scanned top to bottom and memory events
    471    are added to the end of the list, merging with the most recent
    472    notified event where possible (Dw immediately following Dr and
    473    having the same size and EA can be merged).
    474 
    475    This merging is done so that for architectures which have
    476    load-op-store instructions (x86, amd64), the insn is treated as if
    477    it makes just one memory reference (a modify), rather than two (a
    478    read followed by a write at the same address).
    479 
    480    At various points the list will need to be flushed, that is, IR
    481    generated from it.  That must happen before any possible exit from
    482    the block (the end, or an IRStmt_Exit).  Flushing also takes place
    483    when there is no space to add a new event.
    484 
    485    If we require the simulation statistics to be up to date with
    486    respect to possible memory exceptions, then the list would have to
    487    be flushed before each memory reference.  That would however lose
    488    performance by inhibiting event-merging during flushing.
    489 
    490    Flushing the list consists of walking it start to end and emitting
    491    instrumentation IR for each event, in the order in which they
    492    appear.  It may be possible to emit a single call for two adjacent
    493    events in order to reduce the number of helper function calls made.
    494    For example, it could well be profitable to handle two adjacent Ir
    495    events with a single helper call.  */
    496 
    497 typedef
    498    IRExpr
    499    IRAtom;
    500 
    501 typedef
    502    enum {
    503       Ev_IrNoX,  // Instruction read not crossing cache lines
    504       Ev_IrGen,  // Generic Ir, not being detected as IrNoX
    505       Ev_Dr,     // Data read
    506       Ev_Dw,     // Data write
    507       Ev_Dm,     // Data modify (read then write)
    508       Ev_Bc,     // branch conditional
    509       Ev_Bi      // branch indirect (to unknown destination)
    510    }
    511    EventTag;
    512 
    513 typedef
    514    struct {
    515       EventTag   tag;
    516       InstrInfo* inode;
    517       union {
    518          struct {
    519          } IrGen;
    520          struct {
    521          } IrNoX;
    522          struct {
    523             IRAtom* ea;
    524             Int     szB;
    525          } Dr;
    526          struct {
    527             IRAtom* ea;
    528             Int     szB;
    529          } Dw;
    530          struct {
    531             IRAtom* ea;
    532             Int     szB;
    533          } Dm;
    534          struct {
    535             IRAtom* taken; /* :: Ity_I1 */
    536          } Bc;
    537          struct {
    538             IRAtom* dst;
    539          } Bi;
    540       } Ev;
    541    }
    542    Event;
    543 
    544 static void init_Event ( Event* ev ) {
    545    VG_(memset)(ev, 0, sizeof(Event));
    546 }
    547 
    548 static IRAtom* get_Event_dea ( Event* ev ) {
    549    switch (ev->tag) {
    550       case Ev_Dr: return ev->Ev.Dr.ea;
    551       case Ev_Dw: return ev->Ev.Dw.ea;
    552       case Ev_Dm: return ev->Ev.Dm.ea;
    553       default:    tl_assert(0);
    554    }
    555 }
    556 
    557 static Int get_Event_dszB ( Event* ev ) {
    558    switch (ev->tag) {
    559       case Ev_Dr: return ev->Ev.Dr.szB;
    560       case Ev_Dw: return ev->Ev.Dw.szB;
    561       case Ev_Dm: return ev->Ev.Dm.szB;
    562       default:    tl_assert(0);
    563    }
    564 }
    565 
    566 
    567 /* Up to this many unnotified events are allowed.  Number is
    568    arbitrary.  Larger numbers allow more event merging to occur, but
    569    potentially induce more spilling due to extending live ranges of
    570    address temporaries. */
    571 #define N_EVENTS 16
    572 
    573 
    574 /* A struct which holds all the running state during instrumentation.
    575    Mostly to avoid passing loads of parameters everywhere. */
    576 typedef
    577    struct {
    578       /* The current outstanding-memory-event list. */
    579       Event events[N_EVENTS];
    580       Int   events_used;
    581 
    582       /* The array of InstrInfo bins for the BB. */
    583       SB_info* sbInfo;
    584 
    585       /* Number InstrInfo bins 'used' so far. */
    586       Int sbInfo_i;
    587 
    588       /* The output SB being constructed. */
    589       IRSB* sbOut;
    590    }
    591    CgState;
    592 
    593 
    594 /*------------------------------------------------------------*/
    595 /*--- Instrumentation main                                 ---*/
    596 /*------------------------------------------------------------*/
    597 
    598 // Note that origAddr is the real origAddr, not the address of the first
    599 // instruction in the block (they can be different due to redirection).
    600 static
    601 SB_info* get_SB_info(IRSB* sbIn, Addr origAddr)
    602 {
    603    Int      i, n_instrs;
    604    IRStmt*  st;
    605    SB_info* sbInfo;
    606 
    607    // Count number of original instrs in SB
    608    n_instrs = 0;
    609    for (i = 0; i < sbIn->stmts_used; i++) {
    610       st = sbIn->stmts[i];
    611       if (Ist_IMark == st->tag) n_instrs++;
    612    }
    613 
    614    // Check that we don't have an entry for this BB in the instr-info table.
    615    // If this assertion fails, there has been some screwup:  some
    616    // translations must have been discarded but Cachegrind hasn't discarded
    617    // the corresponding entries in the instr-info table.
    618    sbInfo = VG_(OSetGen_Lookup)(instrInfoTable, &origAddr);
    619    tl_assert(NULL == sbInfo);
    620 
    621    // BB never translated before (at this address, at least;  could have
    622    // been unloaded and then reloaded elsewhere in memory)
    623    sbInfo = VG_(OSetGen_AllocNode)(instrInfoTable,
    624                                 sizeof(SB_info) + n_instrs*sizeof(InstrInfo));
    625    sbInfo->SB_addr  = origAddr;
    626    sbInfo->n_instrs = n_instrs;
    627    VG_(OSetGen_Insert)( instrInfoTable, sbInfo );
    628 
    629    return sbInfo;
    630 }
    631 
    632 
    633 static void showEvent ( Event* ev )
    634 {
    635    switch (ev->tag) {
    636       case Ev_IrGen:
    637          VG_(printf)("IrGen %p\n", ev->inode);
    638          break;
    639       case Ev_IrNoX:
    640          VG_(printf)("IrNoX %p\n", ev->inode);
    641          break;
    642       case Ev_Dr:
    643          VG_(printf)("Dr %p %d EA=", ev->inode, ev->Ev.Dr.szB);
    644          ppIRExpr(ev->Ev.Dr.ea);
    645          VG_(printf)("\n");
    646          break;
    647       case Ev_Dw:
    648          VG_(printf)("Dw %p %d EA=", ev->inode, ev->Ev.Dw.szB);
    649          ppIRExpr(ev->Ev.Dw.ea);
    650          VG_(printf)("\n");
    651          break;
    652       case Ev_Dm:
    653          VG_(printf)("Dm %p %d EA=", ev->inode, ev->Ev.Dm.szB);
    654          ppIRExpr(ev->Ev.Dm.ea);
    655          VG_(printf)("\n");
    656          break;
    657       case Ev_Bc:
    658          VG_(printf)("Bc %p   GA=", ev->inode);
    659          ppIRExpr(ev->Ev.Bc.taken);
    660          VG_(printf)("\n");
    661          break;
    662       case Ev_Bi:
    663          VG_(printf)("Bi %p  DST=", ev->inode);
    664          ppIRExpr(ev->Ev.Bi.dst);
    665          VG_(printf)("\n");
    666          break;
    667       default:
    668          tl_assert(0);
    669          break;
    670    }
    671 }
    672 
    673 // Reserve and initialise an InstrInfo for the first mention of a new insn.
    674 static
    675 InstrInfo* setup_InstrInfo ( CgState* cgs, Addr instr_addr, UInt instr_len )
    676 {
    677    InstrInfo* i_node;
    678    tl_assert(cgs->sbInfo_i >= 0);
    679    tl_assert(cgs->sbInfo_i < cgs->sbInfo->n_instrs);
    680    i_node = &cgs->sbInfo->instrs[ cgs->sbInfo_i ];
    681    i_node->instr_addr = instr_addr;
    682    i_node->instr_len  = instr_len;
    683    i_node->parent     = get_lineCC(instr_addr);
    684    cgs->sbInfo_i++;
    685    return i_node;
    686 }
    687 
    688 
    689 /* Generate code for all outstanding memory events, and mark the queue
     690    empty.  Code is generated into cgs->sbOut, and this activity
    691    'consumes' slots in cgs->sbInfo. */
    692 
    693 static void flushEvents ( CgState* cgs )
    694 {
    695    Int        i, regparms;
    696    const HChar* helperName;
    697    void*      helperAddr;
    698    IRExpr**   argv;
    699    IRExpr*    i_node_expr;
    700    IRDirty*   di;
    701    Event*     ev;
    702    Event*     ev2;
    703    Event*     ev3;
    704 
    705    i = 0;
    706    while (i < cgs->events_used) {
    707 
    708       helperName = NULL;
    709       helperAddr = NULL;
    710       argv       = NULL;
    711       regparms   = 0;
    712 
    713       /* generate IR to notify event i and possibly the ones
    714          immediately following it. */
    715       tl_assert(i >= 0 && i < cgs->events_used);
    716 
    717       ev  = &cgs->events[i];
    718       ev2 = ( i < cgs->events_used-1 ? &cgs->events[i+1] : NULL );
    719       ev3 = ( i < cgs->events_used-2 ? &cgs->events[i+2] : NULL );
    720 
    721       if (DEBUG_CG) {
    722          VG_(printf)("   flush ");
    723          showEvent( ev );
    724       }
    725 
    726       i_node_expr = mkIRExpr_HWord( (HWord)ev->inode );
    727 
    728       /* Decide on helper fn to call and args to pass it, and advance
    729          i appropriately. */
    730       switch (ev->tag) {
    731          case Ev_IrNoX:
    732             /* Merge an IrNoX with a following Dr/Dm. */
    733             if (ev2 && (ev2->tag == Ev_Dr || ev2->tag == Ev_Dm)) {
    734                /* Why is this true?  It's because we're merging an Ir
    735                   with a following Dr or Dm.  The Ir derives from the
    736                   instruction's IMark and the Dr/Dm from data
    737                   references which follow it.  In short it holds
    738                   because each insn starts with an IMark, hence an
    739                   Ev_Ir, and so these Dr/Dm must pertain to the
    740                   immediately preceding Ir.  Same applies to analogous
    741                   assertions in the subsequent cases. */
    742                tl_assert(ev2->inode == ev->inode);
    743                helperName = "log_1IrNoX_1Dr_cache_access";
    744                helperAddr = &log_1IrNoX_1Dr_cache_access;
    745                argv = mkIRExprVec_3( i_node_expr,
    746                                      get_Event_dea(ev2),
    747                                      mkIRExpr_HWord( get_Event_dszB(ev2) ) );
    748                regparms = 3;
    749                i += 2;
    750             }
    751             /* Merge an IrNoX with a following Dw. */
    752             else
    753             if (ev2 && ev2->tag == Ev_Dw) {
    754                tl_assert(ev2->inode == ev->inode);
    755                helperName = "log_1IrNoX_1Dw_cache_access";
    756                helperAddr = &log_1IrNoX_1Dw_cache_access;
    757                argv = mkIRExprVec_3( i_node_expr,
    758                                      get_Event_dea(ev2),
    759                                      mkIRExpr_HWord( get_Event_dszB(ev2) ) );
    760                regparms = 3;
    761                i += 2;
    762             }
    763             /* Merge an IrNoX with two following IrNoX's. */
    764             else
    765             if (ev2 && ev3 && ev2->tag == Ev_IrNoX && ev3->tag == Ev_IrNoX)
    766             {
    767                if (clo_cache_sim) {
    768                   helperName = "log_3IrNoX_0D_cache_access";
    769                   helperAddr = &log_3IrNoX_0D_cache_access;
    770                } else {
    771                   helperName = "log_3Ir";
    772                   helperAddr = &log_3Ir;
    773                }
    774                argv = mkIRExprVec_3( i_node_expr,
    775                                      mkIRExpr_HWord( (HWord)ev2->inode ),
    776                                      mkIRExpr_HWord( (HWord)ev3->inode ) );
    777                regparms = 3;
    778                i += 3;
    779             }
    780             /* Merge an IrNoX with one following IrNoX. */
    781             else
    782             if (ev2 && ev2->tag == Ev_IrNoX) {
    783                if (clo_cache_sim) {
    784                   helperName = "log_2IrNoX_0D_cache_access";
    785                   helperAddr = &log_2IrNoX_0D_cache_access;
    786                } else {
    787                   helperName = "log_2Ir";
    788                   helperAddr = &log_2Ir;
    789                }
    790                argv = mkIRExprVec_2( i_node_expr,
    791                                      mkIRExpr_HWord( (HWord)ev2->inode ) );
    792                regparms = 2;
    793                i += 2;
    794             }
    795             /* No merging possible; emit as-is. */
    796             else {
    797                if (clo_cache_sim) {
    798                   helperName = "log_1IrNoX_0D_cache_access";
    799                   helperAddr = &log_1IrNoX_0D_cache_access;
    800                } else {
    801                   helperName = "log_1Ir";
    802                   helperAddr = &log_1Ir;
    803                }
    804                argv = mkIRExprVec_1( i_node_expr );
    805                regparms = 1;
    806                i++;
    807             }
    808             break;
    809          case Ev_IrGen:
    810             if (clo_cache_sim) {
    811 	       helperName = "log_1IrGen_0D_cache_access";
    812 	       helperAddr = &log_1IrGen_0D_cache_access;
    813 	    } else {
    814 	       helperName = "log_1Ir";
    815 	       helperAddr = &log_1Ir;
    816 	    }
    817 	    argv = mkIRExprVec_1( i_node_expr );
    818 	    regparms = 1;
    819 	    i++;
    820             break;
    821          case Ev_Dr:
    822          case Ev_Dm:
    823             /* Data read or modify */
    824             helperName = "log_0Ir_1Dr_cache_access";
    825             helperAddr = &log_0Ir_1Dr_cache_access;
    826             argv = mkIRExprVec_3( i_node_expr,
    827                                   get_Event_dea(ev),
    828                                   mkIRExpr_HWord( get_Event_dszB(ev) ) );
    829             regparms = 3;
    830             i++;
    831             break;
    832          case Ev_Dw:
    833             /* Data write */
    834             helperName = "log_0Ir_1Dw_cache_access";
    835             helperAddr = &log_0Ir_1Dw_cache_access;
    836             argv = mkIRExprVec_3( i_node_expr,
    837                                   get_Event_dea(ev),
    838                                   mkIRExpr_HWord( get_Event_dszB(ev) ) );
    839             regparms = 3;
    840             i++;
    841             break;
    842          case Ev_Bc:
    843             /* Conditional branch */
    844             helperName = "log_cond_branch";
    845             helperAddr = &log_cond_branch;
    846             argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bc.taken );
    847             regparms = 2;
    848             i++;
    849             break;
    850          case Ev_Bi:
    851             /* Branch to an unknown destination */
    852             helperName = "log_ind_branch";
    853             helperAddr = &log_ind_branch;
    854             argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bi.dst );
    855             regparms = 2;
    856             i++;
    857             break;
    858          default:
    859             tl_assert(0);
    860       }
    861 
    862       /* Add the helper. */
    863       tl_assert(helperName);
    864       tl_assert(helperAddr);
    865       tl_assert(argv);
    866       di = unsafeIRDirty_0_N( regparms,
    867                               helperName, VG_(fnptr_to_fnentry)( helperAddr ),
    868                               argv );
    869       addStmtToIRSB( cgs->sbOut, IRStmt_Dirty(di) );
    870    }
    871 
    872    cgs->events_used = 0;
    873 }
    874 
    875 static void addEvent_Ir ( CgState* cgs, InstrInfo* inode )
    876 {
    877    Event* evt;
    878    if (cgs->events_used == N_EVENTS)
    879       flushEvents(cgs);
    880    tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
    881    evt = &cgs->events[cgs->events_used];
    882    init_Event(evt);
    883    evt->inode    = inode;
    884    if (cachesim_is_IrNoX(inode->instr_addr, inode->instr_len)) {
    885       evt->tag = Ev_IrNoX;
    886       distinct_instrsNoX++;
    887    } else {
    888       evt->tag = Ev_IrGen;
    889       distinct_instrsGen++;
    890    }
    891    cgs->events_used++;
    892 }
    893 
    894 static
    895 void addEvent_Dr ( CgState* cgs, InstrInfo* inode, Int datasize, IRAtom* ea )
    896 {
    897    Event* evt;
    898    tl_assert(isIRAtom(ea));
    899    tl_assert(datasize >= 1 && datasize <= min_line_size);
    900    if (!clo_cache_sim)
    901       return;
    902    if (cgs->events_used == N_EVENTS)
    903       flushEvents(cgs);
    904    tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
    905    evt = &cgs->events[cgs->events_used];
    906    init_Event(evt);
    907    evt->tag       = Ev_Dr;
    908    evt->inode     = inode;
    909    evt->Ev.Dr.szB = datasize;
    910    evt->Ev.Dr.ea  = ea;
    911    cgs->events_used++;
    912 }
    913 
    914 static
    915 void addEvent_Dw ( CgState* cgs, InstrInfo* inode, Int datasize, IRAtom* ea )
    916 {
    917    Event* lastEvt;
    918    Event* evt;
    919 
    920    tl_assert(isIRAtom(ea));
    921    tl_assert(datasize >= 1 && datasize <= min_line_size);
    922 
    923    if (!clo_cache_sim)
    924       return;
    925 
    926    /* Is it possible to merge this write with the preceding read? */
    927    lastEvt = &cgs->events[cgs->events_used-1];
    928    if (cgs->events_used > 0
    929        && lastEvt->tag       == Ev_Dr
    930        && lastEvt->Ev.Dr.szB == datasize
    931        && lastEvt->inode     == inode
    932        && eqIRAtom(lastEvt->Ev.Dr.ea, ea))
    933    {
    934       lastEvt->tag   = Ev_Dm;
    935       return;
    936    }
    937 
    938    /* No.  Add as normal. */
    939    if (cgs->events_used == N_EVENTS)
    940       flushEvents(cgs);
    941    tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
    942    evt = &cgs->events[cgs->events_used];
    943    init_Event(evt);
    944    evt->tag       = Ev_Dw;
    945    evt->inode     = inode;
    946    evt->Ev.Dw.szB = datasize;
    947    evt->Ev.Dw.ea  = ea;
    948    cgs->events_used++;
    949 }
    950 
    951 static
    952 void addEvent_D_guarded ( CgState* cgs, InstrInfo* inode,
    953                           Int datasize, IRAtom* ea, IRAtom* guard,
    954                           Bool isWrite )
    955 {
    956    tl_assert(isIRAtom(ea));
    957    tl_assert(guard);
    958    tl_assert(isIRAtom(guard));
    959    tl_assert(datasize >= 1 && datasize <= min_line_size);
    960 
    961    if (!clo_cache_sim)
    962       return;
    963 
    964    /* Adding guarded memory actions and merging them with the existing
    965       queue is too complex.  Simply flush the queue and add this
    966       action immediately.  Since guarded loads and stores are pretty
    967       rare, this is not thought likely to cause any noticeable
    968       performance loss as a result of the loss of event-merging
    969       opportunities. */
    970    tl_assert(cgs->events_used >= 0);
    971    flushEvents(cgs);
    972    tl_assert(cgs->events_used == 0);
    973    /* Same as case Ev_Dw / case Ev_Dr in flushEvents, except with guard */
    974    IRExpr*      i_node_expr;
    975    const HChar* helperName;
    976    void*        helperAddr;
    977    IRExpr**     argv;
    978    Int          regparms;
    979    IRDirty*     di;
    980    i_node_expr = mkIRExpr_HWord( (HWord)inode );
    981    helperName  = isWrite ? "log_0Ir_1Dw_cache_access"
    982                          : "log_0Ir_1Dr_cache_access";
    983    helperAddr  = isWrite ? &log_0Ir_1Dw_cache_access
    984                          : &log_0Ir_1Dr_cache_access;
    985    argv        = mkIRExprVec_3( i_node_expr,
    986                                 ea, mkIRExpr_HWord( datasize ) );
    987    regparms    = 3;
    988    di          = unsafeIRDirty_0_N(
    989                     regparms,
    990                     helperName, VG_(fnptr_to_fnentry)( helperAddr ),
    991                     argv );
    992    di->guard = guard;
    993    addStmtToIRSB( cgs->sbOut, IRStmt_Dirty(di) );
    994 }
    995 
    996 
    997 static
    998 void addEvent_Bc ( CgState* cgs, InstrInfo* inode, IRAtom* guard )
    999 {
   1000    Event* evt;
   1001    tl_assert(isIRAtom(guard));
   1002    tl_assert(typeOfIRExpr(cgs->sbOut->tyenv, guard)
   1003              == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64));
   1004    if (!clo_branch_sim)
   1005       return;
   1006    if (cgs->events_used == N_EVENTS)
   1007       flushEvents(cgs);
   1008    tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
   1009    evt = &cgs->events[cgs->events_used];
   1010    init_Event(evt);
   1011    evt->tag         = Ev_Bc;
   1012    evt->inode       = inode;
   1013    evt->Ev.Bc.taken = guard;
   1014    cgs->events_used++;
   1015 }
   1016 
   1017 static
   1018 void addEvent_Bi ( CgState* cgs, InstrInfo* inode, IRAtom* whereTo )
   1019 {
   1020    Event* evt;
   1021    tl_assert(isIRAtom(whereTo));
   1022    tl_assert(typeOfIRExpr(cgs->sbOut->tyenv, whereTo)
   1023              == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64));
   1024    if (!clo_branch_sim)
   1025       return;
   1026    if (cgs->events_used == N_EVENTS)
   1027       flushEvents(cgs);
   1028    tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
   1029    evt = &cgs->events[cgs->events_used];
   1030    init_Event(evt);
   1031    evt->tag       = Ev_Bi;
   1032    evt->inode     = inode;
   1033    evt->Ev.Bi.dst = whereTo;
   1034    cgs->events_used++;
   1035 }
   1036 
   1037 ////////////////////////////////////////////////////////////
   1038 
   1039 
   /*   cg_instrument: build an instrumented copy of the superblock 'sbIn'.

        Every statement of 'sbIn' is copied into a new superblock.  Along
        the way, instruction, data and branch events are queued in a local
        CgState and turned into helper calls by flushEvents().

        closure        -- its 'readdr' field is asserted to equal
                          vge->base[0] and is passed to get_SB_info().
        sbIn           -- the input superblock; each statement from the
                          first IMark onwards is asserted to be flat.
        layout         -- not referenced in this function.
        vge            -- only base[0] is read, for the assertion above.
        archinfo_host  -- not referenced in this function.
        gWordTy,
        hWordTy        -- guest and host word types; a mismatch causes a
                          VG_(tool_panic).  hWordTy also selects the ops
                          used to widen branch guards.

        Returns the new superblock (cgs.sbOut). */
   1040 static
   1041 IRSB* cg_instrument ( VgCallbackClosure* closure,
   1042                       IRSB* sbIn,
   1043                       const VexGuestLayout* layout,
   1044                       const VexGuestExtents* vge,
   1045                       const VexArchInfo* archinfo_host,
   1046                       IRType gWordTy, IRType hWordTy )
   1047 {
   1048    Int        i;
   1049    UInt       isize;
   1050    IRStmt*    st;
   1051    Addr       cia; /* address of current insn */
   1052    CgState    cgs;
   1053    IRTypeEnv* tyenv = sbIn->tyenv;
   1054    InstrInfo* curr_inode = NULL;
   1055 
   1056    if (gWordTy != hWordTy) {
   1057       /* We don't currently support this case. */
   1058       VG_(tool_panic)("host/guest word size mismatch");
   1059    }
   1060 
   1061    // Set up new SB
   1062    cgs.sbOut = deepCopyIRSBExceptStmts(sbIn);
   1063 
   1064    // Copy verbatim any IR preamble preceding the first IMark
   1065    i = 0;
   1066    while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
   1067       addStmtToIRSB( cgs.sbOut, sbIn->stmts[i] );
   1068       i++;
   1069    }
   1070 
   1071    // Get the first statement, and initial cia from it
           // (the asserts below require that the block contains an IMark)
   1072    tl_assert(sbIn->stmts_used > 0);
   1073    tl_assert(i < sbIn->stmts_used);
   1074    st = sbIn->stmts[i];
   1075    tl_assert(Ist_IMark == st->tag);
   1076 
   1077    cia   = st->Ist.IMark.addr;
   1078    isize = st->Ist.IMark.len;
   1079    // If Vex fails to decode an instruction, the size will be zero.
   1080    // Pretend otherwise.
   1081    if (isize == 0) isize = VG_MIN_INSTR_SZB;
   1082 
   1083    // Set up running state and get block info
   1084    tl_assert(closure->readdr == vge->base[0]);
   1085    cgs.events_used = 0;
   1086    cgs.sbInfo      = get_SB_info(sbIn, (Addr)closure->readdr);
   1087    cgs.sbInfo_i    = 0;
   1088 
   1089    if (DEBUG_CG)
   1090       VG_(printf)("\n\n---------- cg_instrument ----------\n");
   1091 
   1092    // Traverse the block, initialising inodes, adding events and flushing as
   1093    // necessary.
           // The loop resumes at the first IMark (index i from the preamble
           // copy above), so that IMark is handled by the Ist_IMark case.
   1094    for (/*use current i*/; i < sbIn->stmts_used; i++) {
   1095 
   1096       st = sbIn->stmts[i];
   1097       tl_assert(isFlatIRStmt(st));
   1098 
   1099       switch (st->tag) {
                 /* These statement kinds generate no events; they are
                    simply copied to the output after the switch. */
   1100          case Ist_NoOp:
   1101          case Ist_AbiHint:
   1102          case Ist_Put:
   1103          case Ist_PutI:
   1104          case Ist_MBE:
   1105             break;
   1106 
   1107          case Ist_IMark:
   1108             cia   = st->Ist.IMark.addr;
   1109             isize = st->Ist.IMark.len;
   1110 
   1111             // If Vex fails to decode an instruction, the size will be zero.
   1112             // Pretend otherwise.
   1113             if (isize == 0) isize = VG_MIN_INSTR_SZB;
   1114 
   1115             // Sanity-check size.
   1116             tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB)
   1117                      || VG_CLREQ_SZB == isize );
   1118 
   1119             // Get space for and init the inode, record it as the current one.
   1120             // Subsequent Dr/Dw/Dm events from the same instruction will
   1121             // also use it.
   1122             curr_inode = setup_InstrInfo(&cgs, cia, isize);
   1123 
   1124             addEvent_Ir( &cgs, curr_inode );
   1125             break;
   1126 
   1127          case Ist_WrTmp: {
                    /* Only a load on the right-hand side produces an event;
                       any other expression is just copied. */
   1128             IRExpr* data = st->Ist.WrTmp.data;
   1129             if (data->tag == Iex_Load) {
   1130                IRExpr* aexpr = data->Iex.Load.addr;
   1131                // Note also, endianness info is ignored.  I guess
   1132                // that's not interesting.
   1133                addEvent_Dr( &cgs, curr_inode, sizeofIRType(data->Iex.Load.ty),
   1134                                   aexpr );
   1135             }
   1136             break;
   1137          }
   1138 
   1139          case Ist_Store: {
   1140             IRExpr* data  = st->Ist.Store.data;
   1141             IRExpr* aexpr = st->Ist.Store.addr;
   1142             addEvent_Dw( &cgs, curr_inode,
   1143                          sizeofIRType(typeOfIRExpr(tyenv, data)), aexpr );
   1144             break;
   1145          }
   1146 
   1147          case Ist_StoreG: {
                    /* Guarded store: addEvent_D_guarded flushes the queue and
                       emits the helper call immediately rather than queuing. */
   1148             IRStoreG* sg   = st->Ist.StoreG.details;
   1149             IRExpr*   data = sg->data;
   1150             IRExpr*   addr = sg->addr;
   1151             IRType    type = typeOfIRExpr(tyenv, data);
   1152             tl_assert(type != Ity_INVALID);
   1153             addEvent_D_guarded( &cgs, curr_inode,
   1154                                 sizeofIRType(type), addr, sg->guard,
   1155                                 True/*isWrite*/ );
   1156             break;
   1157          }
   1158 
   1159          case Ist_LoadG: {
                    /* Guarded load: handled like StoreG, using the size of
                       the loaded type rather than the widened one. */
   1160             IRLoadG* lg       = st->Ist.LoadG.details;
   1161             IRType   type     = Ity_INVALID; /* loaded type */
   1162             IRType   typeWide = Ity_INVALID; /* after implicit widening */
   1163             IRExpr*  addr     = lg->addr;
   1164             typeOfIRLoadGOp(lg->cvt, &typeWide, &type);
   1165             tl_assert(type != Ity_INVALID);
   1166             addEvent_D_guarded( &cgs, curr_inode,
   1167                                 sizeofIRType(type), addr, lg->guard,
   1168                                 False/*!isWrite*/ );
   1169             break;
   1170          }
   1171 
   1172          case Ist_Dirty: {
   1173             Int      dataSize;
   1174             IRDirty* d = st->Ist.Dirty.details;
   1175             if (d->mFx != Ifx_None) {
   1176                /* This dirty helper accesses memory.  Collect the details. */
   1177                tl_assert(d->mAddr != NULL);
   1178                tl_assert(d->mSize != 0);
   1179                dataSize = d->mSize;
   1180                // Large (eg. 28B, 108B, 512B on x86) data-sized
   1181                // instructions will be done inaccurately, but they're
   1182                // very rare and this avoids errors from hitting more
   1183                // than two cache lines in the simulation.
   1184                if (dataSize > min_line_size)
   1185                   dataSize = min_line_size;
                       /* For Ifx_Modify both calls run; addEvent_Dw may then
                          merge the write into the read just queued. */
   1186                if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
   1187                   addEvent_Dr( &cgs, curr_inode, dataSize, d->mAddr );
   1188                if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
   1189                   addEvent_Dw( &cgs, curr_inode, dataSize, d->mAddr );
   1190             } else {
   1191                tl_assert(d->mAddr == NULL);
   1192                tl_assert(d->mSize == 0);
   1193             }
   1194             break;
   1195          }
   1196 
   1197          case Ist_CAS: {
   1198             /* We treat it as a read and a write of the location.  I
   1199                think that is the same behaviour as it was before IRCAS
   1200                was introduced, since prior to that point, the Vex
   1201                front ends would translate a lock-prefixed instruction
   1202                into a (normal) read followed by a (normal) write. */
   1203             Int    dataSize;
   1204             IRCAS* cas = st->Ist.CAS.details;
   1205             tl_assert(cas->addr != NULL);
   1206             tl_assert(cas->dataLo != NULL);
   1207             dataSize = sizeofIRType(typeOfIRExpr(tyenv, cas->dataLo));
   1208             if (cas->dataHi != NULL)
   1209                dataSize *= 2; /* since it's a doubleword-CAS */
   1210             /* I don't think this can ever happen, but play safe. */
   1211             if (dataSize > min_line_size)
   1212                dataSize = min_line_size;
   1213             addEvent_Dr( &cgs, curr_inode, dataSize, cas->addr );
   1214             addEvent_Dw( &cgs, curr_inode, dataSize, cas->addr );
   1215             break;
   1216          }
   1217 
   1218          case Ist_LLSC: {
                    /* storedata == NULL distinguishes a load-linked from a
                       store-conditional. */
   1219             IRType dataTy;
   1220             if (st->Ist.LLSC.storedata == NULL) {
   1221                /* LL */
   1222                dataTy = typeOfIRTemp(tyenv, st->Ist.LLSC.result);
   1223                addEvent_Dr( &cgs, curr_inode,
   1224                             sizeofIRType(dataTy), st->Ist.LLSC.addr );
   1225                /* flush events before LL, should help SC to succeed */
   1226                flushEvents( &cgs );
   1227             } else {
   1228                /* SC */
   1229                dataTy = typeOfIRExpr(tyenv, st->Ist.LLSC.storedata);
   1230                addEvent_Dw( &cgs, curr_inode,
   1231                             sizeofIRType(dataTy), st->Ist.LLSC.addr );
   1232             }
   1233             break;
   1234          }
   1235 
   1236          case Ist_Exit: {
   1237             // call branch predictor only if this is a branch in guest code
   1238             if ( (st->Ist.Exit.jk == Ijk_Boring) ||
   1239                  (st->Ist.Exit.jk == Ijk_Call) ||
   1240                  (st->Ist.Exit.jk == Ijk_Ret) )
   1241             {
   1242                /* Stuff to widen the guard expression to a host word, so
   1243                   we can pass it to the branch predictor simulation
   1244                   functions easily. */
   1245                Bool     inverted;
   1246                Addr     nia, sea;
   1247                IRConst* dst;
   1248                IRType   tyW    = hWordTy;
   1249                IROp     widen  = tyW==Ity_I32  ? Iop_1Uto32  : Iop_1Uto64;
   1250                IROp     opXOR  = tyW==Ity_I32  ? Iop_Xor32   : Iop_Xor64;
   1251                IRTemp   guard1 = newIRTemp(cgs.sbOut->tyenv, Ity_I1);
   1252                IRTemp   guardW = newIRTemp(cgs.sbOut->tyenv, tyW);
   1253                IRTemp   guard  = newIRTemp(cgs.sbOut->tyenv, tyW);
   1254                IRExpr*  one    = tyW==Ity_I32 ? IRExpr_Const(IRConst_U32(1))
   1255                                               : IRExpr_Const(IRConst_U64(1));
   1256 
   1257                /* First we need to figure out whether the side exit got
   1258                   inverted by the ir optimiser.  To do that, figure out
   1259                   the next (fallthrough) instruction's address and the
   1260                   side exit address and see if they are the same. */
   1261                nia = cia + isize;
   1262 
   1263                /* Side exit address */
   1264                dst = st->Ist.Exit.dst;
   1265                if (tyW == Ity_I32) {
   1266                   tl_assert(dst->tag == Ico_U32);
   1267                   sea = dst->Ico.U32;
   1268                } else {
   1269                   tl_assert(tyW == Ity_I64);
   1270                   tl_assert(dst->tag == Ico_U64);
   1271                   sea = dst->Ico.U64;
   1272                }
   1273 
   1274                inverted = nia == sea;
   1275 
   1276                /* Widen the guard expression. */
   1277                addStmtToIRSB( cgs.sbOut,
   1278                               IRStmt_WrTmp( guard1, st->Ist.Exit.guard ));
   1279                addStmtToIRSB( cgs.sbOut,
   1280                               IRStmt_WrTmp( guardW,
   1281                                             IRExpr_Unop(widen,
   1282                                                         IRExpr_RdTmp(guard1))) );
   1283                /* If the exit is inverted, invert the sense of the guard. */
   1284                addStmtToIRSB(
   1285                      cgs.sbOut,
   1286                      IRStmt_WrTmp(
   1287                            guard,
   1288                            inverted ? IRExpr_Binop(opXOR, IRExpr_RdTmp(guardW), one)
   1289                                     : IRExpr_RdTmp(guardW)
   1290                               ));
   1291                /* And post the event. */
   1292                addEvent_Bc( &cgs, curr_inode, IRExpr_RdTmp(guard) );
   1293             }
   1294 
   1295             /* We may never reach the next statement, so need to flush
   1296                all outstanding transactions now. */
                    /* (This flush happens for every Exit, whatever its jump
                       kind.) */
   1297             flushEvents( &cgs );
   1298             break;
   1299          }
   1300 
   1301          default:
                    /* Unhandled statement kind: print it, then abort. */
   1302             ppIRStmt(st);
   1303             tl_assert(0);
   1304             break;
   1305       }
   1306 
   1307       /* Copy the original statement */
              /* Events queued above but not yet flushed are emitted by a
                 later flushEvents() call, i.e. after this statement in the
                 output block. */
   1308       addStmtToIRSB( cgs.sbOut, st );
   1309 
   1310       if (DEBUG_CG) {
   1311          ppIRStmt(st);
   1312          VG_(printf)("\n");
   1313       }
   1314    }
   1315 
   1316    /* Deal with branches to unknown destinations.  Except ignore ones
   1317       which are function returns as we assume the return stack
   1318       predictor never mispredicts. */
   1319    if ((sbIn->jumpkind == Ijk_Boring) || (sbIn->jumpkind == Ijk_Call)) {
   1320       if (0) { ppIRExpr( sbIn->next ); VG_(printf)("\n"); }
   1321       switch (sbIn->next->tag) {
   1322          case Iex_Const:
   1323             break; /* boring - branch to known address */
   1324          case Iex_RdTmp:
   1325             /* looks like an indirect branch (branch to unknown) */
   1326             addEvent_Bi( &cgs, curr_inode, sbIn->next );
   1327             break;
   1328          default:
   1329             /* shouldn't happen - if the incoming IR is properly
   1330                flattened, should only have tmp and const cases to
   1331                consider. */
   1332             tl_assert(0);
   1333       }
   1334    }
   1335 
   1336    /* At the end of the bb.  Flush outstandings. */
   1337    flushEvents( &cgs );
   1338 
   1339    /* done.  stay sane ... */
           /* NOTE(review): sbInfo_i is not modified in this function;
              presumably setup_InstrInfo() advances it once per IMark --
              confirm against its definition. */
   1340    tl_assert(cgs.sbInfo_i == cgs.sbInfo->n_instrs);
   1341 
   1342    if (DEBUG_CG) {
   1343       VG_(printf)( "goto {");
   1344       ppIRJumpKind(sbIn->jumpkind);
   1345       VG_(printf)( "} ");
   1346       ppIRExpr( sbIn->next );
   1347       VG_(printf)( "}\n");
   1348    }
   1349 
   1350    return cgs.sbOut;
   1351 }
   1352 
   1353 /*------------------------------------------------------------*/
   1354 /*--- Cache configuration                                  ---*/
   1355 /*------------------------------------------------------------*/
   1356 
   /*   Configurations for the I1, D1 and LL caches.  Each starts out as
        UNDEFINED_CACHE.
        NOTE(review): the clo_ prefix suggests these are filled in from
        command-line options -- confirm against the option-parsing code. */
   1357 static cache_t clo_I1_cache = UNDEFINED_CACHE;
   1358 static cache_t clo_D1_cache = UNDEFINED_CACHE;
   1359 static cache_t clo_LL_cache = UNDEFINED_CACHE;
   1360 
   1361 /*------------------------------------------------------------*/
   1362 /*--- cg_fini() and related function                       ---*/
   1363 /*------------------------------------------------------------*/
   1364 
   1365 // Total reads/writes/misses.  Calculated during CC traversal at the end.
   1366 // All auto-zeroed.
        // Each total is the sum of the matching per-line counters.  They are
        // accumulated by fprint_CC_table_and_calc_totals() while it walks
        // CC_table, and printed on the "summary:" line of the output file.
   1367 static CacheCC  Ir_total;
   1368 static CacheCC  Dr_total;
   1369 static CacheCC  Dw_total;
   1370 static BranchCC Bc_total;
   1371 static BranchCC Bi_total;
   1372 
   1373 static void fprint_CC_table_and_calc_totals(void)
   1374 {
   1375    Int     i;
   1376    VgFile  *fp;
   1377    HChar   *currFile = NULL;
   1378    const HChar *currFn = NULL;
   1379    LineCC* lineCC;
   1380 
   1381    // Setup output filename.  Nb: it's important to do this now, ie. as late
   1382    // as possible.  If we do it at start-up and the program forks and the
   1383    // output file format string contains a %p (pid) specifier, both the
   1384    // parent and child will incorrectly write to the same file;  this
   1385    // happened in 3.3.0.
   1386    HChar* cachegrind_out_file =
   1387       VG_(expand_file_name)("--cachegrind-out-file", clo_cachegrind_out_file);
   1388 
   1389    fp = VG_(fopen)(cachegrind_out_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
   1390                                         VKI_S_IRUSR|VKI_S_IWUSR);
   1391    if (fp == NULL) {
   1392       // If the file can't be opened for whatever reason (conflict
   1393       // between multiple cachegrinded processes?), give up now.
   1394       VG_(umsg)("error: can't open cache simulation output file '%s'\n",
   1395                 cachegrind_out_file );
   1396       VG_(umsg)("       ... so simulation results will be missing.\n");
   1397       VG_(free)(cachegrind_out_file);
   1398       return;
   1399    } else {
   1400       VG_(free)(cachegrind_out_file);
   1401    }
   1402 
   1403    // "desc:" lines (giving I1/D1/LL cache configuration).  The spaces after
   1404    // the 2nd colon makes cg_annotate's output look nicer.
   1405    VG_(fprintf)(fp,  "desc: I1 cache:         %s\n"
   1406                      "desc: D1 cache:         %s\n"
   1407                      "desc: LL cache:         %s\n",
   1408                      I1.desc_line, D1.desc_line, LL.desc_line);
   1409 
   1410    // "cmd:" line
   1411    VG_(fprintf)(fp, "cmd: %s", VG_(args_the_exename));
   1412    for (i = 0; i < VG_(sizeXA)( VG_(args_for_client) ); i++) {
   1413       HChar* arg = * (HChar**) VG_(indexXA)( VG_(args_for_client), i );
   1414       VG_(fprintf)(fp, " %s", arg);
   1415    }
   1416    // "events:" line
   1417    if (clo_cache_sim && clo_branch_sim) {
   1418       VG_(fprintf)(fp, "\nevents: Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw "
   1419                                   "Bc Bcm Bi Bim\n");
   1420    }
   1421    else if (clo_cache_sim && !clo_branch_sim) {
   1422       VG_(fprintf)(fp, "\nevents: Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw "
   1423                                   "\n");
   1424    }
   1425    else if (!clo_cache_sim && clo_branch_sim) {
   1426       VG_(fprintf)(fp, "\nevents: Ir Bc Bcm Bi Bim\n");
   1427    }
   1428    else {
   1429       VG_(fprintf)(fp, "\nevents: Ir\n");
   1430    }
   1431 
   1432    // Traverse every lineCC
   1433    VG_(OSetGen_ResetIter)(CC_table);
   1434    while ( (lineCC = VG_(OSetGen_Next)(CC_table)) ) {
   1435       Bool just_hit_a_new_file = False;
   1436       // If we've hit a new file, print a "fl=" line.  Note that because
   1437       // each string is stored exactly once in the string table, we can use
   1438       // pointer comparison rather than strcmp() to test for equality, which
   1439       // is good because most of the time the comparisons are equal and so
   1440       // the whole strings would have to be checked.
   1441       if ( lineCC->loc.file != currFile ) {
   1442          currFile = lineCC->loc.file;
   1443          VG_(fprintf)(fp, "fl=%s\n", currFile);
   1444          distinct_files++;
   1445          just_hit_a_new_file = True;
   1446       }
   1447       // If we've hit a new function, print a "fn=" line.  We know to do
   1448       // this when the function name changes, and also every time we hit a
   1449       // new file (in which case the new function name might be the same as
   1450       // in the old file, hence the just_hit_a_new_file test).
   1451       if ( just_hit_a_new_file || lineCC->loc.fn != currFn ) {
   1452          currFn = lineCC->loc.fn;
   1453          VG_(fprintf)(fp, "fn=%s\n", currFn);
   1454          distinct_fns++;
   1455       }
   1456 
   1457       // Print the LineCC
   1458       if (clo_cache_sim && clo_branch_sim) {
   1459          VG_(fprintf)(fp,  "%d %llu %llu %llu"
   1460                              " %llu %llu %llu"
   1461                              " %llu %llu %llu"
   1462                              " %llu %llu %llu %llu\n",
   1463                             lineCC->loc.line,
   1464                             lineCC->Ir.a, lineCC->Ir.m1, lineCC->Ir.mL,
   1465                             lineCC->Dr.a, lineCC->Dr.m1, lineCC->Dr.mL,
   1466                             lineCC->Dw.a, lineCC->Dw.m1, lineCC->Dw.mL,
   1467                             lineCC->Bc.b, lineCC->Bc.mp,
   1468                             lineCC->Bi.b, lineCC->Bi.mp);
   1469       }
   1470       else if (clo_cache_sim && !clo_branch_sim) {
   1471          VG_(fprintf)(fp,  "%d %llu %llu %llu"
   1472                              " %llu %llu %llu"
   1473                              " %llu %llu %llu\n",
   1474                             lineCC->loc.line,
   1475                             lineCC->Ir.a, lineCC->Ir.m1, lineCC->Ir.mL,
   1476                             lineCC->Dr.a, lineCC->Dr.m1, lineCC->Dr.mL,
   1477                             lineCC->Dw.a, lineCC->Dw.m1, lineCC->Dw.mL);
   1478       }
   1479       else if (!clo_cache_sim && clo_branch_sim) {
   1480          VG_(fprintf)(fp,  "%d %llu"
   1481                              " %llu %llu %llu %llu\n",
   1482                             lineCC->loc.line,
   1483                             lineCC->Ir.a,
   1484                             lineCC->Bc.b, lineCC->Bc.mp,
   1485                             lineCC->Bi.b, lineCC->Bi.mp);
   1486       }
   1487       else {
   1488          VG_(fprintf)(fp,  "%d %llu\n",
   1489                             lineCC->loc.line,
   1490                             lineCC->Ir.a);
   1491       }
   1492 
   1493       // Update summary stats
   1494       Ir_total.a  += lineCC->Ir.a;
   1495       Ir_total.m1 += lineCC->Ir.m1;
   1496       Ir_total.mL += lineCC->Ir.mL;
   1497       Dr_total.a  += lineCC->Dr.a;
   1498       Dr_total.m1 += lineCC->Dr.m1;
   1499       Dr_total.mL += lineCC->Dr.mL;
   1500       Dw_total.a  += lineCC->Dw.a;
   1501       Dw_total.m1 += lineCC->Dw.m1;
   1502       Dw_total.mL += lineCC->Dw.mL;
   1503       Bc_total.b  += lineCC->Bc.b;
   1504       Bc_total.mp += lineCC->Bc.mp;
   1505       Bi_total.b  += lineCC->Bi.b;
   1506       Bi_total.mp += lineCC->Bi.mp;
   1507 
   1508       distinct_lines++;
   1509    }
   1510 
   1511    // Summary stats must come after rest of table, since we calculate them
   1512    // during traversal.  */
   1513    if (clo_cache_sim && clo_branch_sim) {
   1514       VG_(fprintf)(fp,  "summary:"
   1515                         " %llu %llu %llu"
   1516                         " %llu %llu %llu"
   1517                         " %llu %llu %llu"
   1518                         " %llu %llu %llu %llu\n",
   1519                         Ir_total.a, Ir_total.m1, Ir_total.mL,
   1520                         Dr_total.a, Dr_total.m1, Dr_total.mL,
   1521                         Dw_total.a, Dw_total.m1, Dw_total.mL,
   1522                         Bc_total.b, Bc_total.mp,
   1523                         Bi_total.b, Bi_total.mp);
   1524    }
   1525    else if (clo_cache_sim && !clo_branch_sim) {
   1526       VG_(fprintf)(fp,  "summary:"
   1527                         " %llu %llu %llu"
   1528                         " %llu %llu %llu"
   1529                         " %llu %llu %llu\n",
   1530                         Ir_total.a, Ir_total.m1, Ir_total.mL,
   1531                         Dr_total.a, Dr_total.m1, Dr_total.mL,
   1532                         Dw_total.a, Dw_total.m1, Dw_total.mL);
   1533    }
   1534    else if (!clo_cache_sim && clo_branch_sim) {
   1535       VG_(fprintf)(fp,  "summary:"
   1536                         " %llu"
   1537                         " %llu %llu %llu %llu\n",
   1538                         Ir_total.a,
   1539                         Bc_total.b, Bc_total.mp,
   1540                         Bi_total.b, Bi_total.mp);
   1541    }
   1542    else {
   1543       VG_(fprintf)(fp, "summary:"
   1544                         " %llu\n",
   1545                         Ir_total.a);
   1546    }
   1547 
   1548    VG_(fclose)(fp);
   1549 }
   1550 
   1551 static UInt ULong_width(ULong n)
   1552 {
   1553    UInt w = 0;
   1554    while (n > 0) {
   1555       n = n / 10;
   1556       w++;
   1557    }
   1558    if (w == 0) w = 1;
   1559    return w + (w-1)/3;   // add space for commas
   1560 }
   1561 
   1562 static void cg_fini(Int exitcode)
   1563 {
   1564    static HChar fmt[128];   // OK; large enough
   1565 
   1566    CacheCC  D_total;
   1567    BranchCC B_total;
   1568    ULong LL_total_m, LL_total_mr, LL_total_mw,
   1569          LL_total, LL_total_r, LL_total_w;
   1570    Int l1, l2, l3;
   1571 
   1572    fprint_CC_table_and_calc_totals();
   1573 
   1574    if (VG_(clo_verbosity) == 0)
   1575       return;
   1576 
   1577    // Nb: this isn't called "MAX" because that overshadows a global on Darwin.
   1578    #define CG_MAX(a, b)  ((a) >= (b) ? (a) : (b))
   1579 
   1580    /* I cache results.  Use the I_refs value to determine the first column
   1581     * width. */
   1582    l1 = ULong_width(Ir_total.a);
   1583    l2 = ULong_width(CG_MAX(Dr_total.a, Bc_total.b));
   1584    l3 = ULong_width(CG_MAX(Dw_total.a, Bi_total.b));
   1585 
   1586    /* Make format string, getting width right for numbers */
   1587    VG_(sprintf)(fmt, "%%s %%,%dllu\n", l1);
   1588 
   1589    /* Always print this */
   1590    VG_(umsg)(fmt, "I   refs:     ", Ir_total.a);
   1591 
   1592    /* If cache profiling is enabled, show D access numbers and all
   1593       miss numbers */
   1594    if (clo_cache_sim) {
   1595       VG_(umsg)(fmt, "I1  misses:   ", Ir_total.m1);
   1596       VG_(umsg)(fmt, "LLi misses:   ", Ir_total.mL);
   1597 
   1598       if (0 == Ir_total.a) Ir_total.a = 1;
   1599       VG_(umsg)("I1  miss rate: %*.2f%%\n", l1,
   1600                 Ir_total.m1 * 100.0 / Ir_total.a);
   1601       VG_(umsg)("LLi miss rate: %*.2f%%\n", l1,
   1602                 Ir_total.mL * 100.0 / Ir_total.a);
   1603       VG_(umsg)("\n");
   1604 
   1605       /* D cache results.  Use the D_refs.rd and D_refs.wr values to
   1606        * determine the width of columns 2 & 3. */
   1607       D_total.a  = Dr_total.a  + Dw_total.a;
   1608       D_total.m1 = Dr_total.m1 + Dw_total.m1;
   1609       D_total.mL = Dr_total.mL + Dw_total.mL;
   1610 
   1611       /* Make format string, getting width right for numbers */
   1612       VG_(sprintf)(fmt, "%%s %%,%dllu  (%%,%dllu rd   + %%,%dllu wr)\n",
   1613                         l1, l2, l3);
   1614 
   1615       VG_(umsg)(fmt, "D   refs:     ",
   1616                      D_total.a, Dr_total.a, Dw_total.a);
   1617       VG_(umsg)(fmt, "D1  misses:   ",
   1618                      D_total.m1, Dr_total.m1, Dw_total.m1);
   1619       VG_(umsg)(fmt, "LLd misses:   ",
   1620                      D_total.mL, Dr_total.mL, Dw_total.mL);
   1621 
   1622       if (0 == D_total.a)  D_total.a = 1;
   1623       if (0 == Dr_total.a) Dr_total.a = 1;
   1624       if (0 == Dw_total.a) Dw_total.a = 1;
   1625       VG_(umsg)("D1  miss rate: %*.1f%% (%*.1f%%     + %*.1f%%  )\n",
   1626                 l1, D_total.m1  * 100.0 / D_total.a,
   1627                 l2, Dr_total.m1 * 100.0 / Dr_total.a,
   1628                 l3, Dw_total.m1 * 100.0 / Dw_total.a);
   1629       VG_(umsg)("LLd miss rate: %*.1f%% (%*.1f%%     + %*.1f%%  )\n",
   1630                 l1, D_total.mL  * 100.0 / D_total.a,
   1631                 l2, Dr_total.mL * 100.0 / Dr_total.a,
   1632                 l3, Dw_total.mL * 100.0 / Dw_total.a);
   1633       VG_(umsg)("\n");
   1634 
   1635       /* LL overall results */
   1636 
   1637       LL_total   = Dr_total.m1 + Dw_total.m1 + Ir_total.m1;
   1638       LL_total_r = Dr_total.m1 + Ir_total.m1;
   1639       LL_total_w = Dw_total.m1;
   1640       VG_(umsg)(fmt, "LL refs:      ",
   1641                      LL_total, LL_total_r, LL_total_w);
   1642 
   1643       LL_total_m  = Dr_total.mL + Dw_total.mL + Ir_total.mL;
   1644       LL_total_mr = Dr_total.mL + Ir_total.mL;
   1645       LL_total_mw = Dw_total.mL;
   1646       VG_(umsg)(fmt, "LL misses:    ",
   1647                      LL_total_m, LL_total_mr, LL_total_mw);
   1648 
   1649       VG_(umsg)("LL miss rate:  %*.1f%% (%*.1f%%     + %*.1f%%  )\n",
   1650                 l1, LL_total_m  * 100.0 / (Ir_total.a + D_total.a),
   1651                 l2, LL_total_mr * 100.0 / (Ir_total.a + Dr_total.a),
   1652                 l3, LL_total_mw * 100.0 / Dw_total.a);
   1653    }
   1654 
   1655    /* If branch profiling is enabled, show branch overall results. */
   1656    if (clo_branch_sim) {
   1657       /* Make format string, getting width right for numbers */
   1658       VG_(sprintf)(fmt, "%%s %%,%dllu  (%%,%dllu cond + %%,%dllu ind)\n",
   1659                         l1, l2, l3);
   1660 
   1661       if (0 == Bc_total.b)  Bc_total.b = 1;
   1662       if (0 == Bi_total.b)  Bi_total.b = 1;
   1663       B_total.b  = Bc_total.b  + Bi_total.b;
   1664       B_total.mp = Bc_total.mp + Bi_total.mp;
   1665 
   1666       VG_(umsg)("\n");
   1667       VG_(umsg)(fmt, "Branches:     ",
   1668                      B_total.b, Bc_total.b, Bi_total.b);
   1669 
   1670       VG_(umsg)(fmt, "Mispredicts:  ",
   1671                      B_total.mp, Bc_total.mp, Bi_total.mp);
   1672 
   1673       VG_(umsg)("Mispred rate:  %*.1f%% (%*.1f%%     + %*.1f%%   )\n",
   1674                 l1, B_total.mp  * 100.0 / B_total.b,
   1675                 l2, Bc_total.mp * 100.0 / Bc_total.b,
   1676                 l3, Bi_total.mp * 100.0 / Bi_total.b);
   1677    }
   1678 
   1679    // Various stats
   1680    if (VG_(clo_stats)) {
   1681       Int debug_lookups = full_debugs      + fn_debugs +
   1682                           file_line_debugs + no_debugs;
   1683 
   1684       VG_(dmsg)("\n");
   1685       VG_(dmsg)("cachegrind: distinct files     : %d\n", distinct_files);
   1686       VG_(dmsg)("cachegrind: distinct functions : %d\n", distinct_fns);
   1687       VG_(dmsg)("cachegrind: distinct lines     : %d\n", distinct_lines);
   1688       VG_(dmsg)("cachegrind: distinct instrs NoX: %d\n", distinct_instrsNoX);
   1689       VG_(dmsg)("cachegrind: distinct instrs Gen: %d\n", distinct_instrsGen);
   1690       VG_(dmsg)("cachegrind: debug lookups      : %d\n", debug_lookups);
   1691 
   1692       VG_(dmsg)("cachegrind: with full      info:%6.1f%% (%d)\n",
   1693                 full_debugs * 100.0 / debug_lookups, full_debugs);
   1694       VG_(dmsg)("cachegrind: with file/line info:%6.1f%% (%d)\n",
   1695                 file_line_debugs * 100.0 / debug_lookups, file_line_debugs);
   1696       VG_(dmsg)("cachegrind: with fn name   info:%6.1f%% (%d)\n",
   1697                 fn_debugs * 100.0 / debug_lookups, fn_debugs);
   1698       VG_(dmsg)("cachegrind: with zero      info:%6.1f%% (%d)\n",
   1699                 no_debugs * 100.0 / debug_lookups, no_debugs);
   1700 
   1701       VG_(dmsg)("cachegrind: string table size: %u\n",
   1702                 VG_(OSetGen_Size)(stringTable));
   1703       VG_(dmsg)("cachegrind: CC table size: %u\n",
   1704                 VG_(OSetGen_Size)(CC_table));
   1705       VG_(dmsg)("cachegrind: InstrInfo table size: %u\n",
   1706                 VG_(OSetGen_Size)(instrInfoTable));
   1707    }
   1708 }
   1709 
   1710 /*--------------------------------------------------------------------*/
   1711 /*--- Discarding BB info                                           ---*/
   1712 /*--------------------------------------------------------------------*/
   1713 
   1714 // Called when a translation is removed from the translation cache for
   1715 // any reason at all: to free up space, because the guest code was
   1716 // unmapped or modified, or for any arbitrary reason.
   1717 static
   1718 void cg_discard_superblock_info ( Addr orig_addr64, VexGuestExtents vge )
   1719 {
   1720    SB_info* sbInfo;
   1721    Addr     orig_addr = vge.base[0];
   1722 
   1723    tl_assert(vge.n_used > 0);
   1724 
   1725    if (DEBUG_CG)
   1726       VG_(printf)( "discard_basic_block_info: %p, %p, %llu\n",
   1727                    (void*)orig_addr,
   1728                    (void*)vge.base[0], (ULong)vge.len[0]);
   1729 
   1730    // Get BB info, remove from table, free BB info.  Simple!  Note that we
   1731    // use orig_addr, not the first instruction address in vge.
   1732    sbInfo = VG_(OSetGen_Remove)(instrInfoTable, &orig_addr);
   1733    tl_assert(NULL != sbInfo);
   1734    VG_(OSetGen_FreeNode)(instrInfoTable, sbInfo);
   1735 }
   1736 
   1737 /*--------------------------------------------------------------------*/
   1738 /*--- Command line processing                                      ---*/
   1739 /*--------------------------------------------------------------------*/
   1740 
   1741 static Bool cg_process_cmd_line_option(const HChar* arg)
   1742 {
   1743    if (VG_(str_clo_cache_opt)(arg,
   1744                               &clo_I1_cache,
   1745                               &clo_D1_cache,
   1746                               &clo_LL_cache)) {}
   1747 
   1748    else if VG_STR_CLO( arg, "--cachegrind-out-file", clo_cachegrind_out_file) {}
   1749    else if VG_BOOL_CLO(arg, "--cache-sim",  clo_cache_sim)  {}
   1750    else if VG_BOOL_CLO(arg, "--branch-sim", clo_branch_sim) {}
   1751    else
   1752       return False;
   1753 
   1754    return True;
   1755 }
   1756 
   1757 static void cg_print_usage(void)
   1758 {
   1759    VG_(print_cache_clo_opts)();
   1760    VG_(printf)(
   1761 "    --cache-sim=yes|no  [yes]        collect cache stats?\n"
   1762 "    --branch-sim=yes|no [no]         collect branch prediction stats?\n"
   1763 "    --cachegrind-out-file=<file>     output file name [cachegrind.out.%%p]\n"
   1764    );
   1765 }
   1766 
   1767 static void cg_print_debug_usage(void)
   1768 {
   1769    VG_(printf)(
   1770 "    (none)\n"
   1771    );
   1772 }
   1773 
   1774 /*--------------------------------------------------------------------*/
   1775 /*--- Setup                                                        ---*/
   1776 /*--------------------------------------------------------------------*/
   1777 
   1778 static void cg_post_clo_init(void); /* just below */
   1779 
   1780 static void cg_pre_clo_init(void)
   1781 {
   1782    VG_(details_name)            ("Cachegrind");
   1783    VG_(details_version)         (NULL);
   1784    VG_(details_description)     ("a cache and branch-prediction profiler");
   1785    VG_(details_copyright_author)(
   1786       "Copyright (C) 2002-2015, and GNU GPL'd, by Nicholas Nethercote et al.");
   1787    VG_(details_bug_reports_to)  (VG_BUGS_TO);
   1788    VG_(details_avg_translation_sizeB) ( 500 );
   1789 
   1790    VG_(clo_vex_control).iropt_register_updates_default
   1791       = VG_(clo_px_file_backed)
   1792       = VexRegUpdSpAtMemAccess; // overridable by the user.
   1793 
   1794    VG_(basic_tool_funcs)          (cg_post_clo_init,
   1795                                    cg_instrument,
   1796                                    cg_fini);
   1797 
   1798    VG_(needs_superblock_discards)(cg_discard_superblock_info);
   1799    VG_(needs_command_line_options)(cg_process_cmd_line_option,
   1800                                    cg_print_usage,
   1801                                    cg_print_debug_usage);
   1802 }
   1803 
   1804 static void cg_post_clo_init(void)
   1805 {
   1806    cache_t I1c, D1c, LLc;
   1807 
   1808    CC_table =
   1809       VG_(OSetGen_Create)(offsetof(LineCC, loc),
   1810                           cmp_CodeLoc_LineCC,
   1811                           VG_(malloc), "cg.main.cpci.1",
   1812                           VG_(free));
   1813    instrInfoTable =
   1814       VG_(OSetGen_Create)(/*keyOff*/0,
   1815                           NULL,
   1816                           VG_(malloc), "cg.main.cpci.2",
   1817                           VG_(free));
   1818    stringTable =
   1819       VG_(OSetGen_Create)(/*keyOff*/0,
   1820                           stringCmp,
   1821                           VG_(malloc), "cg.main.cpci.3",
   1822                           VG_(free));
   1823 
   1824    VG_(post_clo_init_configure_caches)(&I1c, &D1c, &LLc,
   1825                                        &clo_I1_cache,
   1826                                        &clo_D1_cache,
   1827                                        &clo_LL_cache);
   1828 
   1829    // min_line_size is used to make sure that we never feed
   1830    // accesses to the simulator straddling more than two
   1831    // cache lines at any cache level
   1832    min_line_size = (I1c.line_size < D1c.line_size) ? I1c.line_size : D1c.line_size;
   1833    min_line_size = (LLc.line_size < min_line_size) ? LLc.line_size : min_line_size;
   1834 
   1835    Int largest_load_or_store_size
   1836       = VG_(machine_get_size_of_largest_guest_register)();
   1837    if (min_line_size < largest_load_or_store_size) {
   1838       /* We can't continue, because the cache simulation might
   1839          straddle more than 2 lines, and it will assert.  So let's
   1840          just stop before we start. */
   1841       VG_(umsg)("Cachegrind: cannot continue: the minimum line size (%d)\n",
   1842                 (Int)min_line_size);
   1843       VG_(umsg)("  must be equal to or larger than the maximum register size (%d)\n",
   1844                 largest_load_or_store_size );
   1845       VG_(umsg)("  but it is not.  Exiting now.\n");
   1846       VG_(exit)(1);
   1847    }
   1848 
   1849    cachesim_initcaches(I1c, D1c, LLc);
   1850 }
   1851 
   1852 VG_DETERMINE_INTERFACE_VERSION(cg_pre_clo_init)
   1853 
   1854 /*--------------------------------------------------------------------*/
   1855 /*--- end                                                          ---*/
   1856 /*--------------------------------------------------------------------*/
   1857 
   1858