Home | History | Annotate | Download | only in cachegrind
      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- Cachegrind: everything but the simulation itself.            ---*/
      4 /*---                                                    cg_main.c ---*/
      5 /*--------------------------------------------------------------------*/
      6 
      7 /*
      8    This file is part of Cachegrind, a Valgrind tool for cache
      9    profiling programs.
     10 
     11    Copyright (C) 2002-2012 Nicholas Nethercote
     12       njn (at) valgrind.org
     13 
     14    This program is free software; you can redistribute it and/or
     15    modify it under the terms of the GNU General Public License as
     16    published by the Free Software Foundation; either version 2 of the
     17    License, or (at your option) any later version.
     18 
     19    This program is distributed in the hope that it will be useful, but
     20    WITHOUT ANY WARRANTY; without even the implied warranty of
     21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     22    General Public License for more details.
     23 
     24    You should have received a copy of the GNU General Public License
     25    along with this program; if not, write to the Free Software
     26    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     27    02111-1307, USA.
     28 
     29    The GNU General Public License is contained in the file COPYING.
     30 */
     31 
     32 #include "pub_tool_basics.h"
     33 #include "pub_tool_vki.h"
     34 #include "pub_tool_debuginfo.h"
     35 #include "pub_tool_libcbase.h"
     36 #include "pub_tool_libcassert.h"
     37 #include "pub_tool_libcfile.h"
     38 #include "pub_tool_libcprint.h"
     39 #include "pub_tool_libcproc.h"
     40 #include "pub_tool_machine.h"
     41 #include "pub_tool_mallocfree.h"
     42 #include "pub_tool_options.h"
     43 #include "pub_tool_oset.h"
     44 #include "pub_tool_tooliface.h"
     45 #include "pub_tool_xarray.h"
     46 #include "pub_tool_clientstate.h"
     47 #include "pub_tool_machine.h"      // VG_(fnptr_to_fnentry)
     48 
     49 #include "cg_arch.h"
     50 #include "cg_sim.c"
     51 #include "cg_branchpred.c"
     52 
     53 /*------------------------------------------------------------*/
     54 /*--- Constants                                            ---*/
     55 /*------------------------------------------------------------*/
     56 
/* Set to 1 for very verbose debugging */
#define DEBUG_CG 0

/* NOTE(review): presumably the smallest cache line size supported; it is
   not referenced in this part of the file -- confirm at the use site. */
#define MIN_LINE_SIZE         16
/* Capacity (in Chars) of the file-name and directory-name buffers. */
#define FILE_LEN              VKI_PATH_MAX
/* Capacity (in Chars) of the function-name buffers. */
#define FN_LEN                256
     63 
     64 /*------------------------------------------------------------*/
     65 /*--- Options                                              ---*/
     66 /*------------------------------------------------------------*/
     67 
/* When clo_cache_sim is False the event flusher picks the plain
   log_1I/log_2I/log_3I counters instead of the *_cache_access helpers. */
static Bool  clo_cache_sim  = True;  /* do cache simulation? */
static Bool  clo_branch_sim = False; /* do branch simulation? */
/* Output file name template.  NOTE(review): "%p" is presumably expanded
   (to the process id?) by whoever opens the file -- confirm. */
static Char* clo_cachegrind_out_file = "cachegrind.out.%p";
     71 
     72 /*------------------------------------------------------------*/
     73 /*--- Cachesim configuration                               ---*/
     74 /*------------------------------------------------------------*/
     75 
/* Starts out as 0; nothing in this part of the file assigns it. */
static Int min_line_size = 0; /* min of L1 and LL cache line sizes */
     77 
     78 /*------------------------------------------------------------*/
     79 /*--- Types and Data Structures                            ---*/
     80 /*------------------------------------------------------------*/
     81 
     82 typedef
     83    struct {
     84       ULong a;  /* total # memory accesses of this kind */
     85       ULong m1; /* misses in the first level cache */
     86       ULong mL; /* misses in the second level cache */
     87    }
     88    CacheCC;
     89 
     90 typedef
     91    struct {
     92       ULong b;  /* total # branches of this kind */
     93       ULong mp; /* number of branches mispredicted */
     94    }
     95    BranchCC;
     96 
     97 //------------------------------------------------------------
     98 // Primary data structure #1: CC table
     99 // - Holds the per-source-line hit/miss stats, grouped by file/function/line.
    100 // - an ordered set of CCs.  CC indexing done by file/function/line (as
    101 //   determined from the instrAddr).
    102 // - Traversed for dumping stats at end in file/func/line hierarchy.
    103 
    104 typedef struct {
    105    Char* file;
    106    Char* fn;
    107    Int   line;
    108 }
    109 CodeLoc;
    110 
    111 typedef struct {
    112    CodeLoc  loc; /* Source location that these counts pertain to */
    113    CacheCC  Ir;  /* Insn read counts */
    114    CacheCC  Dr;  /* Data read counts */
    115    CacheCC  Dw;  /* Data write/modify counts */
    116    BranchCC Bc;  /* Conditional branch counts */
    117    BranchCC Bi;  /* Indirect branch counts */
    118 } LineCC;
    119 
    120 // First compare file, then fn, then line.
    121 static Word cmp_CodeLoc_LineCC(const void *vloc, const void *vcc)
    122 {
    123    Word res;
    124    CodeLoc* a = (CodeLoc*)vloc;
    125    CodeLoc* b = &(((LineCC*)vcc)->loc);
    126 
    127    res = VG_(strcmp)(a->file, b->file);
    128    if (0 != res)
    129       return res;
    130 
    131    res = VG_(strcmp)(a->fn, b->fn);
    132    if (0 != res)
    133       return res;
    134 
    135    return a->line - b->line;
    136 }
    137 
// Ordered set of LineCC nodes; get_lineCC() looks nodes up with a CodeLoc
// key and inserts a fresh node on a miss.
static OSet* CC_table;
    139 
    140 //------------------------------------------------------------
    141 // Primary data structure #2: InstrInfo table
    142 // - Holds the cached info about each instr that is used for simulation.
    143 // - table(SB_start_addr, list(InstrInfo))
    144 // - For each SB, each InstrInfo in the list holds info about the
    145 //   instruction (instrLen, instrAddr, etc), plus a pointer to its line
    146 //   CC.  This node is what's passed to the simulation function.
// - When SBs are discarded the relevant list(InstrInfo) is freed.
    148 
    149 typedef struct _InstrInfo InstrInfo;
    150 struct _InstrInfo {
    151    Addr    instr_addr;
    152    UChar   instr_len;
    153    LineCC* parent;         // parent line-CC
    154 };
    155 
    156 typedef struct _SB_info SB_info;
    157 struct _SB_info {
    158    Addr      SB_addr;      // key;  MUST BE FIRST
    159    Int       n_instrs;
    160    InstrInfo instrs[0];
    161 };
    162 
// Ordered set of SB_info nodes; get_SB_info() looks nodes up by the
// address of an Addr key and inserts one node per translated SB.
static OSet* instrInfoTable;
    164 
    165 //------------------------------------------------------------
    166 // Secondary data structure: string table
    167 // - holds strings, avoiding dups
    168 // - used for filenames and function names, each of which will be
    169 //   pointed to by one or more CCs.
    170 // - it also allows equality checks just by pointer comparison, which
    171 //   is good when printing the output file at the end.
    172 
// Ordered set whose nodes are each a single Char* pointing at a
// VG_(strdup)'d copy of a string (see get_perm_string()).
static OSet* stringTable;
    174 
    175 //------------------------------------------------------------
// Stats
// NOTE(review): distinct_files/fns/lines are not updated in the part of
// the file visible here.
static Int  distinct_files      = 0;
static Int  distinct_fns        = 0;
static Int  distinct_lines      = 0;
static Int  distinct_instrs     = 0;  // bumped once per SB_info in get_SB_info()

// Tallies kept by get_debug_info(); exactly one is bumped per call.
static Int  full_debugs         = 0;  // file/line and fn name both found
static Int  file_line_debugs    = 0;  // file/line found, fn name not found
static Int  fn_debugs           = 0;  // fn name found, file/line not found
static Int  no_debugs           = 0;  // neither found
    186 
    187 /*------------------------------------------------------------*/
    188 /*--- String table operations                              ---*/
    189 /*------------------------------------------------------------*/
    190 
    191 static Word stringCmp( const void* key, const void* elem )
    192 {
    193    return VG_(strcmp)(*(Char**)key, *(Char**)elem);
    194 }
    195 
    196 // Get a permanent string;  either pull it out of the string table if it's
    197 // been encountered before, or dup it and put it into the string table.
    198 static Char* get_perm_string(Char* s)
    199 {
    200    Char** s_ptr = VG_(OSetGen_Lookup)(stringTable, &s);
    201    if (s_ptr) {
    202       return *s_ptr;
    203    } else {
    204       Char** s_node = VG_(OSetGen_AllocNode)(stringTable, sizeof(Char*));
    205       *s_node = VG_(strdup)("cg.main.gps.1", s);
    206       VG_(OSetGen_Insert)(stringTable, s_node);
    207       return *s_node;
    208    }
    209 }
    210 
    211 /*------------------------------------------------------------*/
    212 /*--- CC table operations                                  ---*/
    213 /*------------------------------------------------------------*/
    214 
    215 static void get_debug_info(Addr instr_addr, Char file[FILE_LEN],
    216                            Char fn[FN_LEN], Int* line)
    217 {
    218    Char dir[FILE_LEN];
    219    Bool found_dirname;
    220    Bool found_file_line = VG_(get_filename_linenum)(
    221                              instr_addr,
    222                              file, FILE_LEN,
    223                              dir,  FILE_LEN, &found_dirname,
    224                              line
    225                           );
    226    Bool found_fn        = VG_(get_fnname)(instr_addr, fn, FN_LEN);
    227 
    228    if (!found_file_line) {
    229       VG_(strcpy)(file, "???");
    230       *line = 0;
    231    }
    232    if (!found_fn) {
    233       VG_(strcpy)(fn,  "???");
    234    }
    235 
    236    if (found_dirname) {
    237       // +1 for the '/'.
    238       tl_assert(VG_(strlen)(dir) + VG_(strlen)(file) + 1 < FILE_LEN);
    239       VG_(strcat)(dir, "/");     // Append '/'
    240       VG_(strcat)(dir, file);    // Append file to dir
    241       VG_(strcpy)(file, dir);    // Move dir+file to file
    242    }
    243 
    244    if (found_file_line) {
    245       if (found_fn) full_debugs++;
    246       else          file_line_debugs++;
    247    } else {
    248       if (found_fn) fn_debugs++;
    249       else          no_debugs++;
    250    }
    251 }
    252 
    253 // Do a three step traversal: by file, then fn, then line.
    254 // Returns a pointer to the line CC, creates a new one if necessary.
    255 static LineCC* get_lineCC(Addr origAddr)
    256 {
    257    Char    file[FILE_LEN], fn[FN_LEN];
    258    Int     line;
    259    CodeLoc loc;
    260    LineCC* lineCC;
    261 
    262    get_debug_info(origAddr, file, fn, &line);
    263 
    264    loc.file = file;
    265    loc.fn   = fn;
    266    loc.line = line;
    267 
    268    lineCC = VG_(OSetGen_Lookup)(CC_table, &loc);
    269    if (!lineCC) {
    270       // Allocate and zero a new node.
    271       lineCC           = VG_(OSetGen_AllocNode)(CC_table, sizeof(LineCC));
    272       lineCC->loc.file = get_perm_string(loc.file);
    273       lineCC->loc.fn   = get_perm_string(loc.fn);
    274       lineCC->loc.line = loc.line;
    275       lineCC->Ir.a     = 0;
    276       lineCC->Ir.m1    = 0;
    277       lineCC->Ir.mL    = 0;
    278       lineCC->Dr.a     = 0;
    279       lineCC->Dr.m1    = 0;
    280       lineCC->Dr.mL    = 0;
    281       lineCC->Dw.a     = 0;
    282       lineCC->Dw.m1    = 0;
    283       lineCC->Dw.mL    = 0;
    284       lineCC->Bc.b     = 0;
    285       lineCC->Bc.mp    = 0;
    286       lineCC->Bi.b     = 0;
    287       lineCC->Bi.mp    = 0;
    288       VG_(OSetGen_Insert)(CC_table, lineCC);
    289    }
    290 
    291    return lineCC;
    292 }
    293 
    294 /*------------------------------------------------------------*/
    295 /*--- Cache simulation functions                           ---*/
    296 /*------------------------------------------------------------*/
    297 
    298 // Only used with --cache-sim=no.
    299 static VG_REGPARM(1)
    300 void log_1I(InstrInfo* n)
    301 {
    302    n->parent->Ir.a++;
    303 }
    304 
    305 // Only used with --cache-sim=no.
    306 static VG_REGPARM(2)
    307 void log_2I(InstrInfo* n, InstrInfo* n2)
    308 {
    309    n->parent->Ir.a++;
    310    n2->parent->Ir.a++;
    311 }
    312 
    313 // Only used with --cache-sim=no.
    314 static VG_REGPARM(3)
    315 void log_3I(InstrInfo* n, InstrInfo* n2, InstrInfo* n3)
    316 {
    317    n->parent->Ir.a++;
    318    n2->parent->Ir.a++;
    319    n3->parent->Ir.a++;
    320 }
    321 
    322 static VG_REGPARM(1)
    323 void log_1I_0D_cache_access(InstrInfo* n)
    324 {
    325    //VG_(printf)("1I_0D :  CCaddr=0x%010lx,  iaddr=0x%010lx,  isize=%lu\n",
    326    //             n, n->instr_addr, n->instr_len);
    327    cachesim_I1_doref(n->instr_addr, n->instr_len,
    328                      &n->parent->Ir.m1, &n->parent->Ir.mL);
    329    n->parent->Ir.a++;
    330 }
    331 
    332 static VG_REGPARM(2)
    333 void log_2I_0D_cache_access(InstrInfo* n, InstrInfo* n2)
    334 {
    335    //VG_(printf)("2I_0D : CC1addr=0x%010lx, i1addr=0x%010lx, i1size=%lu\n"
    336    //            "        CC2addr=0x%010lx, i2addr=0x%010lx, i2size=%lu\n",
    337    //            n,  n->instr_addr,  n->instr_len,
    338    //            n2, n2->instr_addr, n2->instr_len);
    339    cachesim_I1_doref(n->instr_addr, n->instr_len,
    340                      &n->parent->Ir.m1, &n->parent->Ir.mL);
    341    n->parent->Ir.a++;
    342    cachesim_I1_doref(n2->instr_addr, n2->instr_len,
    343                      &n2->parent->Ir.m1, &n2->parent->Ir.mL);
    344    n2->parent->Ir.a++;
    345 }
    346 
    347 static VG_REGPARM(3)
    348 void log_3I_0D_cache_access(InstrInfo* n, InstrInfo* n2, InstrInfo* n3)
    349 {
    350    //VG_(printf)("3I_0D : CC1addr=0x%010lx, i1addr=0x%010lx, i1size=%lu\n"
    351    //            "        CC2addr=0x%010lx, i2addr=0x%010lx, i2size=%lu\n"
    352    //            "        CC3addr=0x%010lx, i3addr=0x%010lx, i3size=%lu\n",
    353    //            n,  n->instr_addr,  n->instr_len,
    354    //            n2, n2->instr_addr, n2->instr_len,
    355    //            n3, n3->instr_addr, n3->instr_len);
    356    cachesim_I1_doref(n->instr_addr, n->instr_len,
    357                      &n->parent->Ir.m1, &n->parent->Ir.mL);
    358    n->parent->Ir.a++;
    359    cachesim_I1_doref(n2->instr_addr, n2->instr_len,
    360                      &n2->parent->Ir.m1, &n2->parent->Ir.mL);
    361    n2->parent->Ir.a++;
    362    cachesim_I1_doref(n3->instr_addr, n3->instr_len,
    363                      &n3->parent->Ir.m1, &n3->parent->Ir.mL);
    364    n3->parent->Ir.a++;
    365 }
    366 
    367 static VG_REGPARM(3)
    368 void log_1I_1Dr_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
    369 {
    370    //VG_(printf)("1I_1Dr:  CCaddr=0x%010lx,  iaddr=0x%010lx,  isize=%lu\n"
    371    //            "                               daddr=0x%010lx,  dsize=%lu\n",
    372    //            n, n->instr_addr, n->instr_len, data_addr, data_size);
    373    cachesim_I1_doref(n->instr_addr, n->instr_len,
    374                      &n->parent->Ir.m1, &n->parent->Ir.mL);
    375    n->parent->Ir.a++;
    376 
    377    cachesim_D1_doref(data_addr, data_size,
    378                      &n->parent->Dr.m1, &n->parent->Dr.mL);
    379    n->parent->Dr.a++;
    380 }
    381 
    382 static VG_REGPARM(3)
    383 void log_1I_1Dw_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
    384 {
    385    //VG_(printf)("1I_1Dw:  CCaddr=0x%010lx,  iaddr=0x%010lx,  isize=%lu\n"
    386    //            "                               daddr=0x%010lx,  dsize=%lu\n",
    387    //            n, n->instr_addr, n->instr_len, data_addr, data_size);
    388    cachesim_I1_doref(n->instr_addr, n->instr_len,
    389                      &n->parent->Ir.m1, &n->parent->Ir.mL);
    390    n->parent->Ir.a++;
    391 
    392    cachesim_D1_doref(data_addr, data_size,
    393                      &n->parent->Dw.m1, &n->parent->Dw.mL);
    394    n->parent->Dw.a++;
    395 }
    396 
    397 static VG_REGPARM(3)
    398 void log_0I_1Dr_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
    399 {
    400    //VG_(printf)("0I_1Dr:  CCaddr=0x%010lx,  daddr=0x%010lx,  dsize=%lu\n",
    401    //            n, data_addr, data_size);
    402    cachesim_D1_doref(data_addr, data_size,
    403                      &n->parent->Dr.m1, &n->parent->Dr.mL);
    404    n->parent->Dr.a++;
    405 }
    406 
    407 static VG_REGPARM(3)
    408 void log_0I_1Dw_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
    409 {
    410    //VG_(printf)("0I_1Dw:  CCaddr=0x%010lx,  daddr=0x%010lx,  dsize=%lu\n",
    411    //            n, data_addr, data_size);
    412    cachesim_D1_doref(data_addr, data_size,
    413                      &n->parent->Dw.m1, &n->parent->Dw.mL);
    414    n->parent->Dw.a++;
    415 }
    416 
    417 /* For branches, we consult two different predictors, one which
    418    predicts taken/untaken for conditional branches, and the other
    419    which predicts the branch target address for indirect branches
    420    (jump-to-register style ones). */
    421 
    422 static VG_REGPARM(2)
    423 void log_cond_branch(InstrInfo* n, Word taken)
    424 {
    425    //VG_(printf)("cbrnch:  CCaddr=0x%010lx,  taken=0x%010lx\n",
    426    //             n, taken);
    427    n->parent->Bc.b++;
    428    n->parent->Bc.mp
    429       += (1 & do_cond_branch_predict(n->instr_addr, taken));
    430 }
    431 
    432 static VG_REGPARM(2)
    433 void log_ind_branch(InstrInfo* n, UWord actual_dst)
    434 {
    435    //VG_(printf)("ibrnch:  CCaddr=0x%010lx,    dst=0x%010lx\n",
    436    //             n, actual_dst);
    437    n->parent->Bi.b++;
    438    n->parent->Bi.mp
    439       += (1 & do_ind_branch_predict(n->instr_addr, actual_dst));
    440 }
    441 
    442 
    443 /*------------------------------------------------------------*/
    444 /*--- Instrumentation types and structures                 ---*/
    445 /*------------------------------------------------------------*/
    446 
    447 /* Maintain an ordered list of memory events which are outstanding, in
    448    the sense that no IR has yet been generated to do the relevant
    449    helper calls.  The BB is scanned top to bottom and memory events
    450    are added to the end of the list, merging with the most recent
    451    notified event where possible (Dw immediately following Dr and
    452    having the same size and EA can be merged).
    453 
    454    This merging is done so that for architectures which have
    455    load-op-store instructions (x86, amd64), the insn is treated as if
    456    it makes just one memory reference (a modify), rather than two (a
    457    read followed by a write at the same address).
    458 
    459    At various points the list will need to be flushed, that is, IR
    460    generated from it.  That must happen before any possible exit from
    461    the block (the end, or an IRStmt_Exit).  Flushing also takes place
    462    when there is no space to add a new event.
    463 
    464    If we require the simulation statistics to be up to date with
    465    respect to possible memory exceptions, then the list would have to
    466    be flushed before each memory reference.  That would however lose
    467    performance by inhibiting event-merging during flushing.
    468 
    469    Flushing the list consists of walking it start to end and emitting
    470    instrumentation IR for each event, in the order in which they
    471    appear.  It may be possible to emit a single call for two adjacent
    472    events in order to reduce the number of helper function calls made.
    473    For example, it could well be profitable to handle two adjacent Ir
    474    events with a single helper call.  */
    475 
    476 typedef
    477    IRExpr
    478    IRAtom;
    479 
    480 typedef
    481    enum {
    482       Ev_Ir,  // Instruction read
    483       Ev_Dr,  // Data read
    484       Ev_Dw,  // Data write
    485       Ev_Dm,  // Data modify (read then write)
    486       Ev_Bc,  // branch conditional
    487       Ev_Bi   // branch indirect (to unknown destination)
    488    }
    489    EventTag;
    490 
    491 typedef
    492    struct {
    493       EventTag   tag;
    494       InstrInfo* inode;
    495       union {
    496          struct {
    497          } Ir;
    498          struct {
    499             IRAtom* ea;
    500             Int     szB;
    501          } Dr;
    502          struct {
    503             IRAtom* ea;
    504             Int     szB;
    505          } Dw;
    506          struct {
    507             IRAtom* ea;
    508             Int     szB;
    509          } Dm;
    510          struct {
    511             IRAtom* taken; /* :: Ity_I1 */
    512          } Bc;
    513          struct {
    514             IRAtom* dst;
    515          } Bi;
    516       } Ev;
    517    }
    518    Event;
    519 
    520 static void init_Event ( Event* ev ) {
    521    VG_(memset)(ev, 0, sizeof(Event));
    522 }
    523 
    524 static IRAtom* get_Event_dea ( Event* ev ) {
    525    switch (ev->tag) {
    526       case Ev_Dr: return ev->Ev.Dr.ea;
    527       case Ev_Dw: return ev->Ev.Dw.ea;
    528       case Ev_Dm: return ev->Ev.Dm.ea;
    529       default:    tl_assert(0);
    530    }
    531 }
    532 
    533 static Int get_Event_dszB ( Event* ev ) {
    534    switch (ev->tag) {
    535       case Ev_Dr: return ev->Ev.Dr.szB;
    536       case Ev_Dw: return ev->Ev.Dw.szB;
    537       case Ev_Dm: return ev->Ev.Dm.szB;
    538       default:    tl_assert(0);
    539    }
    540 }
    541 
    542 
/* Up to this many unnotified events are allowed.  Number is
   arbitrary.  Larger numbers allow more event merging to occur, but
   potentially induce more spilling due to extending live ranges of
   address temporaries.  This is the capacity of the events[] array in
   CgState. */
#define N_EVENTS 16
    548 
    549 
    550 /* A struct which holds all the running state during instrumentation.
    551    Mostly to avoid passing loads of parameters everywhere. */
    552 typedef
    553    struct {
    554       /* The current outstanding-memory-event list. */
    555       Event events[N_EVENTS];
    556       Int   events_used;
    557 
    558       /* The array of InstrInfo bins for the BB. */
    559       SB_info* sbInfo;
    560 
    561       /* Number InstrInfo bins 'used' so far. */
    562       Int sbInfo_i;
    563 
    564       /* The output SB being constructed. */
    565       IRSB* sbOut;
    566    }
    567    CgState;
    568 
    569 
    570 /*------------------------------------------------------------*/
    571 /*--- Instrumentation main                                 ---*/
    572 /*------------------------------------------------------------*/
    573 
    574 // Note that origAddr is the real origAddr, not the address of the first
    575 // instruction in the block (they can be different due to redirection).
    576 static
    577 SB_info* get_SB_info(IRSB* sbIn, Addr origAddr)
    578 {
    579    Int      i, n_instrs;
    580    IRStmt*  st;
    581    SB_info* sbInfo;
    582 
    583    // Count number of original instrs in SB
    584    n_instrs = 0;
    585    for (i = 0; i < sbIn->stmts_used; i++) {
    586       st = sbIn->stmts[i];
    587       if (Ist_IMark == st->tag) n_instrs++;
    588    }
    589 
    590    // Check that we don't have an entry for this BB in the instr-info table.
    591    // If this assertion fails, there has been some screwup:  some
    592    // translations must have been discarded but Cachegrind hasn't discarded
    593    // the corresponding entries in the instr-info table.
    594    sbInfo = VG_(OSetGen_Lookup)(instrInfoTable, &origAddr);
    595    tl_assert(NULL == sbInfo);
    596 
    597    // BB never translated before (at this address, at least;  could have
    598    // been unloaded and then reloaded elsewhere in memory)
    599    sbInfo = VG_(OSetGen_AllocNode)(instrInfoTable,
    600                                 sizeof(SB_info) + n_instrs*sizeof(InstrInfo));
    601    sbInfo->SB_addr  = origAddr;
    602    sbInfo->n_instrs = n_instrs;
    603    VG_(OSetGen_Insert)( instrInfoTable, sbInfo );
    604    distinct_instrs++;
    605 
    606    return sbInfo;
    607 }
    608 
    609 
    610 static void showEvent ( Event* ev )
    611 {
    612    switch (ev->tag) {
    613       case Ev_Ir:
    614          VG_(printf)("Ir %p\n", ev->inode);
    615          break;
    616       case Ev_Dr:
    617          VG_(printf)("Dr %p %d EA=", ev->inode, ev->Ev.Dr.szB);
    618          ppIRExpr(ev->Ev.Dr.ea);
    619          VG_(printf)("\n");
    620          break;
    621       case Ev_Dw:
    622          VG_(printf)("Dw %p %d EA=", ev->inode, ev->Ev.Dw.szB);
    623          ppIRExpr(ev->Ev.Dw.ea);
    624          VG_(printf)("\n");
    625          break;
    626       case Ev_Dm:
    627          VG_(printf)("Dm %p %d EA=", ev->inode, ev->Ev.Dm.szB);
    628          ppIRExpr(ev->Ev.Dm.ea);
    629          VG_(printf)("\n");
    630          break;
    631       case Ev_Bc:
    632          VG_(printf)("Bc %p   GA=", ev->inode);
    633          ppIRExpr(ev->Ev.Bc.taken);
    634          VG_(printf)("\n");
    635          break;
    636       case Ev_Bi:
    637          VG_(printf)("Bi %p  DST=", ev->inode);
    638          ppIRExpr(ev->Ev.Bi.dst);
    639          VG_(printf)("\n");
    640          break;
    641       default:
    642          tl_assert(0);
    643          break;
    644    }
    645 }
    646 
    647 // Reserve and initialise an InstrInfo for the first mention of a new insn.
    648 static
    649 InstrInfo* setup_InstrInfo ( CgState* cgs, Addr instr_addr, UInt instr_len )
    650 {
    651    InstrInfo* i_node;
    652    tl_assert(cgs->sbInfo_i >= 0);
    653    tl_assert(cgs->sbInfo_i < cgs->sbInfo->n_instrs);
    654    i_node = &cgs->sbInfo->instrs[ cgs->sbInfo_i ];
    655    i_node->instr_addr = instr_addr;
    656    i_node->instr_len  = instr_len;
    657    i_node->parent     = get_lineCC(instr_addr);
    658    cgs->sbInfo_i++;
    659    return i_node;
    660 }
    661 
    662 
/* Generate code for all outstanding memory events, and mark the queue
   empty.  Code is generated into cgs->sbOut, and this activity
   'consumes' slots in cgs->sbInfo. */
    666 
    667 static void flushEvents ( CgState* cgs )
    668 {
    669    Int        i, regparms;
    670    Char*      helperName;
    671    void*      helperAddr;
    672    IRExpr**   argv;
    673    IRExpr*    i_node_expr;
    674    IRDirty*   di;
    675    Event*     ev;
    676    Event*     ev2;
    677    Event*     ev3;
    678 
    679    i = 0;
    680    while (i < cgs->events_used) {
    681 
    682       helperName = NULL;
    683       helperAddr = NULL;
    684       argv       = NULL;
    685       regparms   = 0;
    686 
    687       /* generate IR to notify event i and possibly the ones
    688          immediately following it. */
    689       tl_assert(i >= 0 && i < cgs->events_used);
    690 
    691       ev  = &cgs->events[i];
    692       ev2 = ( i < cgs->events_used-1 ? &cgs->events[i+1] : NULL );
    693       ev3 = ( i < cgs->events_used-2 ? &cgs->events[i+2] : NULL );
    694 
    695       if (DEBUG_CG) {
    696          VG_(printf)("   flush ");
    697          showEvent( ev );
    698       }
    699 
    700       i_node_expr = mkIRExpr_HWord( (HWord)ev->inode );
    701 
    702       /* Decide on helper fn to call and args to pass it, and advance
    703          i appropriately. */
    704       switch (ev->tag) {
    705          case Ev_Ir:
    706             /* Merge an Ir with a following Dr/Dm. */
    707             if (ev2 && (ev2->tag == Ev_Dr || ev2->tag == Ev_Dm)) {
    708                /* Why is this true?  It's because we're merging an Ir
    709                   with a following Dr or Dm.  The Ir derives from the
    710                   instruction's IMark and the Dr/Dm from data
    711                   references which follow it.  In short it holds
    712                   because each insn starts with an IMark, hence an
    713                   Ev_Ir, and so these Dr/Dm must pertain to the
    714                   immediately preceding Ir.  Same applies to analogous
    715                   assertions in the subsequent cases. */
    716                tl_assert(ev2->inode == ev->inode);
    717                helperName = "log_1I_1Dr_cache_access";
    718                helperAddr = &log_1I_1Dr_cache_access;
    719                argv = mkIRExprVec_3( i_node_expr,
    720                                      get_Event_dea(ev2),
    721                                      mkIRExpr_HWord( get_Event_dszB(ev2) ) );
    722                regparms = 3;
    723                i += 2;
    724             }
    725             /* Merge an Ir with a following Dw. */
    726             else
    727             if (ev2 && ev2->tag == Ev_Dw) {
    728                tl_assert(ev2->inode == ev->inode);
    729                helperName = "log_1I_1Dw_cache_access";
    730                helperAddr = &log_1I_1Dw_cache_access;
    731                argv = mkIRExprVec_3( i_node_expr,
    732                                      get_Event_dea(ev2),
    733                                      mkIRExpr_HWord( get_Event_dszB(ev2) ) );
    734                regparms = 3;
    735                i += 2;
    736             }
    737             /* Merge an Ir with two following Irs. */
    738             else
    739             if (ev2 && ev3 && ev2->tag == Ev_Ir && ev3->tag == Ev_Ir)
    740             {
    741                if (clo_cache_sim) {
    742                   helperName = "log_3I_0D_cache_access";
    743                   helperAddr = &log_3I_0D_cache_access;
    744                } else {
    745                   helperName = "log_3I";
    746                   helperAddr = &log_3I;
    747                }
    748                argv = mkIRExprVec_3( i_node_expr,
    749                                      mkIRExpr_HWord( (HWord)ev2->inode ),
    750                                      mkIRExpr_HWord( (HWord)ev3->inode ) );
    751                regparms = 3;
    752                i += 3;
    753             }
    754             /* Merge an Ir with one following Ir. */
    755             else
    756             if (ev2 && ev2->tag == Ev_Ir) {
    757                if (clo_cache_sim) {
    758                   helperName = "log_2I_0D_cache_access";
    759                   helperAddr = &log_2I_0D_cache_access;
    760                } else {
    761                   helperName = "log_2I";
    762                   helperAddr = &log_2I;
    763                }
    764                argv = mkIRExprVec_2( i_node_expr,
    765                                      mkIRExpr_HWord( (HWord)ev2->inode ) );
    766                regparms = 2;
    767                i += 2;
    768             }
    769             /* No merging possible; emit as-is. */
    770             else {
    771                if (clo_cache_sim) {
    772                   helperName = "log_1I_0D_cache_access";
    773                   helperAddr = &log_1I_0D_cache_access;
    774                } else {
    775                   helperName = "log_1I";
    776                   helperAddr = &log_1I;
    777                }
    778                argv = mkIRExprVec_1( i_node_expr );
    779                regparms = 1;
    780                i++;
    781             }
    782             break;
    783          case Ev_Dr:
    784          case Ev_Dm:
    785             /* Data read or modify */
    786             helperName = "log_0I_1Dr_cache_access";
    787             helperAddr = &log_0I_1Dr_cache_access;
    788             argv = mkIRExprVec_3( i_node_expr,
    789                                   get_Event_dea(ev),
    790                                   mkIRExpr_HWord( get_Event_dszB(ev) ) );
    791             regparms = 3;
    792             i++;
    793             break;
    794          case Ev_Dw:
    795             /* Data write */
    796             helperName = "log_0I_1Dw_cache_access";
    797             helperAddr = &log_0I_1Dw_cache_access;
    798             argv = mkIRExprVec_3( i_node_expr,
    799                                   get_Event_dea(ev),
    800                                   mkIRExpr_HWord( get_Event_dszB(ev) ) );
    801             regparms = 3;
    802             i++;
    803             break;
    804          case Ev_Bc:
    805             /* Conditional branch */
    806             helperName = "log_cond_branch";
    807             helperAddr = &log_cond_branch;
    808             argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bc.taken );
    809             regparms = 2;
    810             i++;
    811             break;
    812          case Ev_Bi:
    813             /* Branch to an unknown destination */
    814             helperName = "log_ind_branch";
    815             helperAddr = &log_ind_branch;
    816             argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bi.dst );
    817             regparms = 2;
    818             i++;
    819             break;
    820          default:
    821             tl_assert(0);
    822       }
    823 
    824       /* Add the helper. */
    825       tl_assert(helperName);
    826       tl_assert(helperAddr);
    827       tl_assert(argv);
    828       di = unsafeIRDirty_0_N( regparms,
    829                               helperName, VG_(fnptr_to_fnentry)( helperAddr ),
    830                               argv );
    831       addStmtToIRSB( cgs->sbOut, IRStmt_Dirty(di) );
    832    }
    833 
    834    cgs->events_used = 0;
    835 }
    836 
    837 static void addEvent_Ir ( CgState* cgs, InstrInfo* inode )
    838 {
    839    Event* evt;
    840    if (cgs->events_used == N_EVENTS)
    841       flushEvents(cgs);
    842    tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
    843    evt = &cgs->events[cgs->events_used];
    844    init_Event(evt);
    845    evt->tag      = Ev_Ir;
    846    evt->inode    = inode;
    847    cgs->events_used++;
    848 }
    849 
    850 static
    851 void addEvent_Dr ( CgState* cgs, InstrInfo* inode, Int datasize, IRAtom* ea )
    852 {
    853    Event* evt;
    854    tl_assert(isIRAtom(ea));
    855    tl_assert(datasize >= 1 && datasize <= min_line_size);
    856    if (!clo_cache_sim)
    857       return;
    858    if (cgs->events_used == N_EVENTS)
    859       flushEvents(cgs);
    860    tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
    861    evt = &cgs->events[cgs->events_used];
    862    init_Event(evt);
    863    evt->tag       = Ev_Dr;
    864    evt->inode     = inode;
    865    evt->Ev.Dr.szB = datasize;
    866    evt->Ev.Dr.ea  = ea;
    867    cgs->events_used++;
    868 }
    869 
    870 static
    871 void addEvent_Dw ( CgState* cgs, InstrInfo* inode, Int datasize, IRAtom* ea )
    872 {
    873    Event* lastEvt;
    874    Event* evt;
    875 
    876    tl_assert(isIRAtom(ea));
    877    tl_assert(datasize >= 1 && datasize <= min_line_size);
    878 
    879    if (!clo_cache_sim)
    880       return;
    881 
    882    /* Is it possible to merge this write with the preceding read? */
    883    lastEvt = &cgs->events[cgs->events_used-1];
    884    if (cgs->events_used > 0
    885     && lastEvt->tag       == Ev_Dr
    886     && lastEvt->Ev.Dr.szB == datasize
    887     && lastEvt->inode     == inode
    888     && eqIRAtom(lastEvt->Ev.Dr.ea, ea))
    889    {
    890       lastEvt->tag   = Ev_Dm;
    891       return;
    892    }
    893 
    894    /* No.  Add as normal. */
    895    if (cgs->events_used == N_EVENTS)
    896       flushEvents(cgs);
    897    tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
    898    evt = &cgs->events[cgs->events_used];
    899    init_Event(evt);
    900    evt->tag       = Ev_Dw;
    901    evt->inode     = inode;
    902    evt->Ev.Dw.szB = datasize;
    903    evt->Ev.Dw.ea  = ea;
    904    cgs->events_used++;
    905 }
    906 
    907 static
    908 void addEvent_Bc ( CgState* cgs, InstrInfo* inode, IRAtom* guard )
    909 {
    910    Event* evt;
    911    tl_assert(isIRAtom(guard));
    912    tl_assert(typeOfIRExpr(cgs->sbOut->tyenv, guard)
    913              == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64));
    914    if (!clo_branch_sim)
    915       return;
    916    if (cgs->events_used == N_EVENTS)
    917       flushEvents(cgs);
    918    tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
    919    evt = &cgs->events[cgs->events_used];
    920    init_Event(evt);
    921    evt->tag         = Ev_Bc;
    922    evt->inode       = inode;
    923    evt->Ev.Bc.taken = guard;
    924    cgs->events_used++;
    925 }
    926 
    927 static
    928 void addEvent_Bi ( CgState* cgs, InstrInfo* inode, IRAtom* whereTo )
    929 {
    930    Event* evt;
    931    tl_assert(isIRAtom(whereTo));
    932    tl_assert(typeOfIRExpr(cgs->sbOut->tyenv, whereTo)
    933              == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64));
    934    if (!clo_branch_sim)
    935       return;
    936    if (cgs->events_used == N_EVENTS)
    937       flushEvents(cgs);
    938    tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
    939    evt = &cgs->events[cgs->events_used];
    940    init_Event(evt);
    941    evt->tag       = Ev_Bi;
    942    evt->inode     = inode;
    943    evt->Ev.Bi.dst = whereTo;
    944    cgs->events_used++;
    945 }
    946 
    947 ////////////////////////////////////////////////////////////
    948 
    949 
     950 static
     951 IRSB* cg_instrument ( VgCallbackClosure* closure,
     952                       IRSB* sbIn,
     953                       VexGuestLayout* layout,
     954                       VexGuestExtents* vge,
     955                       IRType gWordTy, IRType hWordTy )
     956 {
            /* Build and return an instrumented copy of the superblock 'sbIn'.

               The output block (cgs.sbOut) starts as a copy of 'sbIn' without
               its statements.  Any statements preceding the first IMark are
               copied verbatim.  After that, every statement is examined,
               events are queued with the addEvent_* functions as appropriate
               (Ir for each IMark, Dr/Dw for loads, stores, memory-accessing
               dirty helpers, CAS and LL/SC, Bc for guest-code side exits),
               and the original statement is then copied to the output.
               Queued events are flushed at every side exit and once more at
               the end of the block; an indirect block-ending jump (Boring or
               Call to a temporary) queues a Bi event before that last flush.

               Requirements checked here: gWordTy must equal hWordTy (tool
               panic otherwise), the first non-preamble statement must be an
               IMark, closure->readdr must equal vge->base[0], every statement
               must be flat, and on completion cgs.sbInfo_i must equal
               cgs.sbInfo->n_instrs.  'layout' is not referenced in this
               function. */
     957    Int        i, isize;
     958    IRStmt*    st;
     959    Addr64     cia; /* address of current insn */
     960    CgState    cgs;
     961    IRTypeEnv* tyenv = sbIn->tyenv;
     962    InstrInfo* curr_inode = NULL;
     963 
     964    if (gWordTy != hWordTy) {
     965       /* We don't currently support this case. */
     966       VG_(tool_panic)("host/guest word size mismatch");
     967    }
     968 
     969    // Set up new SB
     970    cgs.sbOut = deepCopyIRSBExceptStmts(sbIn);
     971 
     972    // Copy verbatim any IR preamble preceding the first IMark
     973    i = 0;
     974    while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
     975       addStmtToIRSB( cgs.sbOut, sbIn->stmts[i] );
     976       i++;
     977    }
     978 
     979    // Get the first statement, and initial cia from it
     980    tl_assert(sbIn->stmts_used > 0);
     981    tl_assert(i < sbIn->stmts_used);
     982    st = sbIn->stmts[i];
     983    tl_assert(Ist_IMark == st->tag);
     984 
     985    cia   = st->Ist.IMark.addr;
     986    isize = st->Ist.IMark.len;
     987    // If Vex fails to decode an instruction, the size will be zero.
     988    // Pretend otherwise.
     989    if (isize == 0) isize = VG_MIN_INSTR_SZB;
     990 
     991    // Set up running state and get block info
     992    tl_assert(closure->readdr == vge->base[0]);
     993    cgs.events_used = 0;
     994    cgs.sbInfo      = get_SB_info(sbIn, (Addr)closure->readdr);
     995    cgs.sbInfo_i    = 0;
     996 
     997    if (DEBUG_CG)
     998       VG_(printf)("\n\n---------- cg_instrument ----------\n");
     999 
    1000    // Traverse the block, initialising inodes, adding events and flushing as
    1001    // necessary.
    1002    for (/*use current i*/; i < sbIn->stmts_used; i++) {
    1003 
    1004       st = sbIn->stmts[i];
    1005       tl_assert(isFlatIRStmt(st));
    1006 
    1007       switch (st->tag) {
    1008          case Ist_NoOp:
    1009          case Ist_AbiHint:
    1010          case Ist_Put:
    1011          case Ist_PutI:
    1012          case Ist_MBE:
    1013             break;
    1014 
    1015          case Ist_IMark:
    1016             cia   = st->Ist.IMark.addr;
    1017             isize = st->Ist.IMark.len;
    1018 
    1019             // If Vex fails to decode an instruction, the size will be zero.
    1020             // Pretend otherwise.
    1021             if (isize == 0) isize = VG_MIN_INSTR_SZB;
    1022 
    1023             // Sanity-check size.
    1024             tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB)
    1025                      || VG_CLREQ_SZB == isize );
    1026 
    1027             // Get space for and init the inode, record it as the current one.
    1028             // Subsequent Dr/Dw/Dm events from the same instruction will
    1029             // also use it.
    1030             curr_inode = setup_InstrInfo(&cgs, cia, isize);
    1031 
    1032             addEvent_Ir( &cgs, curr_inode );
    1033             break;
    1034 
    1035          case Ist_WrTmp: {
    1036             IRExpr* data = st->Ist.WrTmp.data;
                     // Only loads produce an event here; any other expression
                     // assigned to a temporary is ignored.
    1037             if (data->tag == Iex_Load) {
    1038                IRExpr* aexpr = data->Iex.Load.addr;
    1039                // Note also, endianness info is ignored.  I guess
    1040                // that's not interesting.
    1041                addEvent_Dr( &cgs, curr_inode, sizeofIRType(data->Iex.Load.ty),
    1042                                   aexpr );
    1043             }
    1044             break;
    1045          }
    1046 
    1047          case Ist_Store: {
    1048             IRExpr* data  = st->Ist.Store.data;
    1049             IRExpr* aexpr = st->Ist.Store.addr;
    1050             addEvent_Dw( &cgs, curr_inode,
    1051                          sizeofIRType(typeOfIRExpr(tyenv, data)), aexpr );
    1052             break;
    1053          }
    1054 
    1055          case Ist_Dirty: {
    1056             Int      dataSize;
    1057             IRDirty* d = st->Ist.Dirty.details;
    1058             if (d->mFx != Ifx_None) {
    1059                /* This dirty helper accesses memory.  Collect the details. */
    1060                tl_assert(d->mAddr != NULL);
    1061                tl_assert(d->mSize != 0);
    1062                dataSize = d->mSize;
    1063                // Large (eg. 28B, 108B, 512B on x86) data-sized
    1064                // instructions will be done inaccurately, but they're
    1065                // very rare and this avoids errors from hitting more
    1066                // than two cache lines in the simulation.
    1067                if (dataSize > min_line_size)
    1068                   dataSize = min_line_size;
    1069                if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
    1070                   addEvent_Dr( &cgs, curr_inode, dataSize, d->mAddr );
    1071                if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
    1072                   addEvent_Dw( &cgs, curr_inode, dataSize, d->mAddr );
    1073             } else {
    1074                tl_assert(d->mAddr == NULL);
    1075                tl_assert(d->mSize == 0);
    1076             }
    1077             break;
    1078          }
    1079 
    1080          case Ist_CAS: {
    1081             /* We treat it as a read and a write of the location.  I
    1082                think that is the same behaviour as it was before IRCAS
    1083                was introduced, since prior to that point, the Vex
    1084                front ends would translate a lock-prefixed instruction
    1085                into a (normal) read followed by a (normal) write. */
    1086             Int    dataSize;
    1087             IRCAS* cas = st->Ist.CAS.details;
    1088             tl_assert(cas->addr != NULL);
    1089             tl_assert(cas->dataLo != NULL);
    1090             dataSize = sizeofIRType(typeOfIRExpr(tyenv, cas->dataLo));
    1091             if (cas->dataHi != NULL)
    1092                dataSize *= 2; /* since it's a doubleword-CAS */
    1093             /* I don't think this can ever happen, but play safe. */
    1094             if (dataSize > min_line_size)
    1095                dataSize = min_line_size;
    1096             addEvent_Dr( &cgs, curr_inode, dataSize, cas->addr );
    1097             addEvent_Dw( &cgs, curr_inode, dataSize, cas->addr );
    1098             break;
    1099          }
    1100 
    1101          case Ist_LLSC: {
    1102             IRType dataTy;
    1103             if (st->Ist.LLSC.storedata == NULL) {
    1104                /* LL */
    1105                dataTy = typeOfIRTemp(tyenv, st->Ist.LLSC.result);
    1106                addEvent_Dr( &cgs, curr_inode,
    1107                             sizeofIRType(dataTy), st->Ist.LLSC.addr );
    1108             } else {
    1109                /* SC */
    1110                dataTy = typeOfIRExpr(tyenv, st->Ist.LLSC.storedata);
    1111                addEvent_Dw( &cgs, curr_inode,
    1112                             sizeofIRType(dataTy), st->Ist.LLSC.addr );
    1113             }
    1114             break;
    1115          }
    1116 
    1117          case Ist_Exit: {
    1118             // call branch predictor only if this is a branch in guest code
    1119             if ( (st->Ist.Exit.jk == Ijk_Boring) ||
    1120                  (st->Ist.Exit.jk == Ijk_Call) ||
    1121                  (st->Ist.Exit.jk == Ijk_Ret) )
    1122             {
    1123                /* Stuff to widen the guard expression to a host word, so
    1124                   we can pass it to the branch predictor simulation
    1125                   functions easily. */
    1126                Bool     inverted;
    1127                Addr64   nia, sea;
    1128                IRConst* dst;
    1129                IRType   tyW    = hWordTy;
    1130                IROp     widen  = tyW==Ity_I32  ? Iop_1Uto32  : Iop_1Uto64;
    1131                IROp     opXOR  = tyW==Ity_I32  ? Iop_Xor32   : Iop_Xor64;
    1132                IRTemp   guard1 = newIRTemp(cgs.sbOut->tyenv, Ity_I1);
    1133                IRTemp   guardW = newIRTemp(cgs.sbOut->tyenv, tyW);
    1134                IRTemp   guard  = newIRTemp(cgs.sbOut->tyenv, tyW);
    1135                IRExpr*  one    = tyW==Ity_I32 ? IRExpr_Const(IRConst_U32(1))
    1136                                               : IRExpr_Const(IRConst_U64(1));
    1137 
    1138                /* First we need to figure out whether the side exit got
    1139                   inverted by the ir optimiser.  To do that, figure out
    1140                   the next (fallthrough) instruction's address and the
    1141                   side exit address and see if they are the same. */
    1142                nia = cia + (Addr64)isize;
    1143                if (tyW == Ity_I32)
    1144                   nia &= 0xFFFFFFFFULL;
    1145 
    1146                /* Side exit address */
    1147                dst = st->Ist.Exit.dst;
    1148                if (tyW == Ity_I32) {
    1149                   tl_assert(dst->tag == Ico_U32);
    1150                   sea = (Addr64)(UInt)dst->Ico.U32;
    1151                } else {
    1152                   tl_assert(tyW == Ity_I64);
    1153                   tl_assert(dst->tag == Ico_U64);
    1154                   sea = dst->Ico.U64;
    1155                }
    1156 
                        /* The exit counts as inverted when it targets the
                           fall-through address. */
    1157                inverted = nia == sea;
    1158 
    1159                /* Widen the guard expression. */
    1160                addStmtToIRSB( cgs.sbOut,
    1161                               IRStmt_WrTmp( guard1, st->Ist.Exit.guard ));
    1162                addStmtToIRSB( cgs.sbOut,
    1163                               IRStmt_WrTmp( guardW,
    1164                                             IRExpr_Unop(widen,
    1165                                                         IRExpr_RdTmp(guard1))) );
    1166                /* If the exit is inverted, invert the sense of the guard. */
    1167                addStmtToIRSB(
    1168                      cgs.sbOut,
    1169                      IRStmt_WrTmp(
    1170                            guard,
    1171                            inverted ? IRExpr_Binop(opXOR, IRExpr_RdTmp(guardW), one)
    1172                                     : IRExpr_RdTmp(guardW)
    1173                               ));
    1174                /* And post the event. */
    1175                addEvent_Bc( &cgs, curr_inode, IRExpr_RdTmp(guard) );
    1176             }
    1177 
    1178             /* We may never reach the next statement, so need to flush
    1179                all outstanding transactions now. */
    1180             flushEvents( &cgs );
    1181             break;
    1182          }
    1183 
    1184          default:
    1185             tl_assert(0);
    1186             break;
    1187       }
    1188 
    1189       /* Copy the original statement */
    1190       addStmtToIRSB( cgs.sbOut, st );
    1191 
    1192       if (DEBUG_CG) {
    1193          ppIRStmt(st);
    1194          VG_(printf)("\n");
    1195       }
    1196    }
    1197 
    1198    /* Deal with branches to unknown destinations.  Except ignore ones
    1199       which are function returns as we assume the return stack
    1200       predictor never mispredicts. */
    1201    if ((sbIn->jumpkind == Ijk_Boring) || (sbIn->jumpkind == Ijk_Call)) {
    1202       if (0) { ppIRExpr( sbIn->next ); VG_(printf)("\n"); }
    1203       switch (sbIn->next->tag) {
    1204          case Iex_Const:
    1205             break; /* boring - branch to known address */
    1206          case Iex_RdTmp:
    1207             /* looks like an indirect branch (branch to unknown) */
    1208             addEvent_Bi( &cgs, curr_inode, sbIn->next );
    1209             break;
    1210          default:
    1211             /* shouldn't happen - if the incoming IR is properly
    1212                flattened, should only have tmp and const cases to
    1213                consider. */
    1214             tl_assert(0);
    1215       }
    1216    }
    1217 
    1218    /* At the end of the bb.  Flush outstandings. */
    1219    flushEvents( &cgs );
    1220 
    1221    /* done.  stay sane ... */
    1222    tl_assert(cgs.sbInfo_i == cgs.sbInfo->n_instrs);
    1223 
    1224    if (DEBUG_CG) {
    1225       VG_(printf)( "goto {");
    1226       ppIRJumpKind(sbIn->jumpkind);
    1227       VG_(printf)( "} ");
    1228       ppIRExpr( sbIn->next );
    1229       VG_(printf)( "}\n");
    1230    }
    1231 
    1232    return cgs.sbOut;
    1233 }
   1234 
   1235 /*------------------------------------------------------------*/
   1236 /*--- Cache configuration                                  ---*/
   1237 /*------------------------------------------------------------*/
   1238 
    1239 #define UNDEFINED_CACHE     { -1, -1, -1 }
         // UNDEFINED_CACHE initialises all three members of a cache_t to -1.
         // NOTE(review): presumably -1 marks "not configured" and the members
         // are size / associativity / line size -- confirm against cache_t.
    1240 
         // Configurations for the I1, D1 and LL caches; each starts out as
         // UNDEFINED_CACHE.  NOTE(review): the clo_ prefix suggests these are
         // filled in from command-line options -- confirm where they are set.
    1241 static cache_t clo_I1_cache = UNDEFINED_CACHE;
    1242 static cache_t clo_D1_cache = UNDEFINED_CACHE;
    1243 static cache_t clo_LL_cache = UNDEFINED_CACHE;
   1244 
   1245 /*------------------------------------------------------------*/
   1246 /*--- cg_fini() and related function                       ---*/
   1247 /*------------------------------------------------------------*/
   1248 
    1249 // Whole-program totals.  Ir/Dr/Dw hold accesses (.a), first-level misses
    1250 // (.m1) and last-level misses (.mL) for instruction reads, data reads and
         // data writes.  Bc/Bi hold counts (.b, .mp) for conditional and indirect
         // branches; NOTE(review): .mp is presumably the mispredict count.
         // They are summed over every LineCC by fprint_CC_table_and_calc_totals()
         // and afterwards read by cg_fini().  As file-scope statics they are all
         // zero-initialised.
    1251 static CacheCC  Ir_total;
    1252 static CacheCC  Dr_total;
    1253 static CacheCC  Dw_total;
    1254 static BranchCC Bc_total;
    1255 static BranchCC Bi_total;
   1256 
   1257 static void fprint_CC_table_and_calc_totals(void)
   1258 {
   1259    Int     i, fd;
   1260    SysRes  sres;
   1261    Char    buf[512], *currFile = NULL, *currFn = NULL;
   1262    LineCC* lineCC;
   1263 
   1264    // Setup output filename.  Nb: it's important to do this now, ie. as late
   1265    // as possible.  If we do it at start-up and the program forks and the
   1266    // output file format string contains a %p (pid) specifier, both the
   1267    // parent and child will incorrectly write to the same file;  this
   1268    // happened in 3.3.0.
   1269    Char* cachegrind_out_file =
   1270       VG_(expand_file_name)("--cachegrind-out-file", clo_cachegrind_out_file);
   1271 
   1272    sres = VG_(open)(cachegrind_out_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
   1273                                          VKI_S_IRUSR|VKI_S_IWUSR);
   1274    if (sr_isError(sres)) {
   1275       // If the file can't be opened for whatever reason (conflict
   1276       // between multiple cachegrinded processes?), give up now.
   1277       VG_(umsg)("error: can't open cache simulation output file '%s'\n",
   1278                 cachegrind_out_file );
   1279       VG_(umsg)("       ... so simulation results will be missing.\n");
   1280       VG_(free)(cachegrind_out_file);
   1281       return;
   1282    } else {
   1283       fd = sr_Res(sres);
   1284       VG_(free)(cachegrind_out_file);
   1285    }
   1286 
   1287    // "desc:" lines (giving I1/D1/LL cache configuration).  The spaces after
   1288    // the 2nd colon makes cg_annotate's output look nicer.
   1289    VG_(sprintf)(buf, "desc: I1 cache:         %s\n"
   1290                      "desc: D1 cache:         %s\n"
   1291                      "desc: LL cache:         %s\n",
   1292                      I1.desc_line, D1.desc_line, LL.desc_line);
   1293    VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
   1294 
   1295    // "cmd:" line
   1296    VG_(strcpy)(buf, "cmd:");
   1297    VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
   1298    if (VG_(args_the_exename)) {
   1299       VG_(write)(fd, " ", 1);
   1300       VG_(write)(fd, VG_(args_the_exename),
   1301                      VG_(strlen)( VG_(args_the_exename) ));
   1302    }
   1303    for (i = 0; i < VG_(sizeXA)( VG_(args_for_client) ); i++) {
   1304       HChar* arg = * (HChar**) VG_(indexXA)( VG_(args_for_client), i );
   1305       if (arg) {
   1306          VG_(write)(fd, " ", 1);
   1307          VG_(write)(fd, arg, VG_(strlen)( arg ));
   1308       }
   1309    }
   1310    // "events:" line
   1311    if (clo_cache_sim && clo_branch_sim) {
   1312       VG_(sprintf)(buf, "\nevents: Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw "
   1313                                   "Bc Bcm Bi Bim\n");
   1314    }
   1315    else if (clo_cache_sim && !clo_branch_sim) {
   1316       VG_(sprintf)(buf, "\nevents: Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw "
   1317                                   "\n");
   1318    }
   1319    else if (!clo_cache_sim && clo_branch_sim) {
   1320       VG_(sprintf)(buf, "\nevents: Ir "
   1321                                   "Bc Bcm Bi Bim\n");
   1322    }
   1323    else {
   1324       VG_(sprintf)(buf, "\nevents: Ir\n");
   1325    }
   1326 
   1327    VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
   1328 
   1329    // Traverse every lineCC
   1330    VG_(OSetGen_ResetIter)(CC_table);
   1331    while ( (lineCC = VG_(OSetGen_Next)(CC_table)) ) {
   1332       Bool just_hit_a_new_file = False;
   1333       // If we've hit a new file, print a "fl=" line.  Note that because
   1334       // each string is stored exactly once in the string table, we can use
   1335       // pointer comparison rather than strcmp() to test for equality, which
   1336       // is good because most of the time the comparisons are equal and so
   1337       // the whole strings would have to be checked.
   1338       if ( lineCC->loc.file != currFile ) {
   1339          currFile = lineCC->loc.file;
   1340          VG_(sprintf)(buf, "fl=%s\n", currFile);
   1341          VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
   1342          distinct_files++;
   1343          just_hit_a_new_file = True;
   1344       }
   1345       // If we've hit a new function, print a "fn=" line.  We know to do
   1346       // this when the function name changes, and also every time we hit a
   1347       // new file (in which case the new function name might be the same as
   1348       // in the old file, hence the just_hit_a_new_file test).
   1349       if ( just_hit_a_new_file || lineCC->loc.fn != currFn ) {
   1350          currFn = lineCC->loc.fn;
   1351          VG_(sprintf)(buf, "fn=%s\n", currFn);
   1352          VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
   1353          distinct_fns++;
   1354       }
   1355 
   1356       // Print the LineCC
   1357       if (clo_cache_sim && clo_branch_sim) {
   1358          VG_(sprintf)(buf, "%u %llu %llu %llu"
   1359                              " %llu %llu %llu"
   1360                              " %llu %llu %llu"
   1361                              " %llu %llu %llu %llu\n",
   1362                             lineCC->loc.line,
   1363                             lineCC->Ir.a, lineCC->Ir.m1, lineCC->Ir.mL,
   1364                             lineCC->Dr.a, lineCC->Dr.m1, lineCC->Dr.mL,
   1365                             lineCC->Dw.a, lineCC->Dw.m1, lineCC->Dw.mL,
   1366                             lineCC->Bc.b, lineCC->Bc.mp,
   1367                             lineCC->Bi.b, lineCC->Bi.mp);
   1368       }
   1369       else if (clo_cache_sim && !clo_branch_sim) {
   1370          VG_(sprintf)(buf, "%u %llu %llu %llu"
   1371                              " %llu %llu %llu"
   1372                              " %llu %llu %llu\n",
   1373                             lineCC->loc.line,
   1374                             lineCC->Ir.a, lineCC->Ir.m1, lineCC->Ir.mL,
   1375                             lineCC->Dr.a, lineCC->Dr.m1, lineCC->Dr.mL,
   1376                             lineCC->Dw.a, lineCC->Dw.m1, lineCC->Dw.mL);
   1377       }
   1378       else if (!clo_cache_sim && clo_branch_sim) {
   1379          VG_(sprintf)(buf, "%u %llu"
   1380                              " %llu %llu %llu %llu\n",
   1381                             lineCC->loc.line,
   1382                             lineCC->Ir.a,
   1383                             lineCC->Bc.b, lineCC->Bc.mp,
   1384                             lineCC->Bi.b, lineCC->Bi.mp);
   1385       }
   1386       else {
   1387          VG_(sprintf)(buf, "%u %llu\n",
   1388                             lineCC->loc.line,
   1389                             lineCC->Ir.a);
   1390       }
   1391 
   1392       VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
   1393 
   1394       // Update summary stats
   1395       Ir_total.a  += lineCC->Ir.a;
   1396       Ir_total.m1 += lineCC->Ir.m1;
   1397       Ir_total.mL += lineCC->Ir.mL;
   1398       Dr_total.a  += lineCC->Dr.a;
   1399       Dr_total.m1 += lineCC->Dr.m1;
   1400       Dr_total.mL += lineCC->Dr.mL;
   1401       Dw_total.a  += lineCC->Dw.a;
   1402       Dw_total.m1 += lineCC->Dw.m1;
   1403       Dw_total.mL += lineCC->Dw.mL;
   1404       Bc_total.b  += lineCC->Bc.b;
   1405       Bc_total.mp += lineCC->Bc.mp;
   1406       Bi_total.b  += lineCC->Bi.b;
   1407       Bi_total.mp += lineCC->Bi.mp;
   1408 
   1409       distinct_lines++;
   1410    }
   1411 
   1412    // Summary stats must come after rest of table, since we calculate them
   1413    // during traversal.  */
   1414    if (clo_cache_sim && clo_branch_sim) {
   1415       VG_(sprintf)(buf, "summary:"
   1416                         " %llu %llu %llu"
   1417                         " %llu %llu %llu"
   1418                         " %llu %llu %llu"
   1419                         " %llu %llu %llu %llu\n",
   1420                         Ir_total.a, Ir_total.m1, Ir_total.mL,
   1421                         Dr_total.a, Dr_total.m1, Dr_total.mL,
   1422                         Dw_total.a, Dw_total.m1, Dw_total.mL,
   1423                         Bc_total.b, Bc_total.mp,
   1424                         Bi_total.b, Bi_total.mp);
   1425    }
   1426    else if (clo_cache_sim && !clo_branch_sim) {
   1427       VG_(sprintf)(buf, "summary:"
   1428                         " %llu %llu %llu"
   1429                         " %llu %llu %llu"
   1430                         " %llu %llu %llu\n",
   1431                         Ir_total.a, Ir_total.m1, Ir_total.mL,
   1432                         Dr_total.a, Dr_total.m1, Dr_total.mL,
   1433                         Dw_total.a, Dw_total.m1, Dw_total.mL);
   1434    }
   1435    else if (!clo_cache_sim && clo_branch_sim) {
   1436       VG_(sprintf)(buf, "summary:"
   1437                         " %llu"
   1438                         " %llu %llu %llu %llu\n",
   1439                         Ir_total.a,
   1440                         Bc_total.b, Bc_total.mp,
   1441                         Bi_total.b, Bi_total.mp);
   1442    }
   1443    else {
   1444       VG_(sprintf)(buf, "summary:"
   1445                         " %llu\n",
   1446                         Ir_total.a);
   1447    }
   1448 
   1449    VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
   1450    VG_(close)(fd);
   1451 }
   1452 
   1453 static UInt ULong_width(ULong n)
   1454 {
   1455    UInt w = 0;
   1456    while (n > 0) {
   1457       n = n / 10;
   1458       w++;
   1459    }
   1460    if (w == 0) w = 1;
   1461    return w + (w-1)/3;   // add space for commas
   1462 }
   1463 
   1464 static void cg_fini(Int exitcode)
   1465 {
   1466    static Char buf1[128], buf2[128], buf3[128], buf4[123], fmt[128];
   1467 
   1468    CacheCC  D_total;
   1469    BranchCC B_total;
   1470    ULong LL_total_m, LL_total_mr, LL_total_mw,
   1471          LL_total, LL_total_r, LL_total_w;
   1472    Int l1, l2, l3;
   1473 
   1474    fprint_CC_table_and_calc_totals();
   1475 
   1476    if (VG_(clo_verbosity) == 0)
   1477       return;
   1478 
   1479    // Nb: this isn't called "MAX" because that overshadows a global on Darwin.
   1480    #define CG_MAX(a, b)  ((a) >= (b) ? (a) : (b))
   1481 
   1482    /* I cache results.  Use the I_refs value to determine the first column
   1483     * width. */
   1484    l1 = ULong_width(Ir_total.a);
   1485    l2 = ULong_width(CG_MAX(Dr_total.a, Bc_total.b));
   1486    l3 = ULong_width(CG_MAX(Dw_total.a, Bi_total.b));
   1487 
   1488    /* Make format string, getting width right for numbers */
   1489    VG_(sprintf)(fmt, "%%s %%,%dllu\n", l1);
   1490 
   1491    /* Always print this */
   1492    VG_(umsg)(fmt, "I   refs:     ", Ir_total.a);
   1493 
   1494    /* If cache profiling is enabled, show D access numbers and all
   1495       miss numbers */
   1496    if (clo_cache_sim) {
   1497       VG_(umsg)(fmt, "I1  misses:   ", Ir_total.m1);
   1498       VG_(umsg)(fmt, "LLi misses:   ", Ir_total.mL);
   1499 
   1500       if (0 == Ir_total.a) Ir_total.a = 1;
   1501       VG_(percentify)(Ir_total.m1, Ir_total.a, 2, l1+1, buf1);
   1502       VG_(umsg)("I1  miss rate: %s\n", buf1);
   1503 
   1504       VG_(percentify)(Ir_total.mL, Ir_total.a, 2, l1+1, buf1);
   1505       VG_(umsg)("LLi miss rate: %s\n", buf1);
   1506       VG_(umsg)("\n");
   1507 
   1508       /* D cache results.  Use the D_refs.rd and D_refs.wr values to
   1509        * determine the width of columns 2 & 3. */
   1510       D_total.a  = Dr_total.a  + Dw_total.a;
   1511       D_total.m1 = Dr_total.m1 + Dw_total.m1;
   1512       D_total.mL = Dr_total.mL + Dw_total.mL;
   1513 
   1514       /* Make format string, getting width right for numbers */
   1515       VG_(sprintf)(fmt, "%%s %%,%dllu  (%%,%dllu rd   + %%,%dllu wr)\n",
   1516                         l1, l2, l3);
   1517 
   1518       VG_(umsg)(fmt, "D   refs:     ",
   1519                      D_total.a, Dr_total.a, Dw_total.a);
   1520       VG_(umsg)(fmt, "D1  misses:   ",
   1521                      D_total.m1, Dr_total.m1, Dw_total.m1);
   1522       VG_(umsg)(fmt, "LLd misses:   ",
   1523                      D_total.mL, Dr_total.mL, Dw_total.mL);
   1524 
   1525       if (0 == D_total.a)  D_total.a = 1;
   1526       if (0 == Dr_total.a) Dr_total.a = 1;
   1527       if (0 == Dw_total.a) Dw_total.a = 1;
   1528       VG_(percentify)( D_total.m1,  D_total.a, 1, l1+1, buf1);
   1529       VG_(percentify)(Dr_total.m1, Dr_total.a, 1, l2+1, buf2);
   1530       VG_(percentify)(Dw_total.m1, Dw_total.a, 1, l3+1, buf3);
   1531       VG_(umsg)("D1  miss rate: %s (%s     + %s  )\n", buf1, buf2,buf3);
   1532 
   1533       VG_(percentify)( D_total.mL,  D_total.a, 1, l1+1, buf1);
   1534       VG_(percentify)(Dr_total.mL, Dr_total.a, 1, l2+1, buf2);
   1535       VG_(percentify)(Dw_total.mL, Dw_total.a, 1, l3+1, buf3);
   1536       VG_(umsg)("LLd miss rate: %s (%s     + %s  )\n", buf1, buf2,buf3);
   1537       VG_(umsg)("\n");
   1538 
   1539       /* LL overall results */
   1540 
   1541       LL_total   = Dr_total.m1 + Dw_total.m1 + Ir_total.m1;
   1542       LL_total_r = Dr_total.m1 + Ir_total.m1;
   1543       LL_total_w = Dw_total.m1;
   1544       VG_(umsg)(fmt, "LL refs:      ",
   1545                      LL_total, LL_total_r, LL_total_w);
   1546 
   1547       LL_total_m  = Dr_total.mL + Dw_total.mL + Ir_total.mL;
   1548       LL_total_mr = Dr_total.mL + Ir_total.mL;
   1549       LL_total_mw = Dw_total.mL;
   1550       VG_(umsg)(fmt, "LL misses:    ",
   1551                      LL_total_m, LL_total_mr, LL_total_mw);
   1552 
   1553       VG_(percentify)(LL_total_m,  (Ir_total.a + D_total.a),  1, l1+1, buf1);
   1554       VG_(percentify)(LL_total_mr, (Ir_total.a + Dr_total.a), 1, l2+1, buf2);
   1555       VG_(percentify)(LL_total_mw, Dw_total.a,                1, l3+1, buf3);
   1556       VG_(umsg)("LL miss rate:  %s (%s     + %s  )\n", buf1, buf2,buf3);
   1557    }
   1558 
   1559    /* If branch profiling is enabled, show branch overall results. */
   1560    if (clo_branch_sim) {
   1561       /* Make format string, getting width right for numbers */
   1562       VG_(sprintf)(fmt, "%%s %%,%dllu  (%%,%dllu cond + %%,%dllu ind)\n",
   1563                         l1, l2, l3);
   1564 
   1565       if (0 == Bc_total.b)  Bc_total.b = 1;
   1566       if (0 == Bi_total.b)  Bi_total.b = 1;
   1567       B_total.b  = Bc_total.b  + Bi_total.b;
   1568       B_total.mp = Bc_total.mp + Bi_total.mp;
   1569 
   1570       VG_(umsg)("\n");
   1571       VG_(umsg)(fmt, "Branches:     ",
   1572                      B_total.b, Bc_total.b, Bi_total.b);
   1573 
   1574       VG_(umsg)(fmt, "Mispredicts:  ",
   1575                      B_total.mp, Bc_total.mp, Bi_total.mp);
   1576 
   1577       VG_(percentify)(B_total.mp,  B_total.b,  1, l1+1, buf1);
   1578       VG_(percentify)(Bc_total.mp, Bc_total.b, 1, l2+1, buf2);
   1579       VG_(percentify)(Bi_total.mp, Bi_total.b, 1, l3+1, buf3);
   1580 
   1581       VG_(umsg)("Mispred rate:  %s (%s     + %s   )\n", buf1, buf2,buf3);
   1582    }
   1583 
   1584    // Various stats
   1585    if (VG_(clo_stats)) {
   1586       Int debug_lookups = full_debugs      + fn_debugs +
   1587                           file_line_debugs + no_debugs;
   1588 
   1589       VG_(dmsg)("\n");
   1590       VG_(dmsg)("cachegrind: distinct files: %d\n", distinct_files);
   1591       VG_(dmsg)("cachegrind: distinct fns:   %d\n", distinct_fns);
   1592       VG_(dmsg)("cachegrind: distinct lines: %d\n", distinct_lines);
   1593       VG_(dmsg)("cachegrind: distinct instrs:%d\n", distinct_instrs);
   1594       VG_(dmsg)("cachegrind: debug lookups      : %d\n", debug_lookups);
   1595 
   1596       VG_(percentify)(full_debugs,      debug_lookups, 1, 6, buf1);
   1597       VG_(percentify)(file_line_debugs, debug_lookups, 1, 6, buf2);
   1598       VG_(percentify)(fn_debugs,        debug_lookups, 1, 6, buf3);
   1599       VG_(percentify)(no_debugs,        debug_lookups, 1, 6, buf4);
   1600       VG_(dmsg)("cachegrind: with full      info:%s (%d)\n",
   1601                 buf1, full_debugs);
   1602       VG_(dmsg)("cachegrind: with file/line info:%s (%d)\n",
   1603                 buf2, file_line_debugs);
   1604       VG_(dmsg)("cachegrind: with fn name   info:%s (%d)\n",
   1605                 buf3, fn_debugs);
   1606       VG_(dmsg)("cachegrind: with zero      info:%s (%d)\n",
   1607                 buf4, no_debugs);
   1608 
   1609       VG_(dmsg)("cachegrind: string table size: %lu\n",
   1610                 VG_(OSetGen_Size)(stringTable));
   1611       VG_(dmsg)("cachegrind: CC table size: %lu\n",
   1612                 VG_(OSetGen_Size)(CC_table));
   1613       VG_(dmsg)("cachegrind: InstrInfo table size: %lu\n",
   1614                 VG_(OSetGen_Size)(instrInfoTable));
   1615    }
   1616 }
   1617 
   1618 /*--------------------------------------------------------------------*/
   1619 /*--- Discarding BB info                                           ---*/
   1620 /*--------------------------------------------------------------------*/
   1621 
   1622 // Called when a translation is removed from the translation cache for
   1623 // any reason at all: to free up space, because the guest code was
   1624 // unmapped or modified, or for any arbitrary reason.
   1625 static
   1626 void cg_discard_superblock_info ( Addr64 orig_addr64, VexGuestExtents vge )
   1627 {
   1628    SB_info* sbInfo;
   1629    Addr     orig_addr = (Addr)vge.base[0];
   1630 
   1631    tl_assert(vge.n_used > 0);
   1632 
   1633    if (DEBUG_CG)
   1634       VG_(printf)( "discard_basic_block_info: %p, %p, %llu\n",
   1635                    (void*)(Addr)orig_addr,
   1636                    (void*)(Addr)vge.base[0], (ULong)vge.len[0]);
   1637 
   1638    // Get BB info, remove from table, free BB info.  Simple!  Note that we
   1639    // use orig_addr, not the first instruction address in vge.
   1640    sbInfo = VG_(OSetGen_Remove)(instrInfoTable, &orig_addr);
   1641    tl_assert(NULL != sbInfo);
   1642    VG_(OSetGen_FreeNode)(instrInfoTable, sbInfo);
   1643 }
   1644 
   1645 /*--------------------------------------------------------------------*/
   1646 /*--- Command line processing                                      ---*/
   1647 /*--------------------------------------------------------------------*/
   1648 
   1649 static Bool cg_process_cmd_line_option(Char* arg)
   1650 {
   1651    if (VG_(str_clo_cache_opt)(arg,
   1652                               &clo_I1_cache,
   1653                               &clo_D1_cache,
   1654                               &clo_LL_cache)) {}
   1655 
   1656    else if VG_STR_CLO( arg, "--cachegrind-out-file", clo_cachegrind_out_file) {}
   1657    else if VG_BOOL_CLO(arg, "--cache-sim",  clo_cache_sim)  {}
   1658    else if VG_BOOL_CLO(arg, "--branch-sim", clo_branch_sim) {}
   1659    else
   1660       return False;
   1661 
   1662    return True;
   1663 }
   1664 
   1665 static void cg_print_usage(void)
   1666 {
   1667    VG_(print_cache_clo_opts)();
   1668    VG_(printf)(
   1669 "    --cache-sim=yes|no  [yes]        collect cache stats?\n"
   1670 "    --branch-sim=yes|no [no]         collect branch prediction stats?\n"
   1671 "    --cachegrind-out-file=<file>     output file name [cachegrind.out.%%p]\n"
   1672    );
   1673 }
   1674 
   1675 static void cg_print_debug_usage(void)
   1676 {
   1677    VG_(printf)(
   1678 "    (none)\n"
   1679    );
   1680 }
   1681 
   1682 /*--------------------------------------------------------------------*/
   1683 /*--- Setup                                                        ---*/
   1684 /*--------------------------------------------------------------------*/
   1685 
   1686 static void cg_post_clo_init(void); /* just below */
   1687 
   1688 static void cg_pre_clo_init(void)
   1689 {
   1690    VG_(details_name)            ("Cachegrind");
   1691    VG_(details_version)         (NULL);
   1692    VG_(details_description)     ("a cache and branch-prediction profiler");
   1693    VG_(details_copyright_author)(
   1694       "Copyright (C) 2002-2012, and GNU GPL'd, by Nicholas Nethercote et al.");
   1695    VG_(details_bug_reports_to)  (VG_BUGS_TO);
   1696    VG_(details_avg_translation_sizeB) ( 500 );
   1697 
   1698    VG_(basic_tool_funcs)          (cg_post_clo_init,
   1699                                    cg_instrument,
   1700                                    cg_fini);
   1701 
   1702    VG_(needs_superblock_discards)(cg_discard_superblock_info);
   1703    VG_(needs_command_line_options)(cg_process_cmd_line_option,
   1704                                    cg_print_usage,
   1705                                    cg_print_debug_usage);
   1706 }
   1707 
   1708 static void cg_post_clo_init(void)
   1709 {
   1710    cache_t I1c, D1c, LLc;
   1711 
   1712    CC_table =
   1713       VG_(OSetGen_Create)(offsetof(LineCC, loc),
   1714                           cmp_CodeLoc_LineCC,
   1715                           VG_(malloc), "cg.main.cpci.1",
   1716                           VG_(free));
   1717    instrInfoTable =
   1718       VG_(OSetGen_Create)(/*keyOff*/0,
   1719                           NULL,
   1720                           VG_(malloc), "cg.main.cpci.2",
   1721                           VG_(free));
   1722    stringTable =
   1723       VG_(OSetGen_Create)(/*keyOff*/0,
   1724                           stringCmp,
   1725                           VG_(malloc), "cg.main.cpci.3",
   1726                           VG_(free));
   1727 
   1728    VG_(post_clo_init_configure_caches)(&I1c, &D1c, &LLc,
   1729                                        &clo_I1_cache,
   1730                                        &clo_D1_cache,
   1731                                        &clo_LL_cache);
   1732 
   1733    // min_line_size is used to make sure that we never feed
   1734    // accesses to the simulator straddling more than two
   1735    // cache lines at any cache level
   1736    min_line_size = (I1c.line_size < D1c.line_size) ? I1c.line_size : D1c.line_size;
   1737    min_line_size = (LLc.line_size < min_line_size) ? LLc.line_size : min_line_size;
   1738 
   1739    Int largest_load_or_store_size
   1740       = VG_(machine_get_size_of_largest_guest_register)();
   1741    if (min_line_size < largest_load_or_store_size) {
   1742       /* We can't continue, because the cache simulation might
   1743          straddle more than 2 lines, and it will assert.  So let's
   1744          just stop before we start. */
   1745       VG_(umsg)("Cachegrind: cannot continue: the minimum line size (%d)\n",
   1746                 (Int)min_line_size);
   1747       VG_(umsg)("  must be equal to or larger than the maximum register size (%d)\n",
   1748                 largest_load_or_store_size );
   1749       VG_(umsg)("  but it is not.  Exiting now.\n");
   1750       VG_(exit)(1);
   1751    }
   1752 
   1753    cachesim_I1_initcache(I1c);
   1754    cachesim_D1_initcache(D1c);
   1755    cachesim_LL_initcache(LLc);
   1756 }
   1757 
   1758 VG_DETERMINE_INTERFACE_VERSION(cg_pre_clo_init)
   1759 
   1760 /*--------------------------------------------------------------------*/
   1761 /*--- end                                                          ---*/
   1762 /*--------------------------------------------------------------------*/
   1763 
   1764