Home | History | Annotate | Download | only in cachegrind
      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- Cachegrind: everything but the simulation itself.            ---*/
      4 /*---                                                    cg_main.c ---*/
      5 /*--------------------------------------------------------------------*/
      6 
      7 /*
      8    This file is part of Cachegrind, a Valgrind tool for cache
      9    profiling programs.
     10 
     11    Copyright (C) 2002-2010 Nicholas Nethercote
     12       njn (at) valgrind.org
     13 
     14    This program is free software; you can redistribute it and/or
     15    modify it under the terms of the GNU General Public License as
     16    published by the Free Software Foundation; either version 2 of the
     17    License, or (at your option) any later version.
     18 
     19    This program is distributed in the hope that it will be useful, but
     20    WITHOUT ANY WARRANTY; without even the implied warranty of
     21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     22    General Public License for more details.
     23 
     24    You should have received a copy of the GNU General Public License
     25    along with this program; if not, write to the Free Software
     26    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     27    02111-1307, USA.
     28 
     29    The GNU General Public License is contained in the file COPYING.
     30 */
     31 
     32 #include "pub_tool_basics.h"
     33 #include "pub_tool_vki.h"
     34 #include "pub_tool_debuginfo.h"
     35 #include "pub_tool_libcbase.h"
     36 #include "pub_tool_libcassert.h"
     37 #include "pub_tool_libcfile.h"
     38 #include "pub_tool_libcprint.h"
     39 #include "pub_tool_libcproc.h"
     40 #include "pub_tool_machine.h"
     41 #include "pub_tool_mallocfree.h"
     42 #include "pub_tool_options.h"
     43 #include "pub_tool_oset.h"
     44 #include "pub_tool_tooliface.h"
     45 #include "pub_tool_xarray.h"
     46 #include "pub_tool_clientstate.h"
     47 #include "pub_tool_machine.h"      // VG_(fnptr_to_fnentry)
     48 
     49 #include "cg_arch.h"
     50 #include "cg_sim.c"
     51 #include "cg_branchpred.c"
     52 
     53 /*------------------------------------------------------------*/
     54 /*--- Constants                                            ---*/
     55 /*------------------------------------------------------------*/
     56 
     57 /* Set to 1 for very verbose debugging */
     58 #define DEBUG_CG 0
     59 
     60 #define MIN_LINE_SIZE         16
     61 #define FILE_LEN              VKI_PATH_MAX
     62 #define FN_LEN                256
     63 
     64 /*------------------------------------------------------------*/
     65 /*--- Options                                              ---*/
     66 /*------------------------------------------------------------*/
     67 
     68 static Bool  clo_cache_sim  = True;  /* do cache simulation? */
     69 static Bool  clo_branch_sim = False; /* do branch simulation? */
     70 static Char* clo_cachegrind_out_file = "cachegrind.out.%p";
     71 
     72 /*------------------------------------------------------------*/
     73 /*--- Types and Data Structures                            ---*/
     74 /*------------------------------------------------------------*/
     75 
     76 typedef
     77    struct {
     78       ULong a;  /* total # memory accesses of this kind */
     79       ULong m1; /* misses in the first level cache */
     80       ULong mL; /* misses in the second level cache */
     81    }
     82    CacheCC;
     83 
     84 typedef
     85    struct {
     86       ULong b;  /* total # branches of this kind */
     87       ULong mp; /* number of branches mispredicted */
     88    }
     89    BranchCC;
     90 
     91 //------------------------------------------------------------
     92 // Primary data structure #1: CC table
     93 // - Holds the per-source-line hit/miss stats, grouped by file/function/line.
     94 // - an ordered set of CCs.  CC indexing done by file/function/line (as
     95 //   determined from the instrAddr).
     96 // - Traversed for dumping stats at end in file/func/line hierarchy.
     97 
     98 typedef struct {
     99    Char* file;
    100    Char* fn;
    101    Int   line;
    102 }
    103 CodeLoc;
    104 
    105 typedef struct {
    106    CodeLoc  loc; /* Source location that these counts pertain to */
    107    CacheCC  Ir;  /* Insn read counts */
    108    CacheCC  Dr;  /* Data read counts */
    109    CacheCC  Dw;  /* Data write/modify counts */
    110    BranchCC Bc;  /* Conditional branch counts */
    111    BranchCC Bi;  /* Indirect branch counts */
    112 } LineCC;
    113 
    114 // First compare file, then fn, then line.
    115 static Word cmp_CodeLoc_LineCC(const void *vloc, const void *vcc)
    116 {
    117    Word res;
    118    CodeLoc* a = (CodeLoc*)vloc;
    119    CodeLoc* b = &(((LineCC*)vcc)->loc);
    120 
    121    res = VG_(strcmp)(a->file, b->file);
    122    if (0 != res)
    123       return res;
    124 
    125    res = VG_(strcmp)(a->fn, b->fn);
    126    if (0 != res)
    127       return res;
    128 
    129    return a->line - b->line;
    130 }
    131 
    132 static OSet* CC_table;
    133 
    134 //------------------------------------------------------------
    135 // Primary data structure #2: InstrInfo table
    136 // - Holds the cached info about each instr that is used for simulation.
    137 // - table(SB_start_addr, list(InstrInfo))
    138 // - For each SB, each InstrInfo in the list holds info about the
    139 //   instruction (instrLen, instrAddr, etc), plus a pointer to its line
    140 //   CC.  This node is what's passed to the simulation function.
    141 // - When SBs are discarded the relevant list(instr_details) is freed.
    142 
    143 typedef struct _InstrInfo InstrInfo;
    144 struct _InstrInfo {
    145    Addr    instr_addr;
    146    UChar   instr_len;
    147    LineCC* parent;         // parent line-CC
    148 };
    149 
    150 typedef struct _SB_info SB_info;
    151 struct _SB_info {
    152    Addr      SB_addr;      // key;  MUST BE FIRST
    153    Int       n_instrs;
    154    InstrInfo instrs[0];
    155 };
    156 
    157 static OSet* instrInfoTable;
    158 
    159 //------------------------------------------------------------
    160 // Secondary data structure: string table
    161 // - holds strings, avoiding dups
    162 // - used for filenames and function names, each of which will be
    163 //   pointed to by one or more CCs.
    164 // - it also allows equality checks just by pointer comparison, which
    165 //   is good when printing the output file at the end.
    166 
    167 static OSet* stringTable;
    168 
    169 //------------------------------------------------------------
    170 // Stats
    171 static Int  distinct_files      = 0;
    172 static Int  distinct_fns        = 0;
    173 static Int  distinct_lines      = 0;
    174 static Int  distinct_instrs     = 0;
    175 
    176 static Int  full_debugs         = 0;
    177 static Int  file_line_debugs    = 0;
    178 static Int  fn_debugs           = 0;
    179 static Int  no_debugs           = 0;
    180 
    181 /*------------------------------------------------------------*/
    182 /*--- String table operations                              ---*/
    183 /*------------------------------------------------------------*/
    184 
    185 static Word stringCmp( const void* key, const void* elem )
    186 {
    187    return VG_(strcmp)(*(Char**)key, *(Char**)elem);
    188 }
    189 
    190 // Get a permanent string;  either pull it out of the string table if it's
    191 // been encountered before, or dup it and put it into the string table.
    192 static Char* get_perm_string(Char* s)
    193 {
    194    Char** s_ptr = VG_(OSetGen_Lookup)(stringTable, &s);
    195    if (s_ptr) {
    196       return *s_ptr;
    197    } else {
    198       Char** s_node = VG_(OSetGen_AllocNode)(stringTable, sizeof(Char*));
    199       *s_node = VG_(strdup)("cg.main.gps.1", s);
    200       VG_(OSetGen_Insert)(stringTable, s_node);
    201       return *s_node;
    202    }
    203 }
    204 
    205 /*------------------------------------------------------------*/
    206 /*--- CC table operations                                  ---*/
    207 /*------------------------------------------------------------*/
    208 
    209 static void get_debug_info(Addr instr_addr, Char file[FILE_LEN],
    210                            Char fn[FN_LEN], Int* line)
    211 {
    212    Char dir[FILE_LEN];
    213    Bool found_dirname;
    214    Bool found_file_line = VG_(get_filename_linenum)(
    215                              instr_addr,
    216                              file, FILE_LEN,
    217                              dir,  FILE_LEN, &found_dirname,
    218                              line
    219                           );
    220    Bool found_fn        = VG_(get_fnname)(instr_addr, fn, FN_LEN);
    221 
    222    if (!found_file_line) {
    223       VG_(strcpy)(file, "???");
    224       *line = 0;
    225    }
    226    if (!found_fn) {
    227       VG_(strcpy)(fn,  "???");
    228    }
    229 
    230    if (found_dirname) {
    231       // +1 for the '/'.
    232       tl_assert(VG_(strlen)(dir) + VG_(strlen)(file) + 1 < FILE_LEN);
    233       VG_(strcat)(dir, "/");     // Append '/'
    234       VG_(strcat)(dir, file);    // Append file to dir
    235       VG_(strcpy)(file, dir);    // Move dir+file to file
    236    }
    237 
    238    if (found_file_line) {
    239       if (found_fn) full_debugs++;
    240       else          file_line_debugs++;
    241    } else {
    242       if (found_fn) fn_debugs++;
    243       else          no_debugs++;
    244    }
    245 }
    246 
    247 // Do a three step traversal: by file, then fn, then line.
    248 // Returns a pointer to the line CC, creates a new one if necessary.
    249 static LineCC* get_lineCC(Addr origAddr)
    250 {
    251    Char    file[FILE_LEN], fn[FN_LEN];
    252    Int     line;
    253    CodeLoc loc;
    254    LineCC* lineCC;
    255 
    256    get_debug_info(origAddr, file, fn, &line);
    257 
    258    loc.file = file;
    259    loc.fn   = fn;
    260    loc.line = line;
    261 
    262    lineCC = VG_(OSetGen_Lookup)(CC_table, &loc);
    263    if (!lineCC) {
    264       // Allocate and zero a new node.
    265       lineCC           = VG_(OSetGen_AllocNode)(CC_table, sizeof(LineCC));
    266       lineCC->loc.file = get_perm_string(loc.file);
    267       lineCC->loc.fn   = get_perm_string(loc.fn);
    268       lineCC->loc.line = loc.line;
    269       lineCC->Ir.a     = 0;
    270       lineCC->Ir.m1    = 0;
    271       lineCC->Ir.mL    = 0;
    272       lineCC->Dr.a     = 0;
    273       lineCC->Dr.m1    = 0;
    274       lineCC->Dr.mL    = 0;
    275       lineCC->Dw.a     = 0;
    276       lineCC->Dw.m1    = 0;
    277       lineCC->Dw.mL    = 0;
    278       lineCC->Bc.b     = 0;
    279       lineCC->Bc.mp    = 0;
    280       lineCC->Bi.b     = 0;
    281       lineCC->Bi.mp    = 0;
    282       VG_(OSetGen_Insert)(CC_table, lineCC);
    283    }
    284 
    285    return lineCC;
    286 }
    287 
    288 /*------------------------------------------------------------*/
    289 /*--- Cache simulation functions                           ---*/
    290 /*------------------------------------------------------------*/
    291 
    292 // Only used with --cache-sim=no.
    293 static VG_REGPARM(1)
    294 void log_1I(InstrInfo* n)
    295 {
    296    n->parent->Ir.a++;
    297 }
    298 
    299 // Only used with --cache-sim=no.
    300 static VG_REGPARM(2)
    301 void log_2I(InstrInfo* n, InstrInfo* n2)
    302 {
    303    n->parent->Ir.a++;
    304    n2->parent->Ir.a++;
    305 }
    306 
    307 // Only used with --cache-sim=no.
    308 static VG_REGPARM(3)
    309 void log_3I(InstrInfo* n, InstrInfo* n2, InstrInfo* n3)
    310 {
    311    n->parent->Ir.a++;
    312    n2->parent->Ir.a++;
    313    n3->parent->Ir.a++;
    314 }
    315 
    316 static VG_REGPARM(1)
    317 void log_1I_0D_cache_access(InstrInfo* n)
    318 {
    319    //VG_(printf)("1I_0D :  CCaddr=0x%010lx,  iaddr=0x%010lx,  isize=%lu\n",
    320    //             n, n->instr_addr, n->instr_len);
    321    cachesim_I1_doref(n->instr_addr, n->instr_len,
    322                      &n->parent->Ir.m1, &n->parent->Ir.mL);
    323    n->parent->Ir.a++;
    324 }
    325 
    326 static VG_REGPARM(2)
    327 void log_2I_0D_cache_access(InstrInfo* n, InstrInfo* n2)
    328 {
    329    //VG_(printf)("2I_0D : CC1addr=0x%010lx, i1addr=0x%010lx, i1size=%lu\n"
    330    //            "        CC2addr=0x%010lx, i2addr=0x%010lx, i2size=%lu\n",
    331    //            n,  n->instr_addr,  n->instr_len,
    332    //            n2, n2->instr_addr, n2->instr_len);
    333    cachesim_I1_doref(n->instr_addr, n->instr_len,
    334                      &n->parent->Ir.m1, &n->parent->Ir.mL);
    335    n->parent->Ir.a++;
    336    cachesim_I1_doref(n2->instr_addr, n2->instr_len,
    337                      &n2->parent->Ir.m1, &n2->parent->Ir.mL);
    338    n2->parent->Ir.a++;
    339 }
    340 
    341 static VG_REGPARM(3)
    342 void log_3I_0D_cache_access(InstrInfo* n, InstrInfo* n2, InstrInfo* n3)
    343 {
    344    //VG_(printf)("3I_0D : CC1addr=0x%010lx, i1addr=0x%010lx, i1size=%lu\n"
    345    //            "        CC2addr=0x%010lx, i2addr=0x%010lx, i2size=%lu\n"
    346    //            "        CC3addr=0x%010lx, i3addr=0x%010lx, i3size=%lu\n",
    347    //            n,  n->instr_addr,  n->instr_len,
    348    //            n2, n2->instr_addr, n2->instr_len,
    349    //            n3, n3->instr_addr, n3->instr_len);
    350    cachesim_I1_doref(n->instr_addr, n->instr_len,
    351                      &n->parent->Ir.m1, &n->parent->Ir.mL);
    352    n->parent->Ir.a++;
    353    cachesim_I1_doref(n2->instr_addr, n2->instr_len,
    354                      &n2->parent->Ir.m1, &n2->parent->Ir.mL);
    355    n2->parent->Ir.a++;
    356    cachesim_I1_doref(n3->instr_addr, n3->instr_len,
    357                      &n3->parent->Ir.m1, &n3->parent->Ir.mL);
    358    n3->parent->Ir.a++;
    359 }
    360 
    361 static VG_REGPARM(3)
    362 void log_1I_1Dr_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
    363 {
    364    //VG_(printf)("1I_1Dr:  CCaddr=0x%010lx,  iaddr=0x%010lx,  isize=%lu\n"
    365    //            "                               daddr=0x%010lx,  dsize=%lu\n",
    366    //            n, n->instr_addr, n->instr_len, data_addr, data_size);
    367    cachesim_I1_doref(n->instr_addr, n->instr_len,
    368                      &n->parent->Ir.m1, &n->parent->Ir.mL);
    369    n->parent->Ir.a++;
    370 
    371    cachesim_D1_doref(data_addr, data_size,
    372                      &n->parent->Dr.m1, &n->parent->Dr.mL);
    373    n->parent->Dr.a++;
    374 }
    375 
    376 static VG_REGPARM(3)
    377 void log_1I_1Dw_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
    378 {
    379    //VG_(printf)("1I_1Dw:  CCaddr=0x%010lx,  iaddr=0x%010lx,  isize=%lu\n"
    380    //            "                               daddr=0x%010lx,  dsize=%lu\n",
    381    //            n, n->instr_addr, n->instr_len, data_addr, data_size);
    382    cachesim_I1_doref(n->instr_addr, n->instr_len,
    383                      &n->parent->Ir.m1, &n->parent->Ir.mL);
    384    n->parent->Ir.a++;
    385 
    386    cachesim_D1_doref(data_addr, data_size,
    387                      &n->parent->Dw.m1, &n->parent->Dw.mL);
    388    n->parent->Dw.a++;
    389 }
    390 
    391 static VG_REGPARM(3)
    392 void log_0I_1Dr_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
    393 {
    394    //VG_(printf)("0I_1Dr:  CCaddr=0x%010lx,  daddr=0x%010lx,  dsize=%lu\n",
    395    //            n, data_addr, data_size);
    396    cachesim_D1_doref(data_addr, data_size,
    397                      &n->parent->Dr.m1, &n->parent->Dr.mL);
    398    n->parent->Dr.a++;
    399 }
    400 
    401 static VG_REGPARM(3)
    402 void log_0I_1Dw_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
    403 {
    404    //VG_(printf)("0I_1Dw:  CCaddr=0x%010lx,  daddr=0x%010lx,  dsize=%lu\n",
    405    //            n, data_addr, data_size);
    406    cachesim_D1_doref(data_addr, data_size,
    407                      &n->parent->Dw.m1, &n->parent->Dw.mL);
    408    n->parent->Dw.a++;
    409 }
    410 
    411 /* For branches, we consult two different predictors, one which
    412    predicts taken/untaken for conditional branches, and the other
    413    which predicts the branch target address for indirect branches
    414    (jump-to-register style ones). */
    415 
    416 static VG_REGPARM(2)
    417 void log_cond_branch(InstrInfo* n, Word taken)
    418 {
    419    //VG_(printf)("cbrnch:  CCaddr=0x%010lx,  taken=0x%010lx\n",
    420    //             n, taken);
    421    n->parent->Bc.b++;
    422    n->parent->Bc.mp
    423       += (1 & do_cond_branch_predict(n->instr_addr, taken));
    424 }
    425 
    426 static VG_REGPARM(2)
    427 void log_ind_branch(InstrInfo* n, UWord actual_dst)
    428 {
    429    //VG_(printf)("ibrnch:  CCaddr=0x%010lx,    dst=0x%010lx\n",
    430    //             n, actual_dst);
    431    n->parent->Bi.b++;
    432    n->parent->Bi.mp
    433       += (1 & do_ind_branch_predict(n->instr_addr, actual_dst));
    434 }
    435 
    436 
    437 /*------------------------------------------------------------*/
    438 /*--- Instrumentation types and structures                 ---*/
    439 /*------------------------------------------------------------*/
    440 
    441 /* Maintain an ordered list of memory events which are outstanding, in
    442    the sense that no IR has yet been generated to do the relevant
    443    helper calls.  The BB is scanned top to bottom and memory events
    444    are added to the end of the list, merging with the most recent
    445    notified event where possible (Dw immediately following Dr and
    446    having the same size and EA can be merged).
    447 
    448    This merging is done so that for architectures which have
    449    load-op-store instructions (x86, amd64), the insn is treated as if
    450    it makes just one memory reference (a modify), rather than two (a
    451    read followed by a write at the same address).
    452 
    453    At various points the list will need to be flushed, that is, IR
    454    generated from it.  That must happen before any possible exit from
    455    the block (the end, or an IRStmt_Exit).  Flushing also takes place
    456    when there is no space to add a new event.
    457 
    458    If we require the simulation statistics to be up to date with
    459    respect to possible memory exceptions, then the list would have to
    460    be flushed before each memory reference.  That would however lose
    461    performance by inhibiting event-merging during flushing.
    462 
    463    Flushing the list consists of walking it start to end and emitting
    464    instrumentation IR for each event, in the order in which they
    465    appear.  It may be possible to emit a single call for two adjacent
    466    events in order to reduce the number of helper function calls made.
    467    For example, it could well be profitable to handle two adjacent Ir
    468    events with a single helper call.  */
    469 
    470 typedef
    471    IRExpr
    472    IRAtom;
    473 
    474 typedef
    475    enum {
    476       Ev_Ir,  // Instruction read
    477       Ev_Dr,  // Data read
    478       Ev_Dw,  // Data write
    479       Ev_Dm,  // Data modify (read then write)
    480       Ev_Bc,  // branch conditional
    481       Ev_Bi   // branch indirect (to unknown destination)
    482    }
    483    EventTag;
    484 
    485 typedef
    486    struct {
    487       EventTag   tag;
    488       InstrInfo* inode;
    489       union {
    490          struct {
    491          } Ir;
    492          struct {
    493             IRAtom* ea;
    494             Int     szB;
    495          } Dr;
    496          struct {
    497             IRAtom* ea;
    498             Int     szB;
    499          } Dw;
    500          struct {
    501             IRAtom* ea;
    502             Int     szB;
    503          } Dm;
    504          struct {
    505             IRAtom* taken; /* :: Ity_I1 */
    506          } Bc;
    507          struct {
    508             IRAtom* dst;
    509          } Bi;
    510       } Ev;
    511    }
    512    Event;
    513 
    514 static void init_Event ( Event* ev ) {
    515    VG_(memset)(ev, 0, sizeof(Event));
    516 }
    517 
    518 static IRAtom* get_Event_dea ( Event* ev ) {
    519    switch (ev->tag) {
    520       case Ev_Dr: return ev->Ev.Dr.ea;
    521       case Ev_Dw: return ev->Ev.Dw.ea;
    522       case Ev_Dm: return ev->Ev.Dm.ea;
    523       default:    tl_assert(0);
    524    }
    525 }
    526 
    527 static Int get_Event_dszB ( Event* ev ) {
    528    switch (ev->tag) {
    529       case Ev_Dr: return ev->Ev.Dr.szB;
    530       case Ev_Dw: return ev->Ev.Dw.szB;
    531       case Ev_Dm: return ev->Ev.Dm.szB;
    532       default:    tl_assert(0);
    533    }
    534 }
    535 
    536 
    537 /* Up to this many unnotified events are allowed.  Number is
    538    arbitrary.  Larger numbers allow more event merging to occur, but
    539    potentially induce more spilling due to extending live ranges of
    540    address temporaries. */
    541 #define N_EVENTS 16
    542 
    543 
    544 /* A struct which holds all the running state during instrumentation.
    545    Mostly to avoid passing loads of parameters everywhere. */
    546 typedef
    547    struct {
    548       /* The current outstanding-memory-event list. */
    549       Event events[N_EVENTS];
    550       Int   events_used;
    551 
    552       /* The array of InstrInfo bins for the BB. */
    553       SB_info* sbInfo;
    554 
    555       /* Number InstrInfo bins 'used' so far. */
    556       Int sbInfo_i;
    557 
    558       /* The output SB being constructed. */
    559       IRSB* sbOut;
    560    }
    561    CgState;
    562 
    563 
    564 /*------------------------------------------------------------*/
    565 /*--- Instrumentation main                                 ---*/
    566 /*------------------------------------------------------------*/
    567 
    568 // Note that origAddr is the real origAddr, not the address of the first
    569 // instruction in the block (they can be different due to redirection).
    570 static
    571 SB_info* get_SB_info(IRSB* sbIn, Addr origAddr)
    572 {
    573    Int      i, n_instrs;
    574    IRStmt*  st;
    575    SB_info* sbInfo;
    576 
    577    // Count number of original instrs in SB
    578    n_instrs = 0;
    579    for (i = 0; i < sbIn->stmts_used; i++) {
    580       st = sbIn->stmts[i];
    581       if (Ist_IMark == st->tag) n_instrs++;
    582    }
    583 
    584    // Check that we don't have an entry for this BB in the instr-info table.
    585    // If this assertion fails, there has been some screwup:  some
    586    // translations must have been discarded but Cachegrind hasn't discarded
    587    // the corresponding entries in the instr-info table.
    588    sbInfo = VG_(OSetGen_Lookup)(instrInfoTable, &origAddr);
    589    tl_assert(NULL == sbInfo);
    590 
    591    // BB never translated before (at this address, at least;  could have
    592    // been unloaded and then reloaded elsewhere in memory)
    593    sbInfo = VG_(OSetGen_AllocNode)(instrInfoTable,
    594                                 sizeof(SB_info) + n_instrs*sizeof(InstrInfo));
    595    sbInfo->SB_addr  = origAddr;
    596    sbInfo->n_instrs = n_instrs;
    597    VG_(OSetGen_Insert)( instrInfoTable, sbInfo );
    598    distinct_instrs++;
    599 
    600    return sbInfo;
    601 }
    602 
    603 
    604 static void showEvent ( Event* ev )
    605 {
    606    switch (ev->tag) {
    607       case Ev_Ir:
    608          VG_(printf)("Ir %p\n", ev->inode);
    609          break;
    610       case Ev_Dr:
    611          VG_(printf)("Dr %p %d EA=", ev->inode, ev->Ev.Dr.szB);
    612          ppIRExpr(ev->Ev.Dr.ea);
    613          VG_(printf)("\n");
    614          break;
    615       case Ev_Dw:
    616          VG_(printf)("Dw %p %d EA=", ev->inode, ev->Ev.Dw.szB);
    617          ppIRExpr(ev->Ev.Dw.ea);
    618          VG_(printf)("\n");
    619          break;
    620       case Ev_Dm:
    621          VG_(printf)("Dm %p %d EA=", ev->inode, ev->Ev.Dm.szB);
    622          ppIRExpr(ev->Ev.Dm.ea);
    623          VG_(printf)("\n");
    624          break;
    625       case Ev_Bc:
    626          VG_(printf)("Bc %p   GA=", ev->inode);
    627          ppIRExpr(ev->Ev.Bc.taken);
    628          VG_(printf)("\n");
    629          break;
    630       case Ev_Bi:
    631          VG_(printf)("Bi %p  DST=", ev->inode);
    632          ppIRExpr(ev->Ev.Bi.dst);
    633          VG_(printf)("\n");
    634          break;
    635       default:
    636          tl_assert(0);
    637          break;
    638    }
    639 }
    640 
    641 // Reserve and initialise an InstrInfo for the first mention of a new insn.
    642 static
    643 InstrInfo* setup_InstrInfo ( CgState* cgs, Addr instr_addr, UInt instr_len )
    644 {
    645    InstrInfo* i_node;
    646    tl_assert(cgs->sbInfo_i >= 0);
    647    tl_assert(cgs->sbInfo_i < cgs->sbInfo->n_instrs);
    648    i_node = &cgs->sbInfo->instrs[ cgs->sbInfo_i ];
    649    i_node->instr_addr = instr_addr;
    650    i_node->instr_len  = instr_len;
    651    i_node->parent     = get_lineCC(instr_addr);
    652    cgs->sbInfo_i++;
    653    return i_node;
    654 }
    655 
    656 
    657 /* Generate code for all outstanding memory events, and mark the queue
    658    empty.  Code is generated into cgs->bbOut, and this activity
    659    'consumes' slots in cgs->sbInfo. */
    660 
    661 static void flushEvents ( CgState* cgs )
    662 {
    663    Int        i, regparms;
    664    Char*      helperName;
    665    void*      helperAddr;
    666    IRExpr**   argv;
    667    IRExpr*    i_node_expr;
    668    IRDirty*   di;
    669    Event*     ev;
    670    Event*     ev2;
    671    Event*     ev3;
    672 
    673    i = 0;
    674    while (i < cgs->events_used) {
    675 
    676       helperName = NULL;
    677       helperAddr = NULL;
    678       argv       = NULL;
    679       regparms   = 0;
    680 
    681       /* generate IR to notify event i and possibly the ones
    682          immediately following it. */
    683       tl_assert(i >= 0 && i < cgs->events_used);
    684 
    685       ev  = &cgs->events[i];
    686       ev2 = ( i < cgs->events_used-1 ? &cgs->events[i+1] : NULL );
    687       ev3 = ( i < cgs->events_used-2 ? &cgs->events[i+2] : NULL );
    688 
    689       if (DEBUG_CG) {
    690          VG_(printf)("   flush ");
    691          showEvent( ev );
    692       }
    693 
    694       i_node_expr = mkIRExpr_HWord( (HWord)ev->inode );
    695 
    696       /* Decide on helper fn to call and args to pass it, and advance
    697          i appropriately. */
    698       switch (ev->tag) {
    699          case Ev_Ir:
    700             /* Merge an Ir with a following Dr/Dm. */
    701             if (ev2 && (ev2->tag == Ev_Dr || ev2->tag == Ev_Dm)) {
    702                /* Why is this true?  It's because we're merging an Ir
    703                   with a following Dr or Dm.  The Ir derives from the
    704                   instruction's IMark and the Dr/Dm from data
    705                   references which follow it.  In short it holds
    706                   because each insn starts with an IMark, hence an
    707                   Ev_Ir, and so these Dr/Dm must pertain to the
    708                   immediately preceding Ir.  Same applies to analogous
    709                   assertions in the subsequent cases. */
    710                tl_assert(ev2->inode == ev->inode);
    711                helperName = "log_1I_1Dr_cache_access";
    712                helperAddr = &log_1I_1Dr_cache_access;
    713                argv = mkIRExprVec_3( i_node_expr,
    714                                      get_Event_dea(ev2),
    715                                      mkIRExpr_HWord( get_Event_dszB(ev2) ) );
    716                regparms = 3;
    717                i += 2;
    718             }
    719             /* Merge an Ir with a following Dw. */
    720             else
    721             if (ev2 && ev2->tag == Ev_Dw) {
    722                tl_assert(ev2->inode == ev->inode);
    723                helperName = "log_1I_1Dw_cache_access";
    724                helperAddr = &log_1I_1Dw_cache_access;
    725                argv = mkIRExprVec_3( i_node_expr,
    726                                      get_Event_dea(ev2),
    727                                      mkIRExpr_HWord( get_Event_dszB(ev2) ) );
    728                regparms = 3;
    729                i += 2;
    730             }
    731             /* Merge an Ir with two following Irs. */
    732             else
    733             if (ev2 && ev3 && ev2->tag == Ev_Ir && ev3->tag == Ev_Ir)
    734             {
    735                if (clo_cache_sim) {
    736                   helperName = "log_3I_0D_cache_access";
    737                   helperAddr = &log_3I_0D_cache_access;
    738                } else {
    739                   helperName = "log_3I";
    740                   helperAddr = &log_3I;
    741                }
    742                argv = mkIRExprVec_3( i_node_expr,
    743                                      mkIRExpr_HWord( (HWord)ev2->inode ),
    744                                      mkIRExpr_HWord( (HWord)ev3->inode ) );
    745                regparms = 3;
    746                i += 3;
    747             }
    748             /* Merge an Ir with one following Ir. */
    749             else
    750             if (ev2 && ev2->tag == Ev_Ir) {
    751                if (clo_cache_sim) {
    752                   helperName = "log_2I_0D_cache_access";
    753                   helperAddr = &log_2I_0D_cache_access;
    754                } else {
    755                   helperName = "log_2I";
    756                   helperAddr = &log_2I;
    757                }
    758                argv = mkIRExprVec_2( i_node_expr,
    759                                      mkIRExpr_HWord( (HWord)ev2->inode ) );
    760                regparms = 2;
    761                i += 2;
    762             }
    763             /* No merging possible; emit as-is. */
    764             else {
    765                if (clo_cache_sim) {
    766                   helperName = "log_1I_0D_cache_access";
    767                   helperAddr = &log_1I_0D_cache_access;
    768                } else {
    769                   helperName = "log_1I";
    770                   helperAddr = &log_1I;
    771                }
    772                argv = mkIRExprVec_1( i_node_expr );
    773                regparms = 1;
    774                i++;
    775             }
    776             break;
    777          case Ev_Dr:
    778          case Ev_Dm:
    779             /* Data read or modify */
    780             helperName = "log_0I_1Dr_cache_access";
    781             helperAddr = &log_0I_1Dr_cache_access;
    782             argv = mkIRExprVec_3( i_node_expr,
    783                                   get_Event_dea(ev),
    784                                   mkIRExpr_HWord( get_Event_dszB(ev) ) );
    785             regparms = 3;
    786             i++;
    787             break;
    788          case Ev_Dw:
    789             /* Data write */
    790             helperName = "log_0I_1Dw_cache_access";
    791             helperAddr = &log_0I_1Dw_cache_access;
    792             argv = mkIRExprVec_3( i_node_expr,
    793                                   get_Event_dea(ev),
    794                                   mkIRExpr_HWord( get_Event_dszB(ev) ) );
    795             regparms = 3;
    796             i++;
    797             break;
    798          case Ev_Bc:
    799             /* Conditional branch */
    800             helperName = "log_cond_branch";
    801             helperAddr = &log_cond_branch;
    802             argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bc.taken );
    803             regparms = 2;
    804             i++;
    805             break;
    806          case Ev_Bi:
    807             /* Branch to an unknown destination */
    808             helperName = "log_ind_branch";
    809             helperAddr = &log_ind_branch;
    810             argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bi.dst );
    811             regparms = 2;
    812             i++;
    813             break;
    814          default:
    815             tl_assert(0);
    816       }
    817 
    818       /* Add the helper. */
    819       tl_assert(helperName);
    820       tl_assert(helperAddr);
    821       tl_assert(argv);
    822       di = unsafeIRDirty_0_N( regparms,
    823                               helperName, VG_(fnptr_to_fnentry)( helperAddr ),
    824                               argv );
    825       addStmtToIRSB( cgs->sbOut, IRStmt_Dirty(di) );
    826    }
    827 
    828    cgs->events_used = 0;
    829 }
    830 
    831 static void addEvent_Ir ( CgState* cgs, InstrInfo* inode )
    832 {
    833    Event* evt;
    834    if (cgs->events_used == N_EVENTS)
    835       flushEvents(cgs);
    836    tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
    837    evt = &cgs->events[cgs->events_used];
    838    init_Event(evt);
    839    evt->tag      = Ev_Ir;
    840    evt->inode    = inode;
    841    cgs->events_used++;
    842 }
    843 
    844 static
    845 void addEvent_Dr ( CgState* cgs, InstrInfo* inode, Int datasize, IRAtom* ea )
    846 {
    847    Event* evt;
    848    tl_assert(isIRAtom(ea));
    849    tl_assert(datasize >= 1 && datasize <= MIN_LINE_SIZE);
    850    if (!clo_cache_sim)
    851       return;
    852    if (cgs->events_used == N_EVENTS)
    853       flushEvents(cgs);
    854    tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
    855    evt = &cgs->events[cgs->events_used];
    856    init_Event(evt);
    857    evt->tag       = Ev_Dr;
    858    evt->inode     = inode;
    859    evt->Ev.Dr.szB = datasize;
    860    evt->Ev.Dr.ea  = ea;
    861    cgs->events_used++;
    862 }
    863 
    864 static
    865 void addEvent_Dw ( CgState* cgs, InstrInfo* inode, Int datasize, IRAtom* ea )
    866 {
    867    Event* lastEvt;
    868    Event* evt;
    869 
    870    tl_assert(isIRAtom(ea));
    871    tl_assert(datasize >= 1 && datasize <= MIN_LINE_SIZE);
    872 
    873    if (!clo_cache_sim)
    874       return;
    875 
    876    /* Is it possible to merge this write with the preceding read? */
    877    lastEvt = &cgs->events[cgs->events_used-1];
    878    if (cgs->events_used > 0
    879     && lastEvt->tag       == Ev_Dr
    880     && lastEvt->Ev.Dr.szB == datasize
    881     && lastEvt->inode     == inode
    882     && eqIRAtom(lastEvt->Ev.Dr.ea, ea))
    883    {
    884       lastEvt->tag   = Ev_Dm;
    885       return;
    886    }
    887 
    888    /* No.  Add as normal. */
    889    if (cgs->events_used == N_EVENTS)
    890       flushEvents(cgs);
    891    tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
    892    evt = &cgs->events[cgs->events_used];
    893    init_Event(evt);
    894    evt->tag       = Ev_Dw;
    895    evt->inode     = inode;
    896    evt->Ev.Dw.szB = datasize;
    897    evt->Ev.Dw.ea  = ea;
    898    cgs->events_used++;
    899 }
    900 
    901 static
    902 void addEvent_Bc ( CgState* cgs, InstrInfo* inode, IRAtom* guard )
    903 {
    904    Event* evt;
    905    tl_assert(isIRAtom(guard));
    906    tl_assert(typeOfIRExpr(cgs->sbOut->tyenv, guard)
    907              == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64));
    908    if (!clo_branch_sim)
    909       return;
    910    if (cgs->events_used == N_EVENTS)
    911       flushEvents(cgs);
    912    tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
    913    evt = &cgs->events[cgs->events_used];
    914    init_Event(evt);
    915    evt->tag         = Ev_Bc;
    916    evt->inode       = inode;
    917    evt->Ev.Bc.taken = guard;
    918    cgs->events_used++;
    919 }
    920 
    921 static
    922 void addEvent_Bi ( CgState* cgs, InstrInfo* inode, IRAtom* whereTo )
    923 {
    924    Event* evt;
    925    tl_assert(isIRAtom(whereTo));
    926    tl_assert(typeOfIRExpr(cgs->sbOut->tyenv, whereTo)
    927              == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64));
    928    if (!clo_branch_sim)
    929       return;
    930    if (cgs->events_used == N_EVENTS)
    931       flushEvents(cgs);
    932    tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
    933    evt = &cgs->events[cgs->events_used];
    934    init_Event(evt);
    935    evt->tag       = Ev_Bi;
    936    evt->inode     = inode;
    937    evt->Ev.Bi.dst = whereTo;
    938    cgs->events_used++;
    939 }
    940 
    941 ////////////////////////////////////////////////////////////
    942 
    943 
/* Instrument one superblock.

   Builds a new IRSB (cgs.sbOut) from sbIn: every original statement is
   copied across unchanged, and while walking the statements events are
   queued with the addEvent_* functions -- an Ir event per IMark, Dr/Dw
   events for loads, stores, memory-touching dirty helpers, CAS and
   LL/SC, a Bc event for conditional side exits and a Bi event for an
   indirect block-ending jump.  Queued events are flushed at every
   Ist_Exit and again at the end of the block.

   Panics if gWordTy != hWordTy.  'layout' is not referenced in the
   body.  Returns the instrumented block. */
static
IRSB* cg_instrument ( VgCallbackClosure* closure,
                      IRSB* sbIn,
                      VexGuestLayout* layout,
                      VexGuestExtents* vge,
                      IRType gWordTy, IRType hWordTy )
{
   Int        i, isize;
   IRStmt*    st;
   Addr64     cia; /* address of current insn */
   CgState    cgs;
   IRTypeEnv* tyenv = sbIn->tyenv;
   InstrInfo* curr_inode = NULL;

   if (gWordTy != hWordTy) {
      /* We don't currently support this case. */
      VG_(tool_panic)("host/guest word size mismatch");
   }

   // Set up new SB
   cgs.sbOut = deepCopyIRSBExceptStmts(sbIn);

   // Copy verbatim any IR preamble preceding the first IMark
   i = 0;
   while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
      addStmtToIRSB( cgs.sbOut, sbIn->stmts[i] );
      i++;
   }

   // Get the first statement, and initial cia from it
   // (i now indexes the first IMark; the asserts below insist one exists.)
   tl_assert(sbIn->stmts_used > 0);
   tl_assert(i < sbIn->stmts_used);
   st = sbIn->stmts[i];
   tl_assert(Ist_IMark == st->tag);

   cia   = st->Ist.IMark.addr;
   isize = st->Ist.IMark.len;
   // If Vex fails to decode an instruction, the size will be zero.
   // Pretend otherwise.
   if (isize == 0) isize = VG_MIN_INSTR_SZB;

   // Set up running state and get block info
   tl_assert(closure->readdr == vge->base[0]);
   cgs.events_used = 0;
   cgs.sbInfo      = get_SB_info(sbIn, (Addr)closure->readdr);
   cgs.sbInfo_i    = 0;

   if (DEBUG_CG)
      VG_(printf)("\n\n---------- cg_instrument ----------\n");

   // Traverse the block, initialising inodes, adding events and flushing as
   // necessary.  The loop starts at the first IMark (not the statement
   // after it), so that IMark is handled by the Ist_IMark case below.
   for (/*use current i*/; i < sbIn->stmts_used; i++) {

      st = sbIn->stmts[i];
      tl_assert(isFlatIRStmt(st));

      switch (st->tag) {
         // These statement kinds generate no events.
         case Ist_NoOp:
         case Ist_AbiHint:
         case Ist_Put:
         case Ist_PutI:
         case Ist_MBE:
            break;

         case Ist_IMark:
            cia   = st->Ist.IMark.addr;
            isize = st->Ist.IMark.len;

            // If Vex fails to decode an instruction, the size will be zero.
            // Pretend otherwise.
            if (isize == 0) isize = VG_MIN_INSTR_SZB;

            // Sanity-check size.
            tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB)
                     || VG_CLREQ_SZB == isize );

            // Get space for and init the inode, record it as the current one.
            // Subsequent Dr/Dw/Dm events from the same instruction will
            // also use it.
            curr_inode = setup_InstrInfo(&cgs, cia, isize);

            addEvent_Ir( &cgs, curr_inode );
            break;

         case Ist_WrTmp: {
            // Only a WrTmp whose right-hand side is a load counts as a
            // data read; any other expression produces no event.
            IRExpr* data = st->Ist.WrTmp.data;
            if (data->tag == Iex_Load) {
               IRExpr* aexpr = data->Iex.Load.addr;
               // Note also, endianness info is ignored.  I guess
               // that's not interesting.
               addEvent_Dr( &cgs, curr_inode, sizeofIRType(data->Iex.Load.ty),
                                  aexpr );
            }
            break;
         }

         case Ist_Store: {
            // The access size is the size of the stored value's type.
            IRExpr* data  = st->Ist.Store.data;
            IRExpr* aexpr = st->Ist.Store.addr;
            addEvent_Dw( &cgs, curr_inode,
                         sizeofIRType(typeOfIRExpr(tyenv, data)), aexpr );
            break;
         }

         case Ist_Dirty: {
            Int      dataSize;
            IRDirty* d = st->Ist.Dirty.details;
            if (d->mFx != Ifx_None) {
               /* This dirty helper accesses memory.  Collect the details. */
               tl_assert(d->mAddr != NULL);
               tl_assert(d->mSize != 0);
               dataSize = d->mSize;
               // Large (eg. 28B, 108B, 512B on x86) data-sized
               // instructions will be done inaccurately, but they're
               // very rare and this avoids errors from hitting more
               // than two cache lines in the simulation.
               if (dataSize > MIN_LINE_SIZE)
                  dataSize = MIN_LINE_SIZE;
               // Ifx_Modify satisfies both tests, so it posts a read
               // followed by a write of the same location.
               if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
                  addEvent_Dr( &cgs, curr_inode, dataSize, d->mAddr );
               if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
                  addEvent_Dw( &cgs, curr_inode, dataSize, d->mAddr );
            } else {
               tl_assert(d->mAddr == NULL);
               tl_assert(d->mSize == 0);
            }
            break;
         }

         case Ist_CAS: {
            /* We treat it as a read and a write of the location.  I
               think that is the same behaviour as it was before IRCAS
               was introduced, since prior to that point, the Vex
               front ends would translate a lock-prefixed instruction
               into a (normal) read followed by a (normal) write. */
            Int    dataSize;
            IRCAS* cas = st->Ist.CAS.details;
            tl_assert(cas->addr != NULL);
            tl_assert(cas->dataLo != NULL);
            dataSize = sizeofIRType(typeOfIRExpr(tyenv, cas->dataLo));
            if (cas->dataHi != NULL)
               dataSize *= 2; /* since it's a doubleword-CAS */
            /* I don't think this can ever happen, but play safe. */
            if (dataSize > MIN_LINE_SIZE)
               dataSize = MIN_LINE_SIZE;
            addEvent_Dr( &cgs, curr_inode, dataSize, cas->addr );
            addEvent_Dw( &cgs, curr_inode, dataSize, cas->addr );
            break;
         }

         case Ist_LLSC: {
            // A NULL storedata marks a load-linked (posted as a read);
            // otherwise it is a store-conditional (posted as a write).
            IRType dataTy;
            if (st->Ist.LLSC.storedata == NULL) {
               /* LL */
               dataTy = typeOfIRTemp(tyenv, st->Ist.LLSC.result);
               addEvent_Dr( &cgs, curr_inode,
                            sizeofIRType(dataTy), st->Ist.LLSC.addr );
            } else {
               /* SC */
               dataTy = typeOfIRExpr(tyenv, st->Ist.LLSC.storedata);
               addEvent_Dw( &cgs, curr_inode,
                            sizeofIRType(dataTy), st->Ist.LLSC.addr );
            }
            break;
         }

         case Ist_Exit: {
            // call branch predictor only if this is a branch in guest code
            if ( (st->Ist.Exit.jk == Ijk_Boring) ||
                 (st->Ist.Exit.jk == Ijk_Call) ||
                 (st->Ist.Exit.jk == Ijk_Ret) )
            {
               /* Stuff to widen the guard expression to a host word, so
                  we can pass it to the branch predictor simulation
                  functions easily. */
               Bool     inverted;
               Addr64   nia, sea;
               IRConst* dst;
               IRType   tyW    = hWordTy;
               IROp     widen  = tyW==Ity_I32  ? Iop_1Uto32  : Iop_1Uto64;
               IROp     opXOR  = tyW==Ity_I32  ? Iop_Xor32   : Iop_Xor64;
               IRTemp   guard1 = newIRTemp(cgs.sbOut->tyenv, Ity_I1);
               IRTemp   guardW = newIRTemp(cgs.sbOut->tyenv, tyW);
               IRTemp   guard  = newIRTemp(cgs.sbOut->tyenv, tyW);
               IRExpr*  one    = tyW==Ity_I32 ? IRExpr_Const(IRConst_U32(1))
                                              : IRExpr_Const(IRConst_U64(1));

               /* First we need to figure out whether the side exit got
                  inverted by the ir optimiser.  To do that, figure out
                  the next (fallthrough) instruction's address and the
                  side exit address and see if they are the same. */
               nia = cia + (Addr64)isize;
               if (tyW == Ity_I32)
                  nia &= 0xFFFFFFFFULL;

               /* Side exit address */
               dst = st->Ist.Exit.dst;
               if (tyW == Ity_I32) {
                  tl_assert(dst->tag == Ico_U32);
                  sea = (Addr64)(UInt)dst->Ico.U32;
               } else {
                  tl_assert(tyW == Ity_I64);
                  tl_assert(dst->tag == Ico_U64);
                  sea = dst->Ico.U64;
               }

               inverted = nia == sea;

               /* Widen the guard expression.  Note these statements are
                  added to the output block whether or not addEvent_Bc
                  ends up recording an event. */
               addStmtToIRSB( cgs.sbOut,
                              IRStmt_WrTmp( guard1, st->Ist.Exit.guard ));
               addStmtToIRSB( cgs.sbOut,
                              IRStmt_WrTmp( guardW,
                                            IRExpr_Unop(widen,
                                                        IRExpr_RdTmp(guard1))) );
               /* If the exit is inverted, invert the sense of the guard. */
               addStmtToIRSB(
                     cgs.sbOut,
                     IRStmt_WrTmp(
                           guard,
                           inverted ? IRExpr_Binop(opXOR, IRExpr_RdTmp(guardW), one)
                                    : IRExpr_RdTmp(guardW)
                              ));
               /* And post the event. */
               addEvent_Bc( &cgs, curr_inode, IRExpr_RdTmp(guard) );
            }

            /* We may never reach the next statement, so need to flush
               all outstanding transactions now. */
            flushEvents( &cgs );
            break;
         }

         default:
            tl_assert(0);
            break;
      }

      /* Copy the original statement.  This comes after the switch, so
         anything the switch added to sbOut (e.g. the flush before an
         Ist_Exit) precedes the statement itself. */
      addStmtToIRSB( cgs.sbOut, st );

      if (DEBUG_CG) {
         ppIRStmt(st);
         VG_(printf)("\n");
      }
   }

   /* Deal with branches to unknown destinations.  Except ignore ones
      which are function returns as we assume the return stack
      predictor never mispredicts. */
   if ((sbIn->jumpkind == Ijk_Boring) || (sbIn->jumpkind == Ijk_Call)) {
      if (0) { ppIRExpr( sbIn->next ); VG_(printf)("\n"); }
      switch (sbIn->next->tag) {
         case Iex_Const:
            break; /* boring - branch to known address */
         case Iex_RdTmp:
            /* looks like an indirect branch (branch to unknown) */
            addEvent_Bi( &cgs, curr_inode, sbIn->next );
            break;
         default:
            /* shouldn't happen - if the incoming IR is properly
               flattened, should only have tmp and const cases to
               consider. */
            tl_assert(0);
      }
   }

   /* At the end of the bb.  Flush outstandings. */
   flushEvents( &cgs );

   /* done.  stay sane ... */
   /* NOTE(review): presumably setup_InstrInfo advances cgs.sbInfo_i once
      per instruction, making this a check that every expected
      instruction was seen -- confirm against setup_InstrInfo. */
   tl_assert(cgs.sbInfo_i == cgs.sbInfo->n_instrs);

   if (DEBUG_CG) {
      VG_(printf)( "goto {");
      ppIRJumpKind(sbIn->jumpkind);
      VG_(printf)( "} ");
      ppIRExpr( sbIn->next );
      VG_(printf)( "}\n");
   }

   return cgs.sbOut;
}
   1228 
   1229 /*------------------------------------------------------------*/
   1230 /*--- Cache configuration                                  ---*/
   1231 /*------------------------------------------------------------*/
   1232 
// Initialiser marking a cache_t as "not specified": all three fields
// are set to -1.
#define UNDEFINED_CACHE     { -1, -1, -1 }

// Cache configurations that may be supplied on the command line.  Each
// one starts out undefined; configure_caches() uses any that have been
// given a value in place of the default/auto-detected configuration.
static cache_t clo_I1_cache = UNDEFINED_CACHE;
static cache_t clo_D1_cache = UNDEFINED_CACHE;
static cache_t clo_LL_cache = UNDEFINED_CACHE;
   1238 
   1239 // Checks cache config is ok.  Returns NULL if ok, or a pointer to an error
   1240 // string otherwise.
   1241 static Char* check_cache(cache_t* cache)
   1242 {
   1243    // Simulator requires set count to be a power of two.
   1244    if ((cache->size % (cache->line_size * cache->assoc) != 0) ||
   1245        (-1 == VG_(log2)(cache->size/cache->line_size/cache->assoc)))
   1246    {
   1247       return "Cache set count is not a power of two.\n";
   1248    }
   1249 
   1250    // Simulator requires line size to be a power of two.
   1251    if (-1 == VG_(log2)(cache->line_size)) {
   1252       return "Cache line size is not a power of two.\n";
   1253    }
   1254 
   1255    // Then check line size >= 16 -- any smaller and a single instruction could
   1256    // straddle three cache lines, which breaks a simulation assertion and is
   1257    // stupid anyway.
   1258    if (cache->line_size < MIN_LINE_SIZE) {
   1259       return "Cache line size is too small.\n";
   1260    }
   1261 
   1262    /* Then check cache size > line size (causes seg faults if not). */
   1263    if (cache->size <= cache->line_size) {
   1264       return "Cache size <= line size.\n";
   1265    }
   1266 
   1267    /* Then check assoc <= (size / line size) (seg faults otherwise). */
   1268    if (cache->assoc > (cache->size / cache->line_size)) {
   1269       return "Cache associativity > (size / line size).\n";
   1270    }
   1271 
   1272    return NULL;
   1273 }
   1274 
   1275 static
   1276 void configure_caches(cache_t* I1c, cache_t* D1c, cache_t* LLc)
   1277 {
   1278 #define DEFINED(L)   (-1 != L.size  || -1 != L.assoc || -1 != L.line_size)
   1279 
   1280    Char* checkRes;
   1281 
   1282    // Count how many were defined on the command line.
   1283    Bool all_caches_clo_defined =
   1284       (DEFINED(clo_I1_cache) &&
   1285        DEFINED(clo_D1_cache) &&
   1286        DEFINED(clo_LL_cache));
   1287 
   1288    // Set the cache config (using auto-detection, if supported by the
   1289    // architecture).
   1290    VG_(configure_caches)( I1c, D1c, LLc, all_caches_clo_defined );
   1291 
   1292    // Check the default/auto-detected values.
   1293    checkRes = check_cache(I1c);  tl_assert(!checkRes);
   1294    checkRes = check_cache(D1c);  tl_assert(!checkRes);
   1295    checkRes = check_cache(LLc);  tl_assert(!checkRes);
   1296 
   1297    // Then replace with any defined on the command line.  (Already checked in
   1298    // parse_cache_opt().)
   1299    if (DEFINED(clo_I1_cache)) { *I1c = clo_I1_cache; }
   1300    if (DEFINED(clo_D1_cache)) { *D1c = clo_D1_cache; }
   1301    if (DEFINED(clo_LL_cache)) { *LLc = clo_LL_cache; }
   1302 
   1303    if (VG_(clo_verbosity) >= 2) {
   1304       VG_(umsg)("Cache configuration used:\n");
   1305       VG_(umsg)("  I1: %dB, %d-way, %dB lines\n",
   1306                 I1c->size, I1c->assoc, I1c->line_size);
   1307       VG_(umsg)("  D1: %dB, %d-way, %dB lines\n",
   1308                 D1c->size, D1c->assoc, D1c->line_size);
   1309       VG_(umsg)("  LL: %dB, %d-way, %dB lines\n",
   1310                 LLc->size, LLc->assoc, LLc->line_size);
   1311    }
   1312 #undef CMD_LINE_DEFINED
   1313 }
   1314 
   1315 /*------------------------------------------------------------*/
   1316 /*--- cg_fini() and related function                       ---*/
   1317 /*------------------------------------------------------------*/
   1318 
// Total reads/writes/misses.  Calculated during CC traversal at the end.
// All auto-zeroed.
// They are accumulated, one lineCC at a time, by
// fprint_CC_table_and_calc_totals(), and read afterwards by cg_fini()
// when printing the summary.
static CacheCC  Ir_total;   // instruction reads
static CacheCC  Dr_total;   // data reads
static CacheCC  Dw_total;   // data writes
static BranchCC Bc_total;   // conditional branches
static BranchCC Bi_total;   // indirect branches
   1326 
   1327 static void fprint_CC_table_and_calc_totals(void)
   1328 {
   1329    Int     i, fd;
   1330    SysRes  sres;
   1331    Char    buf[512], *currFile = NULL, *currFn = NULL;
   1332    LineCC* lineCC;
   1333 
   1334    // Setup output filename.  Nb: it's important to do this now, ie. as late
   1335    // as possible.  If we do it at start-up and the program forks and the
   1336    // output file format string contains a %p (pid) specifier, both the
   1337    // parent and child will incorrectly write to the same file;  this
   1338    // happened in 3.3.0.
   1339    Char* cachegrind_out_file =
   1340       VG_(expand_file_name)("--cachegrind-out-file", clo_cachegrind_out_file);
   1341 
   1342    sres = VG_(open)(cachegrind_out_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
   1343                                          VKI_S_IRUSR|VKI_S_IWUSR);
   1344    if (sr_isError(sres)) {
   1345       // If the file can't be opened for whatever reason (conflict
   1346       // between multiple cachegrinded processes?), give up now.
   1347       VG_(umsg)("error: can't open cache simulation output file '%s'\n",
   1348                 cachegrind_out_file );
   1349       VG_(umsg)("       ... so simulation results will be missing.\n");
   1350       VG_(free)(cachegrind_out_file);
   1351       return;
   1352    } else {
   1353       fd = sr_Res(sres);
   1354       VG_(free)(cachegrind_out_file);
   1355    }
   1356 
   1357    // "desc:" lines (giving I1/D1/LL cache configuration).  The spaces after
   1358    // the 2nd colon makes cg_annotate's output look nicer.
   1359    VG_(sprintf)(buf, "desc: I1 cache:         %s\n"
   1360                      "desc: D1 cache:         %s\n"
   1361                      "desc: LL cache:         %s\n",
   1362                      I1.desc_line, D1.desc_line, LL.desc_line);
   1363    VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
   1364 
   1365    // "cmd:" line
   1366    VG_(strcpy)(buf, "cmd:");
   1367    VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
   1368    if (VG_(args_the_exename)) {
   1369       VG_(write)(fd, " ", 1);
   1370       VG_(write)(fd, VG_(args_the_exename),
   1371                      VG_(strlen)( VG_(args_the_exename) ));
   1372    }
   1373    for (i = 0; i < VG_(sizeXA)( VG_(args_for_client) ); i++) {
   1374       HChar* arg = * (HChar**) VG_(indexXA)( VG_(args_for_client), i );
   1375       if (arg) {
   1376          VG_(write)(fd, " ", 1);
   1377          VG_(write)(fd, arg, VG_(strlen)( arg ));
   1378       }
   1379    }
   1380    // "events:" line
   1381    if (clo_cache_sim && clo_branch_sim) {
   1382       VG_(sprintf)(buf, "\nevents: Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw "
   1383                                   "Bc Bcm Bi Bim\n");
   1384    }
   1385    else if (clo_cache_sim && !clo_branch_sim) {
   1386       VG_(sprintf)(buf, "\nevents: Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw "
   1387                                   "\n");
   1388    }
   1389    else if (!clo_cache_sim && clo_branch_sim) {
   1390       VG_(sprintf)(buf, "\nevents: Ir "
   1391                                   "Bc Bcm Bi Bim\n");
   1392    }
   1393    else {
   1394       VG_(sprintf)(buf, "\nevents: Ir\n");
   1395    }
   1396 
   1397    VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
   1398 
   1399    // Traverse every lineCC
   1400    VG_(OSetGen_ResetIter)(CC_table);
   1401    while ( (lineCC = VG_(OSetGen_Next)(CC_table)) ) {
   1402       Bool just_hit_a_new_file = False;
   1403       // If we've hit a new file, print a "fl=" line.  Note that because
   1404       // each string is stored exactly once in the string table, we can use
   1405       // pointer comparison rather than strcmp() to test for equality, which
   1406       // is good because most of the time the comparisons are equal and so
   1407       // the whole strings would have to be checked.
   1408       if ( lineCC->loc.file != currFile ) {
   1409          currFile = lineCC->loc.file;
   1410          VG_(sprintf)(buf, "fl=%s\n", currFile);
   1411          VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
   1412          distinct_files++;
   1413          just_hit_a_new_file = True;
   1414       }
   1415       // If we've hit a new function, print a "fn=" line.  We know to do
   1416       // this when the function name changes, and also every time we hit a
   1417       // new file (in which case the new function name might be the same as
   1418       // in the old file, hence the just_hit_a_new_file test).
   1419       if ( just_hit_a_new_file || lineCC->loc.fn != currFn ) {
   1420          currFn = lineCC->loc.fn;
   1421          VG_(sprintf)(buf, "fn=%s\n", currFn);
   1422          VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
   1423          distinct_fns++;
   1424       }
   1425 
   1426       // Print the LineCC
   1427       if (clo_cache_sim && clo_branch_sim) {
   1428          VG_(sprintf)(buf, "%u %llu %llu %llu"
   1429                              " %llu %llu %llu"
   1430                              " %llu %llu %llu"
   1431                              " %llu %llu %llu %llu\n",
   1432                             lineCC->loc.line,
   1433                             lineCC->Ir.a, lineCC->Ir.m1, lineCC->Ir.mL,
   1434                             lineCC->Dr.a, lineCC->Dr.m1, lineCC->Dr.mL,
   1435                             lineCC->Dw.a, lineCC->Dw.m1, lineCC->Dw.mL,
   1436                             lineCC->Bc.b, lineCC->Bc.mp,
   1437                             lineCC->Bi.b, lineCC->Bi.mp);
   1438       }
   1439       else if (clo_cache_sim && !clo_branch_sim) {
   1440          VG_(sprintf)(buf, "%u %llu %llu %llu"
   1441                              " %llu %llu %llu"
   1442                              " %llu %llu %llu\n",
   1443                             lineCC->loc.line,
   1444                             lineCC->Ir.a, lineCC->Ir.m1, lineCC->Ir.mL,
   1445                             lineCC->Dr.a, lineCC->Dr.m1, lineCC->Dr.mL,
   1446                             lineCC->Dw.a, lineCC->Dw.m1, lineCC->Dw.mL);
   1447       }
   1448       else if (!clo_cache_sim && clo_branch_sim) {
   1449          VG_(sprintf)(buf, "%u %llu"
   1450                              " %llu %llu %llu %llu\n",
   1451                             lineCC->loc.line,
   1452                             lineCC->Ir.a,
   1453                             lineCC->Bc.b, lineCC->Bc.mp,
   1454                             lineCC->Bi.b, lineCC->Bi.mp);
   1455       }
   1456       else {
   1457          VG_(sprintf)(buf, "%u %llu\n",
   1458                             lineCC->loc.line,
   1459                             lineCC->Ir.a);
   1460       }
   1461 
   1462       VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
   1463 
   1464       // Update summary stats
   1465       Ir_total.a  += lineCC->Ir.a;
   1466       Ir_total.m1 += lineCC->Ir.m1;
   1467       Ir_total.mL += lineCC->Ir.mL;
   1468       Dr_total.a  += lineCC->Dr.a;
   1469       Dr_total.m1 += lineCC->Dr.m1;
   1470       Dr_total.mL += lineCC->Dr.mL;
   1471       Dw_total.a  += lineCC->Dw.a;
   1472       Dw_total.m1 += lineCC->Dw.m1;
   1473       Dw_total.mL += lineCC->Dw.mL;
   1474       Bc_total.b  += lineCC->Bc.b;
   1475       Bc_total.mp += lineCC->Bc.mp;
   1476       Bi_total.b  += lineCC->Bi.b;
   1477       Bi_total.mp += lineCC->Bi.mp;
   1478 
   1479       distinct_lines++;
   1480    }
   1481 
   1482    // Summary stats must come after rest of table, since we calculate them
   1483    // during traversal.  */
   1484    if (clo_cache_sim && clo_branch_sim) {
   1485       VG_(sprintf)(buf, "summary:"
   1486                         " %llu %llu %llu"
   1487                         " %llu %llu %llu"
   1488                         " %llu %llu %llu"
   1489                         " %llu %llu %llu %llu\n",
   1490                         Ir_total.a, Ir_total.m1, Ir_total.mL,
   1491                         Dr_total.a, Dr_total.m1, Dr_total.mL,
   1492                         Dw_total.a, Dw_total.m1, Dw_total.mL,
   1493                         Bc_total.b, Bc_total.mp,
   1494                         Bi_total.b, Bi_total.mp);
   1495    }
   1496    else if (clo_cache_sim && !clo_branch_sim) {
   1497       VG_(sprintf)(buf, "summary:"
   1498                         " %llu %llu %llu"
   1499                         " %llu %llu %llu"
   1500                         " %llu %llu %llu\n",
   1501                         Ir_total.a, Ir_total.m1, Ir_total.mL,
   1502                         Dr_total.a, Dr_total.m1, Dr_total.mL,
   1503                         Dw_total.a, Dw_total.m1, Dw_total.mL);
   1504    }
   1505    else if (!clo_cache_sim && clo_branch_sim) {
   1506       VG_(sprintf)(buf, "summary:"
   1507                         " %llu"
   1508                         " %llu %llu %llu %llu\n",
   1509                         Ir_total.a,
   1510                         Bc_total.b, Bc_total.mp,
   1511                         Bi_total.b, Bi_total.mp);
   1512    }
   1513    else {
   1514       VG_(sprintf)(buf, "summary:"
   1515                         " %llu\n",
   1516                         Ir_total.a);
   1517    }
   1518 
   1519    VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
   1520    VG_(close)(fd);
   1521 }
   1522 
   1523 static UInt ULong_width(ULong n)
   1524 {
   1525    UInt w = 0;
   1526    while (n > 0) {
   1527       n = n / 10;
   1528       w++;
   1529    }
   1530    if (w == 0) w = 1;
   1531    return w + (w-1)/3;   // add space for commas
   1532 }
   1533 
   1534 static void cg_fini(Int exitcode)
   1535 {
   1536    static Char buf1[128], buf2[128], buf3[128], buf4[123], fmt[128];
   1537 
   1538    CacheCC  D_total;
   1539    BranchCC B_total;
   1540    ULong LL_total_m, LL_total_mr, LL_total_mw,
   1541          LL_total, LL_total_r, LL_total_w;
   1542    Int l1, l2, l3;
   1543 
   1544    fprint_CC_table_and_calc_totals();
   1545 
   1546    if (VG_(clo_verbosity) == 0)
   1547       return;
   1548 
   1549    // Nb: this isn't called "MAX" because that overshadows a global on Darwin.
   1550    #define CG_MAX(a, b)  ((a) >= (b) ? (a) : (b))
   1551 
   1552    /* I cache results.  Use the I_refs value to determine the first column
   1553     * width. */
   1554    l1 = ULong_width(Ir_total.a);
   1555    l2 = ULong_width(CG_MAX(Dr_total.a, Bc_total.b));
   1556    l3 = ULong_width(CG_MAX(Dw_total.a, Bi_total.b));
   1557 
   1558    /* Make format string, getting width right for numbers */
   1559    VG_(sprintf)(fmt, "%%s %%,%dllu\n", l1);
   1560 
   1561    /* Always print this */
   1562    VG_(umsg)(fmt, "I   refs:     ", Ir_total.a);
   1563 
   1564    /* If cache profiling is enabled, show D access numbers and all
   1565       miss numbers */
   1566    if (clo_cache_sim) {
   1567       VG_(umsg)(fmt, "I1  misses:   ", Ir_total.m1);
   1568       VG_(umsg)(fmt, "LLi misses:   ", Ir_total.mL);
   1569 
   1570       if (0 == Ir_total.a) Ir_total.a = 1;
   1571       VG_(percentify)(Ir_total.m1, Ir_total.a, 2, l1+1, buf1);
   1572       VG_(umsg)("I1  miss rate: %s\n", buf1);
   1573 
   1574       VG_(percentify)(Ir_total.mL, Ir_total.a, 2, l1+1, buf1);
   1575       VG_(umsg)("LLi miss rate: %s\n", buf1);
   1576       VG_(umsg)("\n");
   1577 
   1578       /* D cache results.  Use the D_refs.rd and D_refs.wr values to
   1579        * determine the width of columns 2 & 3. */
   1580       D_total.a  = Dr_total.a  + Dw_total.a;
   1581       D_total.m1 = Dr_total.m1 + Dw_total.m1;
   1582       D_total.mL = Dr_total.mL + Dw_total.mL;
   1583 
   1584       /* Make format string, getting width right for numbers */
   1585       VG_(sprintf)(fmt, "%%s %%,%dllu  (%%,%dllu rd   + %%,%dllu wr)\n",
   1586                         l1, l2, l3);
   1587 
   1588       VG_(umsg)(fmt, "D   refs:     ",
   1589                      D_total.a, Dr_total.a, Dw_total.a);
   1590       VG_(umsg)(fmt, "D1  misses:   ",
   1591                      D_total.m1, Dr_total.m1, Dw_total.m1);
   1592       VG_(umsg)(fmt, "LLd misses:   ",
   1593                      D_total.mL, Dr_total.mL, Dw_total.mL);
   1594 
   1595       if (0 == D_total.a)  D_total.a = 1;
   1596       if (0 == Dr_total.a) Dr_total.a = 1;
   1597       if (0 == Dw_total.a) Dw_total.a = 1;
   1598       VG_(percentify)( D_total.m1,  D_total.a, 1, l1+1, buf1);
   1599       VG_(percentify)(Dr_total.m1, Dr_total.a, 1, l2+1, buf2);
   1600       VG_(percentify)(Dw_total.m1, Dw_total.a, 1, l3+1, buf3);
   1601       VG_(umsg)("D1  miss rate: %s (%s     + %s  )\n", buf1, buf2,buf3);
   1602 
   1603       VG_(percentify)( D_total.mL,  D_total.a, 1, l1+1, buf1);
   1604       VG_(percentify)(Dr_total.mL, Dr_total.a, 1, l2+1, buf2);
   1605       VG_(percentify)(Dw_total.mL, Dw_total.a, 1, l3+1, buf3);
   1606       VG_(umsg)("LLd miss rate: %s (%s     + %s  )\n", buf1, buf2,buf3);
   1607       VG_(umsg)("\n");
   1608 
   1609       /* LL overall results */
   1610 
   1611       LL_total   = Dr_total.m1 + Dw_total.m1 + Ir_total.m1;
   1612       LL_total_r = Dr_total.m1 + Ir_total.m1;
   1613       LL_total_w = Dw_total.m1;
   1614       VG_(umsg)(fmt, "LL refs:      ",
   1615                      LL_total, LL_total_r, LL_total_w);
   1616 
   1617       LL_total_m  = Dr_total.mL + Dw_total.mL + Ir_total.mL;
   1618       LL_total_mr = Dr_total.mL + Ir_total.mL;
   1619       LL_total_mw = Dw_total.mL;
   1620       VG_(umsg)(fmt, "LL misses:    ",
   1621                      LL_total_m, LL_total_mr, LL_total_mw);
   1622 
   1623       VG_(percentify)(LL_total_m,  (Ir_total.a + D_total.a),  1, l1+1, buf1);
   1624       VG_(percentify)(LL_total_mr, (Ir_total.a + Dr_total.a), 1, l2+1, buf2);
   1625       VG_(percentify)(LL_total_mw, Dw_total.a,                1, l3+1, buf3);
   1626       VG_(umsg)("LL miss rate:  %s (%s     + %s  )\n", buf1, buf2,buf3);
   1627    }
   1628 
   1629    /* If branch profiling is enabled, show branch overall results. */
   1630    if (clo_branch_sim) {
   1631       /* Make format string, getting width right for numbers */
   1632       VG_(sprintf)(fmt, "%%s %%,%dllu  (%%,%dllu cond + %%,%dllu ind)\n",
   1633                         l1, l2, l3);
   1634 
   1635       if (0 == Bc_total.b)  Bc_total.b = 1;
   1636       if (0 == Bi_total.b)  Bi_total.b = 1;
   1637       B_total.b  = Bc_total.b  + Bi_total.b;
   1638       B_total.mp = Bc_total.mp + Bi_total.mp;
   1639 
   1640       VG_(umsg)("\n");
   1641       VG_(umsg)(fmt, "Branches:     ",
   1642                      B_total.b, Bc_total.b, Bi_total.b);
   1643 
   1644       VG_(umsg)(fmt, "Mispredicts:  ",
   1645                      B_total.mp, Bc_total.mp, Bi_total.mp);
   1646 
   1647       VG_(percentify)(B_total.mp,  B_total.b,  1, l1+1, buf1);
   1648       VG_(percentify)(Bc_total.mp, Bc_total.b, 1, l2+1, buf2);
   1649       VG_(percentify)(Bi_total.mp, Bi_total.b, 1, l3+1, buf3);
   1650 
   1651       VG_(umsg)("Mispred rate:  %s (%s     + %s   )\n", buf1, buf2,buf3);
   1652    }
   1653 
   1654    // Various stats
   1655    if (VG_(clo_stats)) {
   1656       Int debug_lookups = full_debugs      + fn_debugs +
   1657                           file_line_debugs + no_debugs;
   1658 
   1659       VG_(dmsg)("\n");
   1660       VG_(dmsg)("cachegrind: distinct files: %d\n", distinct_files);
   1661       VG_(dmsg)("cachegrind: distinct fns:   %d\n", distinct_fns);
   1662       VG_(dmsg)("cachegrind: distinct lines: %d\n", distinct_lines);
   1663       VG_(dmsg)("cachegrind: distinct instrs:%d\n", distinct_instrs);
   1664       VG_(dmsg)("cachegrind: debug lookups      : %d\n", debug_lookups);
   1665 
   1666       VG_(percentify)(full_debugs,      debug_lookups, 1, 6, buf1);
   1667       VG_(percentify)(file_line_debugs, debug_lookups, 1, 6, buf2);
   1668       VG_(percentify)(fn_debugs,        debug_lookups, 1, 6, buf3);
   1669       VG_(percentify)(no_debugs,        debug_lookups, 1, 6, buf4);
   1670       VG_(dmsg)("cachegrind: with full      info:%s (%d)\n",
   1671                 buf1, full_debugs);
   1672       VG_(dmsg)("cachegrind: with file/line info:%s (%d)\n",
   1673                 buf2, file_line_debugs);
   1674       VG_(dmsg)("cachegrind: with fn name   info:%s (%d)\n",
   1675                 buf3, fn_debugs);
   1676       VG_(dmsg)("cachegrind: with zero      info:%s (%d)\n",
   1677                 buf4, no_debugs);
   1678 
   1679       VG_(dmsg)("cachegrind: string table size: %lu\n",
   1680                 VG_(OSetGen_Size)(stringTable));
   1681       VG_(dmsg)("cachegrind: CC table size: %lu\n",
   1682                 VG_(OSetGen_Size)(CC_table));
   1683       VG_(dmsg)("cachegrind: InstrInfo table size: %lu\n",
   1684                 VG_(OSetGen_Size)(instrInfoTable));
   1685    }
   1686 }
   1687 
   1688 /*--------------------------------------------------------------------*/
   1689 /*--- Discarding BB info                                           ---*/
   1690 /*--------------------------------------------------------------------*/
   1691 
   1692 // Called when a translation is removed from the translation cache for
   1693 // any reason at all: to free up space, because the guest code was
   1694 // unmapped or modified, or for any arbitrary reason.
   1695 static
   1696 void cg_discard_superblock_info ( Addr64 orig_addr64, VexGuestExtents vge )
   1697 {
   1698    SB_info* sbInfo;
   1699    Addr     orig_addr = (Addr)vge.base[0];
   1700 
   1701    tl_assert(vge.n_used > 0);
   1702 
   1703    if (DEBUG_CG)
   1704       VG_(printf)( "discard_basic_block_info: %p, %p, %llu\n",
   1705                    (void*)(Addr)orig_addr,
   1706                    (void*)(Addr)vge.base[0], (ULong)vge.len[0]);
   1707 
   1708    // Get BB info, remove from table, free BB info.  Simple!  Note that we
   1709    // use orig_addr, not the first instruction address in vge.
   1710    sbInfo = VG_(OSetGen_Remove)(instrInfoTable, &orig_addr);
   1711    tl_assert(NULL != sbInfo);
   1712    VG_(OSetGen_FreeNode)(instrInfoTable, sbInfo);
   1713 }
   1714 
   1715 /*--------------------------------------------------------------------*/
   1716 /*--- Command line processing                                      ---*/
   1717 /*--------------------------------------------------------------------*/
   1718 
   1719 static void parse_cache_opt ( cache_t* cache, Char* opt, Char* optval )
   1720 {
   1721    Long i1, i2, i3;
   1722    Char* endptr;
   1723    Char* checkRes;
   1724 
   1725    // Option argument looks like "65536,2,64".  Extract them.
   1726    i1 = VG_(strtoll10)(optval,   &endptr); if (*endptr != ',')  goto bad;
   1727    i2 = VG_(strtoll10)(endptr+1, &endptr); if (*endptr != ',')  goto bad;
   1728    i3 = VG_(strtoll10)(endptr+1, &endptr); if (*endptr != '\0') goto bad;
   1729 
   1730    // Check for overflow.
   1731    cache->size      = (Int)i1;
   1732    cache->assoc     = (Int)i2;
   1733    cache->line_size = (Int)i3;
   1734    if (cache->size      != i1) goto overflow;
   1735    if (cache->assoc     != i2) goto overflow;
   1736    if (cache->line_size != i3) goto overflow;
   1737 
   1738    checkRes = check_cache(cache);
   1739    if (checkRes) {
   1740       VG_(fmsg)("%s", checkRes);
   1741       goto bad;
   1742    }
   1743 
   1744    return;
   1745 
   1746   bad:
   1747    VG_(fmsg_bad_option)(opt, "");
   1748 
   1749   overflow:
   1750    VG_(fmsg_bad_option)(opt,
   1751       "One of the cache parameters was too large and overflowed.\n");
   1752 }
   1753 
   1754 static Bool cg_process_cmd_line_option(Char* arg)
   1755 {
   1756    Char* tmp_str;
   1757 
   1758    // 5 is length of "--I1="
   1759    if      VG_STR_CLO(arg, "--I1", tmp_str)
   1760       parse_cache_opt(&clo_I1_cache, arg, tmp_str);
   1761    else if VG_STR_CLO(arg, "--D1", tmp_str)
   1762       parse_cache_opt(&clo_D1_cache, arg, tmp_str);
   1763    else if (VG_STR_CLO(arg, "--L2", tmp_str) || // for backwards compatibility
   1764             VG_STR_CLO(arg, "--LL", tmp_str))
   1765       parse_cache_opt(&clo_LL_cache, arg, tmp_str);
   1766 
   1767    else if VG_STR_CLO( arg, "--cachegrind-out-file", clo_cachegrind_out_file) {}
   1768    else if VG_BOOL_CLO(arg, "--cache-sim",  clo_cache_sim)  {}
   1769    else if VG_BOOL_CLO(arg, "--branch-sim", clo_branch_sim) {}
   1770    else
   1771       return False;
   1772 
   1773    return True;
   1774 }
   1775 
   1776 static void cg_print_usage(void)
   1777 {
   1778    VG_(printf)(
   1779 "    --I1=<size>,<assoc>,<line_size>  set I1 cache manually\n"
   1780 "    --D1=<size>,<assoc>,<line_size>  set D1 cache manually\n"
   1781 "    --LL=<size>,<assoc>,<line_size>  set LL cache manually\n"
   1782 "    --cache-sim=yes|no  [yes]        collect cache stats?\n"
   1783 "    --branch-sim=yes|no [no]         collect branch prediction stats?\n"
   1784 "    --cachegrind-out-file=<file>     output file name [cachegrind.out.%%p]\n"
   1785    );
   1786 }
   1787 
   1788 static void cg_print_debug_usage(void)
   1789 {
   1790    VG_(printf)(
   1791 "    (none)\n"
   1792    );
   1793 }
   1794 
   1795 /*--------------------------------------------------------------------*/
   1796 /*--- Setup                                                        ---*/
   1797 /*--------------------------------------------------------------------*/
   1798 
   1799 static void cg_post_clo_init(void); /* just below */
   1800 
   1801 static void cg_pre_clo_init(void)
   1802 {
   1803    VG_(details_name)            ("Cachegrind");
   1804    VG_(details_version)         (NULL);
   1805    VG_(details_description)     ("a cache and branch-prediction profiler");
   1806    VG_(details_copyright_author)(
   1807       "Copyright (C) 2002-2010, and GNU GPL'd, by Nicholas Nethercote et al.");
   1808    VG_(details_bug_reports_to)  (VG_BUGS_TO);
   1809    VG_(details_avg_translation_sizeB) ( 500 );
   1810 
   1811    VG_(basic_tool_funcs)          (cg_post_clo_init,
   1812                                    cg_instrument,
   1813                                    cg_fini);
   1814 
   1815    VG_(needs_superblock_discards)(cg_discard_superblock_info);
   1816    VG_(needs_command_line_options)(cg_process_cmd_line_option,
   1817                                    cg_print_usage,
   1818                                    cg_print_debug_usage);
   1819 }
   1820 
   1821 static void cg_post_clo_init(void)
   1822 {
   1823    cache_t I1c, D1c, LLc;
   1824 
   1825    CC_table =
   1826       VG_(OSetGen_Create)(offsetof(LineCC, loc),
   1827                           cmp_CodeLoc_LineCC,
   1828                           VG_(malloc), "cg.main.cpci.1",
   1829                           VG_(free));
   1830    instrInfoTable =
   1831       VG_(OSetGen_Create)(/*keyOff*/0,
   1832                           NULL,
   1833                           VG_(malloc), "cg.main.cpci.2",
   1834                           VG_(free));
   1835    stringTable =
   1836       VG_(OSetGen_Create)(/*keyOff*/0,
   1837                           stringCmp,
   1838                           VG_(malloc), "cg.main.cpci.3",
   1839                           VG_(free));
   1840 
   1841    configure_caches(&I1c, &D1c, &LLc);
   1842 
   1843    cachesim_I1_initcache(I1c);
   1844    cachesim_D1_initcache(D1c);
   1845    cachesim_LL_initcache(LLc);
   1846 }
   1847 
   1848 VG_DETERMINE_INTERFACE_VERSION(cg_pre_clo_init)
   1849 
   1850 /*--------------------------------------------------------------------*/
   1851 /*--- end                                                          ---*/
   1852 /*--------------------------------------------------------------------*/
   1853 
   1854