Home | History | Annotate | Download | only in cachegrind
      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- Branch predictor simulation                  cg_branchpred.c ---*/
      4 /*--------------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Cachegrind, a Valgrind tool for cache
      8    profiling programs.
      9 
     10    Copyright (C) 2002-2015 Nicholas Nethercote
     11       njn (at) valgrind.org
     12 
     13    This program is free software; you can redistribute it and/or
     14    modify it under the terms of the GNU General Public License as
     15    published by the Free Software Foundation; either version 2 of the
     16    License, or (at your option) any later version.
     17 
     18    This program is distributed in the hope that it will be useful, but
     19    WITHOUT ANY WARRANTY; without even the implied warranty of
     20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21    General Public License for more details.
     22 
     23    You should have received a copy of the GNU General Public License
     24    along with this program; if not, write to the Free Software
     25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     26    02111-1307, USA.
     27 
     28    The GNU General Public License is contained in the file COPYING.
     29 */
     30 
     31 
     32 /* This file contains the actual branch predictor simulator and its
     33    associated state.  As with cg_sim.c it is #included directly into
     34    cg_main.c.  It provides:
     35 
     36    - a taken/not-taken predictor for conditional branches
     37    - a branch target address predictor for indirect branches
     38 
     39    Function return-address prediction is not modelled, on the basis
     40    that return stack predictors almost always predict correctly, and
     41    also that it is difficult for Valgrind to robustly identify
     42    function calls and returns.
     43 */
     44 
     45 /* How many bits at the bottom of an instruction address are
     46    guaranteed to be zero? */
     47 #if defined(VGA_ppc32) || defined(VGA_ppc64be)  || defined(VGA_ppc64le) \
     48     || defined(VGA_mips32) || defined(VGA_mips64) || defined(VGA_arm64)
     49 #  define N_IADDR_LO_ZERO_BITS 2
     50 #elif defined(VGA_x86) || defined(VGA_amd64)
     51 #  define N_IADDR_LO_ZERO_BITS 0
     52 #elif defined(VGA_s390x) || defined(VGA_arm)
     53 #  define N_IADDR_LO_ZERO_BITS 1
     54 #elif defined(VGA_tilegx)
     55 #  define N_IADDR_LO_ZERO_BITS 3
     56 #else
     57 #  error "Unsupported architecture"
     58 #endif
     59 
     60 
     61 /* Get a taken/not-taken prediction for the instruction (presumably a
     62    conditional branch) at instr_addr.  Once that's done, update the
     63    predictor state based on whether or not it was actually taken, as
     64    indicated by 'taken'.  Finally, return 1 for a mispredict and 0 for
     65    a successful predict.
     66 
     67    The predictor is an array of 16k (== 2^14) 2-bit saturating
     68    counters.  Given the address of the branch instruction, the array
     69    index to use is computed both from the low order bits of the branch
     70    instruction's address, and the global history - that is, from the
     71    taken/not-taken behaviour of the most recent few branches.  This
     72    makes the predictor able to correlate this branch's behaviour with
     73    that of other branches.
     74 
     75    TODO: use predictor written by someone who understands this stuff.
     76    Perhaps it would be better to move to a standard GShare predictor
     77    and/or tournament predictor.
     78 */
     79 /* The index is composed of N_HIST bits at the top and N_IADD bits at
     80    the bottom.  These numbers chosen somewhat arbitrarily, but note
     81    that making N_IADD_BITS too small (eg 4) can cause large amounts of
     82    aliasing, and hence misprediction, particularly if the history bits
     83    are mostly unchanging. */
     84 #define N_HIST_BITS 7
     85 #define N_IADD_BITS 7
     86 
     87 #define N_BITS     (N_HIST_BITS + N_IADD_BITS)
     88 #define N_COUNTERS (1 << N_BITS)
     89 
     90 static UWord shift_register = 0;   /* Contains global history */
     91 static UChar counters[N_COUNTERS]; /* Counter array; presumably auto-zeroed */
     92 
     93 
     94 static ULong do_cond_branch_predict ( Addr instr_addr, Word takenW )
     95 {
     96    UWord indx;
     97    Bool  predicted_taken, actually_taken, mispredict;
     98 
     99    const UWord hist_mask = (1 << N_HIST_BITS) - 1;
    100    const UWord iadd_mask = (1 << N_IADD_BITS) - 1;
    101          UWord hist_bits = shift_register & hist_mask;
    102          UWord iadd_bits = (instr_addr >> N_IADDR_LO_ZERO_BITS)
    103                            & iadd_mask;
    104 
    105    tl_assert(hist_bits <= hist_mask);
    106    tl_assert(iadd_bits <= iadd_mask);
    107    indx = (hist_bits << N_IADD_BITS) | iadd_bits;
    108    tl_assert(indx < N_COUNTERS);
    109    if (0) VG_(printf)("index = %d\n", (Int)indx);
    110 
    111    tl_assert(takenW <= 1);
    112    predicted_taken = counters[ indx ] >= 2;
    113    actually_taken  = takenW > 0;
    114 
    115    mispredict = (actually_taken && (!predicted_taken))
    116                 || ((!actually_taken) && predicted_taken);
    117 
    118    shift_register <<= 1;
    119    shift_register |= (actually_taken ? 1 : 0);
    120 
    121    if (actually_taken) {
    122       if (counters[indx] < 3)
    123          counters[indx]++;
    124    } else {
    125       if (counters[indx] > 0)
    126          counters[indx]--;
    127    }
    128 
    129    tl_assert(counters[indx] <= 3);
    130 
    131    return mispredict ? 1 : 0;
    132 }
    133 
    134 
    135 /* A very simple indirect branch predictor.  Use the branch's address
    136    to index a table which records the previous target address for this
    137    branch (or whatever aliased with it) and use that as the
    138    prediction. */
    139 #define N_BTAC_BITS 9
    140 #define N_BTAC      (1 << N_BTAC_BITS)
    141 static Addr btac[N_BTAC]; /* BTAC; presumably auto-zeroed */
    142 
    143 static ULong do_ind_branch_predict ( Addr instr_addr, Addr actual )
    144 {
    145    Bool mispredict;
    146    const UWord mask = (1 << N_BTAC_BITS) - 1;
    147          UWord indx = (instr_addr >> N_IADDR_LO_ZERO_BITS)
    148                       & mask;
    149    tl_assert(indx < N_BTAC);
    150    mispredict = btac[indx] != actual;
    151    btac[indx] = actual;
    152    return mispredict ? 1 : 0;
    153 }
    154 
    155 
    156 /*--------------------------------------------------------------------*/
    157 /*--- end                                          cg_branchpred.c ---*/
    158 /*--------------------------------------------------------------------*/
    159 
    160