1 2 /*--------------------------------------------------------------------*/ 3 /*--- Branch predictor simulation cg_branchpred.c ---*/ 4 /*--------------------------------------------------------------------*/ 5 6 /* 7 This file is part of Cachegrind, a Valgrind tool for cache 8 profiling programs. 9 10 Copyright (C) 2002-2013 Nicholas Nethercote 11 njn (at) valgrind.org 12 13 This program is free software; you can redistribute it and/or 14 modify it under the terms of the GNU General Public License as 15 published by the Free Software Foundation; either version 2 of the 16 License, or (at your option) any later version. 17 18 This program is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of 20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 General Public License for more details. 22 23 You should have received a copy of the GNU General Public License 24 along with this program; if not, write to the Free Software 25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 26 02111-1307, USA. 27 28 The GNU General Public License is contained in the file COPYING. 29 */ 30 31 32 /* This file contains the actual branch predictor simulator and its 33 associated state. As with cg_sim.c it is #included directly into 34 cg_main.c. It provides: 35 36 - a taken/not-taken predictor for conditional branches 37 - a branch target address predictor for indirect branches 38 39 Function return-address prediction is not modelled, on the basis 40 that return stack predictors almost always predict correctly, and 41 also that it is difficult for Valgrind to robustly identify 42 function calls and returns. 43 */ 44 45 /* How many bits at the bottom of an instruction address are 46 guaranteed to be zero? */ 47 #if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) \ 48 || defined(VGA_mips32) || defined(VGA_mips64) || defined(VGA_arm64) 49 # define N_IADDR_LO_ZERO_BITS 2 50 #elif defined(VGA_x86) || defined(VGA_amd64) 51 # define N_IADDR_LO_ZERO_BITS 0 52 #elif defined(VGA_s390x) || defined(VGA_arm) 53 # define N_IADDR_LO_ZERO_BITS 1 54 #elif defined(VGA_tilegx) 55 # define N_IADDR_LO_ZERO_BITS 3 56 #else 57 # error "Unsupported architecture" 58 #endif 59 60 61 /* Get a taken/not-taken prediction for the instruction (presumably a 62 conditional branch) at instr_addr. Once that's done, update the 63 predictor state based on whether or not it was actually taken, as 64 indicated by 'taken'. Finally, return 1 for a mispredict and 0 for 65 a successful predict. 66 67 The predictor is an array of 16k (== 2^14) 2-bit saturating 68 counters. Given the address of the branch instruction, the array 69 index to use is computed both from the low order bits of the branch 70 instruction's address, and the global history - that is, from the 71 taken/not-taken behaviour of the most recent few branches. This 72 makes the predictor able to correlate this branch's behaviour with 73 that of other branches. 74 75 TODO: use predictor written by someone who understands this stuff. 76 Perhaps it would be better to move to a standard GShare predictor 77 and/or tournament predictor. 78 */ 79 /* The index is composed of N_HIST bits at the top and N_IADD bits at 80 the bottom. These numbers chosen somewhat arbitrarily, but note 81 that making N_IADD_BITS too small (eg 4) can cause large amounts of 82 aliasing, and hence misprediction, particularly if the history bits 83 are mostly unchanging. */ 84 #define N_HIST_BITS 7 85 #define N_IADD_BITS 7 86 87 #define N_BITS (N_HIST_BITS + N_IADD_BITS) 88 #define N_COUNTERS (1 << N_BITS) 89 90 static UWord shift_register = 0; /* Contains global history */ 91 static UChar counters[N_COUNTERS]; /* Counter array; presumably auto-zeroed */ 92 93 94 static ULong do_cond_branch_predict ( Addr instr_addr, Word takenW ) 95 { 96 UWord indx; 97 Bool predicted_taken, actually_taken, mispredict; 98 99 const UWord hist_mask = (1 << N_HIST_BITS) - 1; 100 const UWord iadd_mask = (1 << N_IADD_BITS) - 1; 101 UWord hist_bits = shift_register & hist_mask; 102 UWord iadd_bits = (instr_addr >> N_IADDR_LO_ZERO_BITS) 103 & iadd_mask; 104 105 tl_assert(hist_bits <= hist_mask); 106 tl_assert(iadd_bits <= iadd_mask); 107 indx = (hist_bits << N_IADD_BITS) | iadd_bits; 108 tl_assert(indx < N_COUNTERS); 109 if (0) VG_(printf)("index = %d\n", (Int)indx); 110 111 tl_assert(takenW <= 1); 112 predicted_taken = counters[ indx ] >= 2; 113 actually_taken = takenW > 0; 114 115 mispredict = (actually_taken && (!predicted_taken)) 116 || ((!actually_taken) && predicted_taken); 117 118 shift_register <<= 1; 119 shift_register |= (actually_taken ? 1 : 0); 120 121 if (actually_taken) { 122 if (counters[indx] < 3) 123 counters[indx]++; 124 } else { 125 if (counters[indx] > 0) 126 counters[indx]--; 127 } 128 129 tl_assert(counters[indx] <= 3); 130 131 return mispredict ? 1 : 0; 132 } 133 134 135 /* A very simple indirect branch predictor. Use the branch's address 136 to index a table which records the previous target address for this 137 branch (or whatever aliased with it) and use that as the 138 prediction. */ 139 #define N_BTAC_BITS 9 140 #define N_BTAC (1 << N_BTAC_BITS) 141 static Addr btac[N_BTAC]; /* BTAC; presumably auto-zeroed */ 142 143 static ULong do_ind_branch_predict ( Addr instr_addr, Addr actual ) 144 { 145 Bool mispredict; 146 const UWord mask = (1 << N_BTAC_BITS) - 1; 147 UWord indx = (instr_addr >> N_IADDR_LO_ZERO_BITS) 148 & mask; 149 tl_assert(indx < N_BTAC); 150 mispredict = btac[indx] != actual; 151 btac[indx] = actual; 152 return mispredict ? 1 : 0; 153 } 154 155 156 /*--------------------------------------------------------------------*/ 157 /*--- end cg_branchpred.c ---*/ 158 /*--------------------------------------------------------------------*/ 159 160