1 //=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines the itinerary class data for the ARM Cortex A9 processors. 11 // 12 //===----------------------------------------------------------------------===// 13 14 // ===---------------------------------------------------------------------===// 15 // This section contains legacy support for itineraries. This is 16 // required until SD and PostRA schedulers are replaced by MachineScheduler. 17 18 // 19 // Ad-hoc scheduling information derived from pretty vague "Cortex-A9 Technical 20 // Reference Manual". 21 // 22 // Functional units 23 def A9_Issue0 : FuncUnit; // Issue 0 24 def A9_Issue1 : FuncUnit; // Issue 1 25 def A9_Branch : FuncUnit; // Branch 26 def A9_ALU0 : FuncUnit; // ALU / MUL pipeline 0 27 def A9_ALU1 : FuncUnit; // ALU pipeline 1 28 def A9_AGU : FuncUnit; // Address generation unit for ld / st 29 def A9_NPipe : FuncUnit; // NEON pipeline 30 def A9_MUX0 : FuncUnit; // AGU + NEON/FPU multiplexer 31 def A9_LSUnit : FuncUnit; // L/S Unit 32 def A9_DRegsVFP: FuncUnit; // FP register set, VFP side 33 def A9_DRegsN : FuncUnit; // FP register set, NEON side 34 35 // Bypasses 36 def A9_LdBypass : Bypass; 37 38 def CortexA9Itineraries : ProcessorItineraries< 39 [A9_Issue0, A9_Issue1, A9_Branch, A9_ALU0, A9_ALU1, A9_AGU, A9_NPipe, A9_MUX0, 40 A9_LSUnit, A9_DRegsVFP, A9_DRegsN], 41 [A9_LdBypass], [ 42 // Two fully-pipelined integer ALU pipelines 43 44 // 45 // Move instructions, unconditional 46 InstrItinData<IIC_iMOVi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 47 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>, 48 InstrItinData<IIC_iMOVr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 49 InstrStage<1, [A9_ALU0, 
A9_ALU1]>], [1, 1]>, 50 InstrItinData<IIC_iMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 51 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>, 52 InstrItinData<IIC_iMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 53 InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>, 54 InstrItinData<IIC_iMOVix2 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 55 InstrStage<1, [A9_ALU0, A9_ALU1]>, 56 InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>, 57 InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 58 InstrStage<1, [A9_ALU0, A9_ALU1]>, 59 InstrStage<1, [A9_ALU0, A9_ALU1]>, 60 InstrStage<1, [A9_ALU0, A9_ALU1]>], [3]>, 61 InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 62 InstrStage<1, [A9_ALU0, A9_ALU1]>, 63 InstrStage<1, [A9_ALU0, A9_ALU1]>, 64 InstrStage<1, [A9_MUX0], 0>, 65 InstrStage<1, [A9_AGU], 0>, 66 InstrStage<1, [A9_LSUnit]>], [5]>, 67 // 68 // MVN instructions 69 InstrItinData<IIC_iMVNi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 70 InstrStage<1, [A9_ALU0, A9_ALU1]>], 71 [1]>, 72 InstrItinData<IIC_iMVNr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 73 InstrStage<1, [A9_ALU0, A9_ALU1]>], 74 [1, 1], [NoBypass, A9_LdBypass]>, 75 InstrItinData<IIC_iMVNsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 76 InstrStage<2, [A9_ALU0, A9_ALU1]>], 77 [2, 1]>, 78 InstrItinData<IIC_iMVNsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 79 InstrStage<3, [A9_ALU0, A9_ALU1]>], 80 [3, 1, 1]>, 81 // 82 // No operand cycles 83 InstrItinData<IIC_iALUx , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 84 InstrStage<1, [A9_ALU0, A9_ALU1]>]>, 85 // 86 // Binary Instructions that produce a result 87 InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 88 InstrStage<1, [A9_ALU0, A9_ALU1]>], 89 [1, 1], [NoBypass, A9_LdBypass]>, 90 InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 91 InstrStage<1, [A9_ALU0, A9_ALU1]>], 92 [1, 1, 1], [NoBypass, A9_LdBypass, A9_LdBypass]>, 93 InstrItinData<IIC_iALUsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 
94 InstrStage<2, [A9_ALU0, A9_ALU1]>], 95 [2, 1, 1], [NoBypass, A9_LdBypass, NoBypass]>, 96 InstrItinData<IIC_iALUsir,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 97 InstrStage<2, [A9_ALU0, A9_ALU1]>], 98 [2, 1, 1], [NoBypass, NoBypass, A9_LdBypass]>, 99 InstrItinData<IIC_iALUsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 100 InstrStage<3, [A9_ALU0, A9_ALU1]>], 101 [3, 1, 1, 1], 102 [NoBypass, A9_LdBypass, NoBypass, NoBypass]>, 103 // 104 // Bitwise Instructions that produce a result 105 InstrItinData<IIC_iBITi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 106 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>, 107 InstrItinData<IIC_iBITr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 108 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>, 109 InstrItinData<IIC_iBITsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 110 InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>, 111 InstrItinData<IIC_iBITsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 112 InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>, 113 // 114 // Unary Instructions that produce a result 115 116 // CLZ, RBIT, etc. 
117 InstrItinData<IIC_iUNAr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 118 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>, 119 120 // BFC, BFI, UBFX, SBFX 121 InstrItinData<IIC_iUNAsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 122 InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1]>, 123 124 // 125 // Zero and sign extension instructions 126 InstrItinData<IIC_iEXTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 127 InstrStage<1, [A9_ALU0, A9_ALU1]>], [2, 1]>, 128 InstrItinData<IIC_iEXTAr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 129 InstrStage<2, [A9_ALU0, A9_ALU1]>], [3, 1, 1]>, 130 InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 131 InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>, 132 // 133 // Compare instructions 134 InstrItinData<IIC_iCMPi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 135 InstrStage<1, [A9_ALU0, A9_ALU1]>], 136 [1], [A9_LdBypass]>, 137 InstrItinData<IIC_iCMPr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 138 InstrStage<1, [A9_ALU0, A9_ALU1]>], 139 [1, 1], [A9_LdBypass, A9_LdBypass]>, 140 InstrItinData<IIC_iCMPsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 141 InstrStage<2, [A9_ALU0, A9_ALU1]>], 142 [1, 1], [A9_LdBypass, NoBypass]>, 143 InstrItinData<IIC_iCMPsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 144 InstrStage<3, [A9_ALU0, A9_ALU1]>], 145 [1, 1, 1], [A9_LdBypass, NoBypass, NoBypass]>, 146 // 147 // Test instructions 148 InstrItinData<IIC_iTSTi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 149 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>, 150 InstrItinData<IIC_iTSTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 151 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>, 152 InstrItinData<IIC_iTSTsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 153 InstrStage<2, [A9_ALU0, A9_ALU1]>], [1, 1]>, 154 InstrItinData<IIC_iTSTsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 155 InstrStage<3, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>, 156 // 157 // Move instructions, conditional 158 // FIXME: Correctly model the extra input dep on the destination. 
159 InstrItinData<IIC_iCMOVi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 160 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>, 161 InstrItinData<IIC_iCMOVr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 162 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>, 163 InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 164 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>, 165 InstrItinData<IIC_iCMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 166 InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>, 167 InstrItinData<IIC_iCMOVix2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 168 InstrStage<1, [A9_ALU0, A9_ALU1]>, 169 InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 170 InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>, 171 172 // Integer multiply pipeline 173 // 174 InstrItinData<IIC_iMUL16 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 175 InstrStage<2, [A9_ALU0]>], [3, 1, 1]>, 176 InstrItinData<IIC_iMAC16 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 177 InstrStage<2, [A9_ALU0]>], 178 [3, 1, 1, 1]>, 179 InstrItinData<IIC_iMUL32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 180 InstrStage<2, [A9_ALU0]>], [4, 1, 1]>, 181 InstrItinData<IIC_iMAC32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 182 InstrStage<2, [A9_ALU0]>], 183 [4, 1, 1, 1]>, 184 InstrItinData<IIC_iMUL64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 185 InstrStage<3, [A9_ALU0]>], [4, 5, 1, 1]>, 186 InstrItinData<IIC_iMAC64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 187 InstrStage<3, [A9_ALU0]>], 188 [4, 5, 1, 1]>, 189 // Integer load pipeline 190 // FIXME: The timings are some rough approximations 191 // 192 // Immediate offset 193 InstrItinData<IIC_iLoad_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 194 InstrStage<1, [A9_MUX0], 0>, 195 InstrStage<1, [A9_AGU], 0>, 196 InstrStage<1, [A9_LSUnit]>], 197 [3, 1], [A9_LdBypass]>, 198 InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 199 InstrStage<1, [A9_MUX0], 0>, 200 InstrStage<2, [A9_AGU], 0>, 201 InstrStage<1, [A9_LSUnit]>], 202 [4, 1], [A9_LdBypass]>, 203 // 
FIXME: If address is 64-bit aligned, AGU cycles is 1. 204 InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 205 InstrStage<1, [A9_MUX0], 0>, 206 InstrStage<2, [A9_AGU], 0>, 207 InstrStage<1, [A9_LSUnit]>], 208 [3, 3, 1], [A9_LdBypass]>, 209 // 210 // Register offset 211 InstrItinData<IIC_iLoad_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 212 InstrStage<1, [A9_MUX0], 0>, 213 InstrStage<1, [A9_AGU], 0>, 214 InstrStage<1, [A9_LSUnit]>], 215 [3, 1, 1], [A9_LdBypass]>, 216 InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 217 InstrStage<1, [A9_MUX0], 0>, 218 InstrStage<2, [A9_AGU], 0>, 219 InstrStage<1, [A9_LSUnit]>], 220 [4, 1, 1], [A9_LdBypass]>, 221 InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 222 InstrStage<1, [A9_MUX0], 0>, 223 InstrStage<2, [A9_AGU], 0>, 224 InstrStage<1, [A9_LSUnit]>], 225 [3, 3, 1, 1], [A9_LdBypass]>, 226 // 227 // Scaled register offset 228 InstrItinData<IIC_iLoad_si , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 229 InstrStage<1, [A9_MUX0], 0>, 230 InstrStage<1, [A9_AGU], 0>, 231 InstrStage<1, [A9_LSUnit], 0>], 232 [4, 1, 1], [A9_LdBypass]>, 233 InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 234 InstrStage<1, [A9_MUX0], 0>, 235 InstrStage<2, [A9_AGU], 0>, 236 InstrStage<1, [A9_LSUnit]>], 237 [5, 1, 1], [A9_LdBypass]>, 238 // 239 // Immediate offset with update 240 InstrItinData<IIC_iLoad_iu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 241 InstrStage<1, [A9_MUX0], 0>, 242 InstrStage<1, [A9_AGU], 0>, 243 InstrStage<1, [A9_LSUnit]>], 244 [3, 2, 1], [A9_LdBypass]>, 245 InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 246 InstrStage<1, [A9_MUX0], 0>, 247 InstrStage<2, [A9_AGU], 0>, 248 InstrStage<1, [A9_LSUnit]>], 249 [4, 3, 1], [A9_LdBypass]>, 250 // 251 // Register offset with update 252 InstrItinData<IIC_iLoad_ru , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 253 InstrStage<1, [A9_MUX0], 0>, 254 InstrStage<1, [A9_AGU], 0>, 
255 InstrStage<1, [A9_LSUnit]>], 256 [3, 2, 1, 1], [A9_LdBypass]>, 257 InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 258 InstrStage<1, [A9_MUX0], 0>, 259 InstrStage<2, [A9_AGU], 0>, 260 InstrStage<1, [A9_LSUnit]>], 261 [4, 3, 1, 1], [A9_LdBypass]>, 262 InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 263 InstrStage<1, [A9_MUX0], 0>, 264 InstrStage<2, [A9_AGU], 0>, 265 InstrStage<1, [A9_LSUnit]>], 266 [3, 3, 1, 1], [A9_LdBypass]>, 267 // 268 // Scaled register offset with update 269 InstrItinData<IIC_iLoad_siu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 270 InstrStage<1, [A9_MUX0], 0>, 271 InstrStage<1, [A9_AGU], 0>, 272 InstrStage<1, [A9_LSUnit]>], 273 [4, 3, 1, 1], [A9_LdBypass]>, 274 InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 275 InstrStage<1, [A9_MUX0], 0>, 276 InstrStage<2, [A9_AGU], 0>, 277 InstrStage<1, [A9_LSUnit]>], 278 [5, 4, 1, 1], [A9_LdBypass]>, 279 // 280 // Load multiple, def is the 5th operand. 281 // FIXME: This assumes 3 to 4 registers. 282 InstrItinData<IIC_iLoad_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 283 InstrStage<1, [A9_MUX0], 0>, 284 InstrStage<2, [A9_AGU], 1>, 285 InstrStage<2, [A9_LSUnit]>], 286 [1, 1, 1, 1, 3], 287 [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass], 288 -1>, // dynamic uops 289 // 290 // Load multiple + update, defs are the 1st and 5th operands. 
291 InstrItinData<IIC_iLoad_mu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 292 InstrStage<1, [A9_MUX0], 0>, 293 InstrStage<2, [A9_AGU], 1>, 294 InstrStage<2, [A9_LSUnit]>], 295 [2, 1, 1, 1, 3], 296 [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass], 297 -1>, // dynamic uops 298 // 299 // Load multiple plus branch 300 InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 301 InstrStage<1, [A9_MUX0], 0>, 302 InstrStage<1, [A9_AGU], 1>, 303 InstrStage<2, [A9_LSUnit]>, 304 InstrStage<1, [A9_Branch]>], 305 [1, 2, 1, 1, 3], 306 [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass], 307 -1>, // dynamic uops 308 // 309 // Pop, def is the 3rd operand. 310 InstrItinData<IIC_iPop , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 311 InstrStage<1, [A9_MUX0], 0>, 312 InstrStage<2, [A9_AGU], 1>, 313 InstrStage<2, [A9_LSUnit]>], 314 [1, 1, 3], 315 [NoBypass, NoBypass, A9_LdBypass], 316 -1>, // dynamic uops 317 // 318 // Pop + branch, def is the 3rd operand. 319 InstrItinData<IIC_iPop_Br, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 320 InstrStage<1, [A9_MUX0], 0>, 321 InstrStage<2, [A9_AGU], 1>, 322 InstrStage<2, [A9_LSUnit]>, 323 InstrStage<1, [A9_Branch]>], 324 [1, 1, 3], 325 [NoBypass, NoBypass, A9_LdBypass], 326 -1>, // dynamic uops 327 // 328 // iLoadi + iALUr for t2LDRpci_pic. 
329 InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 330 InstrStage<1, [A9_MUX0], 0>, 331 InstrStage<1, [A9_AGU], 0>, 332 InstrStage<1, [A9_LSUnit]>, 333 InstrStage<1, [A9_ALU0, A9_ALU1]>], 334 [2, 1]>, 335 336 // Integer store pipeline 337 /// 338 // Immediate offset 339 InstrItinData<IIC_iStore_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 340 InstrStage<1, [A9_MUX0], 0>, 341 InstrStage<1, [A9_AGU], 0>, 342 InstrStage<1, [A9_LSUnit]>], [1, 1]>, 343 InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 344 InstrStage<1, [A9_MUX0], 0>, 345 InstrStage<2, [A9_AGU], 1>, 346 InstrStage<1, [A9_LSUnit]>], [1, 1]>, 347 // FIXME: If address is 64-bit aligned, AGU cycles is 1. 348 InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 349 InstrStage<1, [A9_MUX0], 0>, 350 InstrStage<2, [A9_AGU], 1>, 351 InstrStage<1, [A9_LSUnit]>], [1, 1]>, 352 // 353 // Register offset 354 InstrItinData<IIC_iStore_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 355 InstrStage<1, [A9_MUX0], 0>, 356 InstrStage<1, [A9_AGU], 0>, 357 InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>, 358 InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 359 InstrStage<1, [A9_MUX0], 0>, 360 InstrStage<2, [A9_AGU], 1>, 361 InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>, 362 InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 363 InstrStage<1, [A9_MUX0], 0>, 364 InstrStage<2, [A9_AGU], 1>, 365 InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>, 366 // 367 // Scaled register offset 368 InstrItinData<IIC_iStore_si , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 369 InstrStage<1, [A9_MUX0], 0>, 370 InstrStage<1, [A9_AGU], 0>, 371 InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>, 372 InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 373 InstrStage<1, [A9_MUX0], 0>, 374 InstrStage<2, [A9_AGU], 1>, 375 InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>, 376 // 377 // Immediate offset with update 378 InstrItinData<IIC_iStore_iu , [InstrStage<1, 
[A9_Issue0, A9_Issue1], 0>, 379 InstrStage<1, [A9_MUX0], 0>, 380 InstrStage<1, [A9_AGU], 0>, 381 InstrStage<1, [A9_LSUnit]>], [2, 1, 1]>, 382 InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 383 InstrStage<1, [A9_MUX0], 0>, 384 InstrStage<2, [A9_AGU], 1>, 385 InstrStage<1, [A9_LSUnit]>], [3, 1, 1]>, 386 // 387 // Register offset with update 388 InstrItinData<IIC_iStore_ru , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 389 InstrStage<1, [A9_MUX0], 0>, 390 InstrStage<1, [A9_AGU], 0>, 391 InstrStage<1, [A9_LSUnit]>], 392 [2, 1, 1, 1]>, 393 InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 394 InstrStage<1, [A9_MUX0], 0>, 395 InstrStage<2, [A9_AGU], 1>, 396 InstrStage<1, [A9_LSUnit]>], 397 [3, 1, 1, 1]>, 398 InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 399 InstrStage<1, [A9_MUX0], 0>, 400 InstrStage<2, [A9_AGU], 1>, 401 InstrStage<1, [A9_LSUnit]>], 402 [3, 1, 1, 1]>, 403 // 404 // Scaled register offset with update 405 InstrItinData<IIC_iStore_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 406 InstrStage<1, [A9_MUX0], 0>, 407 InstrStage<1, [A9_AGU], 0>, 408 InstrStage<1, [A9_LSUnit]>], 409 [2, 1, 1, 1]>, 410 InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 411 InstrStage<1, [A9_MUX0], 0>, 412 InstrStage<2, [A9_AGU], 1>, 413 InstrStage<1, [A9_LSUnit]>], 414 [3, 1, 1, 1]>, 415 // 416 // Store multiple 417 InstrItinData<IIC_iStore_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 418 InstrStage<1, [A9_MUX0], 0>, 419 InstrStage<1, [A9_AGU], 0>, 420 InstrStage<2, [A9_LSUnit]>], 421 [], [], -1>, // dynamic uops 422 // 423 // Store multiple + update 424 InstrItinData<IIC_iStore_mu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 425 InstrStage<1, [A9_MUX0], 0>, 426 InstrStage<1, [A9_AGU], 0>, 427 InstrStage<2, [A9_LSUnit]>], 428 [2], [], -1>, // dynamic uops 429 // 430 // Preload 431 InstrItinData<IIC_Preload, [InstrStage<1, [A9_Issue0, A9_Issue1]>], [1, 1]>, 432 433 // Branch 434 
// 435 // no delay slots, so the latency of a branch is unimportant 436 InstrItinData<IIC_Br , [InstrStage<1, [A9_Issue0], 0>, 437 InstrStage<1, [A9_Issue1], 0>, 438 InstrStage<1, [A9_Branch]>]>, 439 440 // VFP and NEON share the same register file. This means that every VFP 441 // instruction should wait for full completion of the consecutive NEON 442 // instruction and vice-versa. We model this behavior with two artificial FUs: 443 // DRegsVFP and DRegsN. 444 // 445 // Every VFP instruction: 446 // - Acquires DRegsVFP resource for 1 cycle 447 // - Reserves DRegsN resource for the whole duration (including time to 448 // register file writeback!). 449 // Every NEON instruction does the same but with FUs swapped. 450 // 451 // Since the reserved FU cannot be acquired, this models precisely 452 // "cross-domain" stalls. 453 454 // VFP 455 // Issue through integer pipeline, and execute in NEON unit. 456 457 // FP Special Register to Integer Register File Move 458 InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 459 InstrStage<1, [A9_MUX0], 0>, 460 InstrStage<1, [A9_DRegsVFP], 0, Required>, 461 InstrStage<2, [A9_DRegsN], 0, Reserved>, 462 InstrStage<1, [A9_NPipe]>], 463 [1]>, 464 // 465 // Single-precision FP Unary 466 InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 467 InstrStage<1, [A9_MUX0], 0>, 468 InstrStage<1, [A9_DRegsVFP], 0, Required>, 469 // Extra latency cycles since wbck is 2 cycles 470 InstrStage<3, [A9_DRegsN], 0, Reserved>, 471 InstrStage<1, [A9_NPipe]>], 472 [1, 1]>, 473 // 474 // Double-precision FP Unary 475 InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 476 InstrStage<1, [A9_MUX0], 0>, 477 InstrStage<1, [A9_DRegsVFP], 0, Required>, 478 // Extra latency cycles since wbck is 2 cycles 479 InstrStage<3, [A9_DRegsN], 0, Reserved>, 480 InstrStage<1, [A9_NPipe]>], 481 [1, 1]>, 482 483 // 484 // Single-precision FP Compare 485 InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_Issue0, 
A9_Issue1], 0>, 486 InstrStage<1, [A9_MUX0], 0>, 487 InstrStage<1, [A9_DRegsVFP], 0, Required>, 488 // Extra latency cycles since wbck is 4 cycles 489 InstrStage<5, [A9_DRegsN], 0, Reserved>, 490 InstrStage<1, [A9_NPipe]>], 491 [1, 1]>, 492 // 493 // Double-precision FP Compare 494 InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 495 InstrStage<1, [A9_MUX0], 0>, 496 InstrStage<1, [A9_DRegsVFP], 0, Required>, 497 // Extra latency cycles since wbck is 4 cycles 498 InstrStage<5, [A9_DRegsN], 0, Reserved>, 499 InstrStage<1, [A9_NPipe]>], 500 [1, 1]>, 501 // 502 // Single to Double FP Convert 503 InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 504 InstrStage<1, [A9_MUX0], 0>, 505 InstrStage<1, [A9_DRegsVFP], 0, Required>, 506 InstrStage<5, [A9_DRegsN], 0, Reserved>, 507 InstrStage<1, [A9_NPipe]>], 508 [4, 1]>, 509 // 510 // Double to Single FP Convert 511 InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 512 InstrStage<1, [A9_MUX0], 0>, 513 InstrStage<1, [A9_DRegsVFP], 0, Required>, 514 InstrStage<5, [A9_DRegsN], 0, Reserved>, 515 InstrStage<1, [A9_NPipe]>], 516 [4, 1]>, 517 518 // 519 // Single to Half FP Convert 520 InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 521 InstrStage<1, [A9_MUX0], 0>, 522 InstrStage<1, [A9_DRegsVFP], 0, Required>, 523 InstrStage<5, [A9_DRegsN], 0, Reserved>, 524 InstrStage<1, [A9_NPipe]>], 525 [4, 1]>, 526 // 527 // Half to Single FP Convert 528 InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 529 InstrStage<1, [A9_MUX0], 0>, 530 InstrStage<1, [A9_DRegsVFP], 0, Required>, 531 InstrStage<3, [A9_DRegsN], 0, Reserved>, 532 InstrStage<1, [A9_NPipe]>], 533 [2, 1]>, 534 535 // 536 // Single-Precision FP to Integer Convert 537 InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 538 InstrStage<1, [A9_MUX0], 0>, 539 InstrStage<1, [A9_DRegsVFP], 0, Required>, 540 InstrStage<5, [A9_DRegsN], 0, Reserved>, 541 InstrStage<1, 
[A9_NPipe]>], 542 [4, 1]>, 543 // 544 // Double-Precision FP to Integer Convert 545 InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 546 InstrStage<1, [A9_MUX0], 0>, 547 InstrStage<1, [A9_DRegsVFP], 0, Required>, 548 InstrStage<5, [A9_DRegsN], 0, Reserved>, 549 InstrStage<1, [A9_NPipe]>], 550 [4, 1]>, 551 // 552 // Integer to Single-Precision FP Convert 553 InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 554 InstrStage<1, [A9_MUX0], 0>, 555 InstrStage<1, [A9_DRegsVFP], 0, Required>, 556 InstrStage<5, [A9_DRegsN], 0, Reserved>, 557 InstrStage<1, [A9_NPipe]>], 558 [4, 1]>, 559 // 560 // Integer to Double-Precision FP Convert 561 InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 562 InstrStage<1, [A9_MUX0], 0>, 563 InstrStage<1, [A9_DRegsVFP], 0, Required>, 564 InstrStage<5, [A9_DRegsN], 0, Reserved>, 565 InstrStage<1, [A9_NPipe]>], 566 [4, 1]>, 567 // 568 // Single-precision FP ALU 569 InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 570 InstrStage<1, [A9_MUX0], 0>, 571 InstrStage<1, [A9_DRegsVFP], 0, Required>, 572 InstrStage<5, [A9_DRegsN], 0, Reserved>, 573 InstrStage<1, [A9_NPipe]>], 574 [4, 1, 1]>, 575 // 576 // Double-precision FP ALU 577 InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 578 InstrStage<1, [A9_MUX0], 0>, 579 InstrStage<1, [A9_DRegsVFP], 0, Required>, 580 InstrStage<5, [A9_DRegsN], 0, Reserved>, 581 InstrStage<1, [A9_NPipe]>], 582 [4, 1, 1]>, 583 // 584 // Single-precision FP Multiply 585 InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 586 InstrStage<1, [A9_MUX0], 0>, 587 InstrStage<1, [A9_DRegsVFP], 0, Required>, 588 InstrStage<6, [A9_DRegsN], 0, Reserved>, 589 InstrStage<1, [A9_NPipe]>], 590 [5, 1, 1]>, 591 // 592 // Double-precision FP Multiply 593 InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 594 InstrStage<1, [A9_MUX0], 0>, 595 InstrStage<1, [A9_DRegsVFP], 0, Required>, 596 InstrStage<7, 
[A9_DRegsN], 0, Reserved>, 597 InstrStage<2, [A9_NPipe]>], 598 [6, 1, 1]>, 599 // 600 // Single-precision FP MAC 601 InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 602 InstrStage<1, [A9_MUX0], 0>, 603 InstrStage<1, [A9_DRegsVFP], 0, Required>, 604 InstrStage<9, [A9_DRegsN], 0, Reserved>, 605 InstrStage<1, [A9_NPipe]>], 606 [8, 1, 1, 1]>, 607 // 608 // Double-precision FP MAC 609 InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 610 InstrStage<1, [A9_MUX0], 0>, 611 InstrStage<1, [A9_DRegsVFP], 0, Required>, 612 InstrStage<10, [A9_DRegsN], 0, Reserved>, 613 InstrStage<2, [A9_NPipe]>], 614 [9, 1, 1, 1]>, 615 // 616 // Single-precision Fused FP MAC 617 InstrItinData<IIC_fpFMAC32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 618 InstrStage<1, [A9_MUX0], 0>, 619 InstrStage<1, [A9_DRegsVFP], 0, Required>, 620 InstrStage<9, [A9_DRegsN], 0, Reserved>, 621 InstrStage<1, [A9_NPipe]>], 622 [8, 1, 1, 1]>, 623 // 624 // Double-precision Fused FP MAC 625 InstrItinData<IIC_fpFMAC64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 626 InstrStage<1, [A9_MUX0], 0>, 627 InstrStage<1, [A9_DRegsVFP], 0, Required>, 628 InstrStage<10, [A9_DRegsN], 0, Reserved>, 629 InstrStage<2, [A9_NPipe]>], 630 [9, 1, 1, 1]>, 631 // 632 // Single-precision FP DIV 633 InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 634 InstrStage<1, [A9_MUX0], 0>, 635 InstrStage<1, [A9_DRegsVFP], 0, Required>, 636 InstrStage<16, [A9_DRegsN], 0, Reserved>, 637 InstrStage<10, [A9_NPipe]>], 638 [15, 1, 1]>, 639 // 640 // Double-precision FP DIV 641 InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 642 InstrStage<1, [A9_MUX0], 0>, 643 InstrStage<1, [A9_DRegsVFP], 0, Required>, 644 InstrStage<26, [A9_DRegsN], 0, Reserved>, 645 InstrStage<20, [A9_NPipe]>], 646 [25, 1, 1]>, 647 // 648 // Single-precision FP SQRT 649 InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 650 InstrStage<1, [A9_MUX0], 0>, 651 InstrStage<1, 
[A9_DRegsVFP], 0, Required>, 652 InstrStage<18, [A9_DRegsN], 0, Reserved>, 653 InstrStage<13, [A9_NPipe]>], 654 [17, 1]>, 655 // 656 // Double-precision FP SQRT 657 InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 658 InstrStage<1, [A9_MUX0], 0>, 659 InstrStage<1, [A9_DRegsVFP], 0, Required>, 660 InstrStage<33, [A9_DRegsN], 0, Reserved>, 661 InstrStage<28, [A9_NPipe]>], 662 [32, 1]>, 663 664 // 665 // Integer to Single-precision Move 666 InstrItinData<IIC_fpMOVIS, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 667 InstrStage<1, [A9_MUX0], 0>, 668 InstrStage<1, [A9_DRegsVFP], 0, Required>, 669 // Extra 1 latency cycle since wbck is 2 cycles 670 InstrStage<3, [A9_DRegsN], 0, Reserved>, 671 InstrStage<1, [A9_NPipe]>], 672 [1, 1]>, 673 // 674 // Integer to Double-precision Move 675 InstrItinData<IIC_fpMOVID, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 676 InstrStage<1, [A9_MUX0], 0>, 677 InstrStage<1, [A9_DRegsVFP], 0, Required>, 678 // Extra 1 latency cycle since wbck is 2 cycles 679 InstrStage<3, [A9_DRegsN], 0, Reserved>, 680 InstrStage<1, [A9_NPipe]>], 681 [1, 1, 1]>, 682 // 683 // Single-precision to Integer Move 684 // 685 // On A9 move-from-VFP is free to issue with no stall if other VFP 686 // operations are in flight. I assume it still can't dual-issue though. 687 InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 688 InstrStage<1, [A9_MUX0], 0>], 689 [2, 1]>, 690 // 691 // Double-precision to Integer Move 692 // 693 // On A9 move-from-VFP is free to issue with no stall if other VFP 694 // operations are in flight. I assume it still can't dual-issue though. 
695 InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 696 InstrStage<1, [A9_MUX0], 0>], 697 [2, 1, 1]>, 698 // 699 // Single-precision FP Load 700 InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 701 InstrStage<1, [A9_MUX0], 0>, 702 InstrStage<1, [A9_DRegsVFP], 0, Required>, 703 InstrStage<2, [A9_DRegsN], 0, Reserved>, 704 InstrStage<1, [A9_NPipe], 0>, 705 InstrStage<1, [A9_LSUnit]>], 706 [1, 1]>, 707 // 708 // Double-precision FP Load 709 // FIXME: Result latency is 1 if address is 64-bit aligned. 710 InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 711 InstrStage<1, [A9_MUX0], 0>, 712 InstrStage<1, [A9_DRegsVFP], 0, Required>, 713 InstrStage<2, [A9_DRegsN], 0, Reserved>, 714 InstrStage<1, [A9_NPipe], 0>, 715 InstrStage<1, [A9_LSUnit]>], 716 [2, 1]>, 717 // 718 // FP Load Multiple 719 // FIXME: assumes 2 doubles which requires 2 LS cycles. 720 InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 721 InstrStage<1, [A9_MUX0], 0>, 722 InstrStage<1, [A9_DRegsVFP], 0, Required>, 723 InstrStage<2, [A9_DRegsN], 0, Reserved>, 724 InstrStage<1, [A9_NPipe], 0>, 725 InstrStage<2, [A9_LSUnit]>], 726 [1, 1, 1, 1], [], -1>, // dynamic uops 727 // 728 // FP Load Multiple + update 729 // FIXME: assumes 2 doubles which requires 2 LS cycles. 
730 InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 731 InstrStage<1, [A9_MUX0], 0>, 732 InstrStage<1, [A9_DRegsVFP], 0, Required>, 733 InstrStage<2, [A9_DRegsN], 0, Reserved>, 734 InstrStage<1, [A9_NPipe], 0>, 735 InstrStage<2, [A9_LSUnit]>], 736 [2, 1, 1, 1], [], -1>, // dynamic uops 737 // 738 // Single-precision FP Store 739 InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 740 InstrStage<1, [A9_MUX0], 0>, 741 InstrStage<1, [A9_DRegsVFP], 0, Required>, 742 InstrStage<2, [A9_DRegsN], 0, Reserved>, 743 InstrStage<1, [A9_NPipe], 0>, 744 InstrStage<1, [A9_LSUnit]>], 745 [1, 1]>, 746 // 747 // Double-precision FP Store 748 InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 749 InstrStage<1, [A9_MUX0], 0>, 750 InstrStage<1, [A9_DRegsVFP], 0, Required>, 751 InstrStage<2, [A9_DRegsN], 0, Reserved>, 752 InstrStage<1, [A9_NPipe], 0>, 753 InstrStage<1, [A9_LSUnit]>], 754 [1, 1]>, 755 // 756 // FP Store Multiple 757 // FIXME: assumes 2 doubles which requires 2 LS cycles. 758 InstrItinData<IIC_fpStore_m,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 759 InstrStage<1, [A9_MUX0], 0>, 760 InstrStage<1, [A9_DRegsVFP], 0, Required>, 761 InstrStage<2, [A9_DRegsN], 0, Reserved>, 762 InstrStage<1, [A9_NPipe], 0>, 763 InstrStage<2, [A9_LSUnit]>], 764 [1, 1, 1, 1], [], -1>, // dynamic uops 765 // 766 // FP Store Multiple + update 767 // FIXME: assumes 2 doubles which requires 2 LS cycles. 
768 InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 769 InstrStage<1, [A9_MUX0], 0>, 770 InstrStage<1, [A9_DRegsVFP], 0, Required>, 771 InstrStage<2, [A9_DRegsN], 0, Reserved>, 772 InstrStage<1, [A9_NPipe], 0>, 773 InstrStage<2, [A9_LSUnit]>], 774 [2, 1, 1, 1], [], -1>, // dynamic uops 775 // NEON 776 // VLD1 777 InstrItinData<IIC_VLD1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 778 InstrStage<1, [A9_MUX0], 0>, 779 InstrStage<1, [A9_DRegsN], 0, Required>, 780 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 781 InstrStage<1, [A9_NPipe], 0>, 782 InstrStage<1, [A9_LSUnit]>], 783 [1, 1]>, 784 // VLD1x2 785 InstrItinData<IIC_VLD1x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 786 InstrStage<1, [A9_MUX0], 0>, 787 InstrStage<1, [A9_DRegsN], 0, Required>, 788 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 789 InstrStage<1, [A9_NPipe], 0>, 790 InstrStage<1, [A9_LSUnit]>], 791 [1, 1, 1]>, 792 // VLD1x3 793 InstrItinData<IIC_VLD1x3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 794 InstrStage<1, [A9_MUX0], 0>, 795 InstrStage<1, [A9_DRegsN], 0, Required>, 796 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 797 InstrStage<2, [A9_NPipe], 0>, 798 InstrStage<2, [A9_LSUnit]>], 799 [1, 1, 2, 1]>, 800 // VLD1x4 801 InstrItinData<IIC_VLD1x4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 802 InstrStage<1, [A9_MUX0], 0>, 803 InstrStage<1, [A9_DRegsN], 0, Required>, 804 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 805 InstrStage<2, [A9_NPipe], 0>, 806 InstrStage<2, [A9_LSUnit]>], 807 [1, 1, 2, 2, 1]>, 808 // VLD1u 809 InstrItinData<IIC_VLD1u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 810 InstrStage<1, [A9_MUX0], 0>, 811 InstrStage<1, [A9_DRegsN], 0, Required>, 812 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 813 InstrStage<1, [A9_NPipe], 0>, 814 InstrStage<1, [A9_LSUnit]>], 815 [1, 2, 1]>, 816 // VLD1x2u 817 InstrItinData<IIC_VLD1x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 818 InstrStage<1, [A9_MUX0], 0>, 819 InstrStage<1, [A9_DRegsN], 0, Required>, 820 InstrStage<7, [A9_DRegsVFP], 
0, Reserved>, 821 InstrStage<1, [A9_NPipe], 0>, 822 InstrStage<1, [A9_LSUnit]>], 823 [1, 1, 2, 1]>, 824 // VLD1x3u 825 InstrItinData<IIC_VLD1x3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 826 InstrStage<1, [A9_MUX0], 0>, 827 InstrStage<1, [A9_DRegsN], 0, Required>, 828 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 829 InstrStage<2, [A9_NPipe], 0>, 830 InstrStage<2, [A9_LSUnit]>], 831 [1, 1, 2, 2, 1]>, 832 // VLD1x4u 833 InstrItinData<IIC_VLD1x4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 834 InstrStage<1, [A9_MUX0], 0>, 835 InstrStage<1, [A9_DRegsN], 0, Required>, 836 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 837 InstrStage<2, [A9_NPipe], 0>, 838 InstrStage<2, [A9_LSUnit]>], 839 [1, 1, 2, 2, 2, 1]>, 840 // 841 // VLD1ln 842 InstrItinData<IIC_VLD1ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 843 InstrStage<1, [A9_MUX0], 0>, 844 InstrStage<1, [A9_DRegsN], 0, Required>, 845 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 846 InstrStage<2, [A9_NPipe], 0>, 847 InstrStage<2, [A9_LSUnit]>], 848 [3, 1, 1, 1]>, 849 // 850 // VLD1lnu 851 InstrItinData<IIC_VLD1lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 852 InstrStage<1, [A9_MUX0], 0>, 853 InstrStage<1, [A9_DRegsN], 0, Required>, 854 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 855 InstrStage<2, [A9_NPipe], 0>, 856 InstrStage<2, [A9_LSUnit]>], 857 [3, 2, 1, 1, 1, 1]>, 858 // 859 // VLD1dup 860 InstrItinData<IIC_VLD1dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 861 InstrStage<1, [A9_MUX0], 0>, 862 InstrStage<1, [A9_DRegsN], 0, Required>, 863 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 864 InstrStage<1, [A9_NPipe], 0>, 865 InstrStage<1, [A9_LSUnit]>], 866 [2, 1]>, 867 // 868 // VLD1dupu 869 InstrItinData<IIC_VLD1dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 870 InstrStage<1, [A9_MUX0], 0>, 871 InstrStage<1, [A9_DRegsN], 0, Required>, 872 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 873 InstrStage<1, [A9_NPipe], 0>, 874 InstrStage<1, [A9_LSUnit]>], 875 [2, 2, 1, 1]>, 876 // 877 // VLD2 878 InstrItinData<IIC_VLD2, [InstrStage<1, 
[A9_Issue0, A9_Issue1], 0>, 879 InstrStage<1, [A9_MUX0], 0>, 880 InstrStage<1, [A9_DRegsN], 0, Required>, 881 // Extra latency cycles since wbck is 7 cycles 882 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 883 InstrStage<1, [A9_NPipe], 0>, 884 InstrStage<1, [A9_LSUnit]>], 885 [2, 2, 1]>, 886 // 887 // VLD2x2 888 InstrItinData<IIC_VLD2x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 889 InstrStage<1, [A9_MUX0], 0>, 890 InstrStage<1, [A9_DRegsN], 0, Required>, 891 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 892 InstrStage<2, [A9_NPipe], 0>, 893 InstrStage<2, [A9_LSUnit]>], 894 [2, 3, 2, 3, 1]>, 895 // 896 // VLD2ln 897 InstrItinData<IIC_VLD2ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 898 InstrStage<1, [A9_MUX0], 0>, 899 InstrStage<1, [A9_DRegsN], 0, Required>, 900 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 901 InstrStage<2, [A9_NPipe], 0>, 902 InstrStage<2, [A9_LSUnit]>], 903 [3, 3, 1, 1, 1, 1]>, 904 // 905 // VLD2u 906 InstrItinData<IIC_VLD2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 907 InstrStage<1, [A9_MUX0], 0>, 908 InstrStage<1, [A9_DRegsN], 0, Required>, 909 // Extra latency cycles since wbck is 7 cycles 910 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 911 InstrStage<1, [A9_NPipe], 0>, 912 InstrStage<1, [A9_LSUnit]>], 913 [2, 2, 2, 1, 1, 1]>, 914 // 915 // VLD2x2u 916 InstrItinData<IIC_VLD2x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 917 InstrStage<1, [A9_MUX0], 0>, 918 InstrStage<1, [A9_DRegsN], 0, Required>, 919 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 920 InstrStage<2, [A9_NPipe], 0>, 921 InstrStage<2, [A9_LSUnit]>], 922 [2, 3, 2, 3, 2, 1]>, 923 // 924 // VLD2lnu 925 InstrItinData<IIC_VLD2lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 926 InstrStage<1, [A9_MUX0], 0>, 927 InstrStage<1, [A9_DRegsN], 0, Required>, 928 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 929 InstrStage<2, [A9_NPipe], 0>, 930 InstrStage<2, [A9_LSUnit]>], 931 [3, 3, 2, 1, 1, 1, 1, 1]>, 932 // 933 // VLD2dup 934 InstrItinData<IIC_VLD2dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 935 
InstrStage<1, [A9_MUX0], 0>, 936 InstrStage<1, [A9_DRegsN], 0, Required>, 937 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 938 InstrStage<1, [A9_NPipe], 0>, 939 InstrStage<1, [A9_LSUnit]>], 940 [2, 2, 1]>, 941 // 942 // VLD2dupu 943 InstrItinData<IIC_VLD2dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 944 InstrStage<1, [A9_MUX0], 0>, 945 InstrStage<1, [A9_DRegsN], 0, Required>, 946 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 947 InstrStage<1, [A9_NPipe], 0>, 948 InstrStage<1, [A9_LSUnit]>], 949 [2, 2, 2, 1, 1]>, 950 // 951 // VLD3 952 InstrItinData<IIC_VLD3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 953 InstrStage<1, [A9_MUX0], 0>, 954 InstrStage<1, [A9_DRegsN], 0, Required>, 955 InstrStage<9,[A9_DRegsVFP], 0, Reserved>, 956 InstrStage<3, [A9_NPipe], 0>, 957 InstrStage<3, [A9_LSUnit]>], 958 [3, 3, 4, 1]>, 959 // 960 // VLD3ln 961 InstrItinData<IIC_VLD3ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 962 InstrStage<1, [A9_MUX0], 0>, 963 InstrStage<1, [A9_DRegsN], 0, Required>, 964 InstrStage<11,[A9_DRegsVFP], 0, Reserved>, 965 InstrStage<5, [A9_NPipe], 0>, 966 InstrStage<5, [A9_LSUnit]>], 967 [5, 5, 6, 1, 1, 1, 1, 2]>, 968 // 969 // VLD3u 970 InstrItinData<IIC_VLD3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 971 InstrStage<1, [A9_MUX0], 0>, 972 InstrStage<1, [A9_DRegsN], 0, Required>, 973 InstrStage<9,[A9_DRegsVFP], 0, Reserved>, 974 InstrStage<3, [A9_NPipe], 0>, 975 InstrStage<3, [A9_LSUnit]>], 976 [3, 3, 4, 2, 1]>, 977 // 978 // VLD3lnu 979 InstrItinData<IIC_VLD3lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 980 InstrStage<1, [A9_MUX0], 0>, 981 InstrStage<1, [A9_DRegsN], 0, Required>, 982 InstrStage<11,[A9_DRegsVFP], 0, Reserved>, 983 InstrStage<5, [A9_NPipe], 0>, 984 InstrStage<5, [A9_LSUnit]>], 985 [5, 5, 6, 2, 1, 1, 1, 1, 1, 2]>, 986 // 987 // VLD3dup 988 InstrItinData<IIC_VLD3dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 989 InstrStage<1, [A9_MUX0], 0>, 990 InstrStage<1, [A9_DRegsN], 0, Required>, 991 InstrStage<9, [A9_DRegsVFP], 0, Reserved>, 992 
InstrStage<3, [A9_NPipe], 0>, 993 InstrStage<3, [A9_LSUnit]>], 994 [3, 3, 4, 1]>, 995 // 996 // VLD3dupu 997 InstrItinData<IIC_VLD3dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 998 InstrStage<1, [A9_MUX0], 0>, 999 InstrStage<1, [A9_DRegsN], 0, Required>, 1000 InstrStage<9, [A9_DRegsVFP], 0, Reserved>, 1001 InstrStage<3, [A9_NPipe], 0>, 1002 InstrStage<3, [A9_LSUnit]>], 1003 [3, 3, 4, 2, 1, 1]>, 1004 // 1005 // VLD4 1006 InstrItinData<IIC_VLD4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1007 InstrStage<1, [A9_MUX0], 0>, 1008 InstrStage<1, [A9_DRegsN], 0, Required>, 1009 InstrStage<9,[A9_DRegsVFP], 0, Reserved>, 1010 InstrStage<3, [A9_NPipe], 0>, 1011 InstrStage<3, [A9_LSUnit]>], 1012 [3, 3, 4, 4, 1]>, 1013 // 1014 // VLD4ln 1015 InstrItinData<IIC_VLD4ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1016 InstrStage<1, [A9_MUX0], 0>, 1017 InstrStage<1, [A9_DRegsN], 0, Required>, 1018 InstrStage<10,[A9_DRegsVFP], 0, Reserved>, 1019 InstrStage<4, [A9_NPipe], 0>, 1020 InstrStage<4, [A9_LSUnit]>], 1021 [4, 4, 5, 5, 1, 1, 1, 1, 2, 2]>, 1022 // 1023 // VLD4u 1024 InstrItinData<IIC_VLD4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1025 InstrStage<1, [A9_MUX0], 0>, 1026 InstrStage<1, [A9_DRegsN], 0, Required>, 1027 InstrStage<9,[A9_DRegsVFP], 0, Reserved>, 1028 InstrStage<3, [A9_NPipe], 0>, 1029 InstrStage<3, [A9_LSUnit]>], 1030 [3, 3, 4, 4, 2, 1]>, 1031 // 1032 // VLD4lnu 1033 InstrItinData<IIC_VLD4lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1034 InstrStage<1, [A9_MUX0], 0>, 1035 InstrStage<1, [A9_DRegsN], 0, Required>, 1036 InstrStage<10,[A9_DRegsVFP], 0, Reserved>, 1037 InstrStage<4, [A9_NPipe], 0>, 1038 InstrStage<4, [A9_LSUnit]>], 1039 [4, 4, 5, 5, 2, 1, 1, 1, 1, 1, 2, 2]>, 1040 // 1041 // VLD4dup 1042 InstrItinData<IIC_VLD4dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1043 InstrStage<1, [A9_MUX0], 0>, 1044 InstrStage<1, [A9_DRegsN], 0, Required>, 1045 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1046 InstrStage<2, [A9_NPipe], 0>, 1047 InstrStage<2, [A9_LSUnit]>], 
1048 [2, 2, 3, 3, 1]>, 1049 // 1050 // VLD4dupu 1051 InstrItinData<IIC_VLD4dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1052 InstrStage<1, [A9_MUX0], 0>, 1053 InstrStage<1, [A9_DRegsN], 0, Required>, 1054 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1055 InstrStage<2, [A9_NPipe], 0>, 1056 InstrStage<2, [A9_LSUnit]>], 1057 [2, 2, 3, 3, 2, 1, 1]>, 1058 // 1059 // VST1 1060 InstrItinData<IIC_VST1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1061 InstrStage<1, [A9_MUX0], 0>, 1062 InstrStage<1, [A9_DRegsN], 0, Required>, 1063 InstrStage<1, [A9_DRegsVFP], 0, Reserved>, 1064 InstrStage<1, [A9_NPipe], 0>, 1065 InstrStage<1, [A9_LSUnit]>], 1066 [1, 1, 1]>, 1067 // 1068 // VST1x2 1069 InstrItinData<IIC_VST1x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1070 InstrStage<1, [A9_MUX0], 0>, 1071 InstrStage<1, [A9_DRegsN], 0, Required>, 1072 InstrStage<1, [A9_DRegsVFP], 0, Reserved>, 1073 InstrStage<1, [A9_NPipe], 0>, 1074 InstrStage<1, [A9_LSUnit]>], 1075 [1, 1, 1, 1]>, 1076 // 1077 // VST1x3 1078 InstrItinData<IIC_VST1x3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1079 InstrStage<1, [A9_MUX0], 0>, 1080 InstrStage<1, [A9_DRegsN], 0, Required>, 1081 InstrStage<2, [A9_DRegsVFP], 0, Reserved>, 1082 InstrStage<2, [A9_NPipe], 0>, 1083 InstrStage<2, [A9_LSUnit]>], 1084 [1, 1, 1, 1, 2]>, 1085 // 1086 // VST1x4 1087 InstrItinData<IIC_VST1x4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1088 InstrStage<1, [A9_MUX0], 0>, 1089 InstrStage<1, [A9_DRegsN], 0, Required>, 1090 InstrStage<2, [A9_DRegsVFP], 0, Reserved>, 1091 InstrStage<2, [A9_NPipe], 0>, 1092 InstrStage<2, [A9_LSUnit]>], 1093 [1, 1, 1, 1, 2, 2]>, 1094 // 1095 // VST1u 1096 InstrItinData<IIC_VST1u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1097 InstrStage<1, [A9_MUX0], 0>, 1098 InstrStage<1, [A9_DRegsN], 0, Required>, 1099 InstrStage<1, [A9_DRegsVFP], 0, Reserved>, 1100 InstrStage<1, [A9_NPipe], 0>, 1101 InstrStage<1, [A9_LSUnit]>], 1102 [2, 1, 1, 1, 1]>, 1103 // 1104 // VST1x2u 1105 InstrItinData<IIC_VST1x2u, [InstrStage<1, 
[A9_Issue0, A9_Issue1], 0>, 1106 InstrStage<1, [A9_MUX0], 0>, 1107 InstrStage<1, [A9_DRegsN], 0, Required>, 1108 InstrStage<1, [A9_DRegsVFP], 0, Reserved>, 1109 InstrStage<1, [A9_NPipe], 0>, 1110 InstrStage<1, [A9_LSUnit]>], 1111 [2, 1, 1, 1, 1, 1]>, 1112 // 1113 // VST1x3u 1114 InstrItinData<IIC_VST1x3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1115 InstrStage<1, [A9_MUX0], 0>, 1116 InstrStage<1, [A9_DRegsN], 0, Required>, 1117 InstrStage<2, [A9_DRegsVFP], 0, Reserved>, 1118 InstrStage<2, [A9_NPipe], 0>, 1119 InstrStage<2, [A9_LSUnit]>], 1120 [2, 1, 1, 1, 1, 1, 2]>, 1121 // 1122 // VST1x4u 1123 InstrItinData<IIC_VST1x4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1124 InstrStage<1, [A9_MUX0], 0>, 1125 InstrStage<1, [A9_DRegsN], 0, Required>, 1126 InstrStage<2, [A9_DRegsVFP], 0, Reserved>, 1127 InstrStage<2, [A9_NPipe], 0>, 1128 InstrStage<2, [A9_LSUnit]>], 1129 [2, 1, 1, 1, 1, 1, 2, 2]>, 1130 // 1131 // VST1ln 1132 InstrItinData<IIC_VST1ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1133 InstrStage<1, [A9_MUX0], 0>, 1134 InstrStage<1, [A9_DRegsN], 0, Required>, 1135 InstrStage<1, [A9_DRegsVFP], 0, Reserved>, 1136 InstrStage<1, [A9_NPipe], 0>, 1137 InstrStage<1, [A9_LSUnit]>], 1138 [1, 1, 1]>, 1139 // 1140 // VST1lnu 1141 InstrItinData<IIC_VST1lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1142 InstrStage<1, [A9_MUX0], 0>, 1143 InstrStage<1, [A9_DRegsN], 0, Required>, 1144 InstrStage<1, [A9_DRegsVFP], 0, Reserved>, 1145 InstrStage<1, [A9_NPipe], 0>, 1146 InstrStage<1, [A9_LSUnit]>], 1147 [2, 1, 1, 1, 1]>, 1148 // 1149 // VST2 1150 InstrItinData<IIC_VST2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1151 InstrStage<1, [A9_MUX0], 0>, 1152 InstrStage<1, [A9_DRegsN], 0, Required>, 1153 InstrStage<1, [A9_DRegsVFP], 0, Reserved>, 1154 InstrStage<1, [A9_NPipe], 0>, 1155 InstrStage<1, [A9_LSUnit]>], 1156 [1, 1, 1, 1]>, 1157 // 1158 // VST2x2 1159 InstrItinData<IIC_VST2x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1160 InstrStage<1, [A9_MUX0], 0>, 1161 InstrStage<1, 
[A9_DRegsN], 0, Required>, 1162 InstrStage<3, [A9_DRegsVFP], 0, Reserved>, 1163 InstrStage<3, [A9_NPipe], 0>, 1164 InstrStage<3, [A9_LSUnit]>], 1165 [1, 1, 1, 1, 2, 2]>, 1166 // 1167 // VST2u 1168 InstrItinData<IIC_VST2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1169 InstrStage<1, [A9_MUX0], 0>, 1170 InstrStage<1, [A9_DRegsN], 0, Required>, 1171 InstrStage<1, [A9_DRegsVFP], 0, Reserved>, 1172 InstrStage<1, [A9_NPipe], 0>, 1173 InstrStage<1, [A9_LSUnit]>], 1174 [2, 1, 1, 1, 1, 1]>, 1175 // 1176 // VST2x2u 1177 InstrItinData<IIC_VST2x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1178 InstrStage<1, [A9_MUX0], 0>, 1179 InstrStage<1, [A9_DRegsN], 0, Required>, 1180 InstrStage<3, [A9_DRegsVFP], 0, Reserved>, 1181 InstrStage<3, [A9_NPipe], 0>, 1182 InstrStage<3, [A9_LSUnit]>], 1183 [2, 1, 1, 1, 1, 1, 2, 2]>, 1184 // 1185 // VST2ln 1186 InstrItinData<IIC_VST2ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1187 InstrStage<1, [A9_MUX0], 0>, 1188 InstrStage<1, [A9_DRegsN], 0, Required>, 1189 InstrStage<1, [A9_DRegsVFP], 0, Reserved>, 1190 InstrStage<1, [A9_NPipe], 0>, 1191 InstrStage<1, [A9_LSUnit]>], 1192 [1, 1, 1, 1]>, 1193 // 1194 // VST2lnu 1195 InstrItinData<IIC_VST2lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1196 InstrStage<1, [A9_MUX0], 0>, 1197 InstrStage<1, [A9_DRegsN], 0, Required>, 1198 InstrStage<1, [A9_DRegsVFP], 0, Reserved>, 1199 InstrStage<1, [A9_NPipe], 0>, 1200 InstrStage<1, [A9_LSUnit]>], 1201 [2, 1, 1, 1, 1, 1]>, 1202 // 1203 // VST3 1204 InstrItinData<IIC_VST3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1205 InstrStage<1, [A9_MUX0], 0>, 1206 InstrStage<1, [A9_DRegsN], 0, Required>, 1207 InstrStage<2, [A9_DRegsVFP], 0, Reserved>, 1208 InstrStage<2, [A9_NPipe], 0>, 1209 InstrStage<2, [A9_LSUnit]>], 1210 [1, 1, 1, 1, 2]>, 1211 // 1212 // VST3u 1213 InstrItinData<IIC_VST3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1214 InstrStage<1, [A9_MUX0], 0>, 1215 InstrStage<1, [A9_DRegsN], 0, Required>, 1216 InstrStage<2, [A9_DRegsVFP], 0, Reserved>, 1217 
InstrStage<2, [A9_NPipe], 0>, 1218 InstrStage<2, [A9_LSUnit]>], 1219 [2, 1, 1, 1, 1, 1, 2]>, 1220 // 1221 // VST3ln 1222 InstrItinData<IIC_VST3ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1223 InstrStage<1, [A9_MUX0], 0>, 1224 InstrStage<1, [A9_DRegsN], 0, Required>, 1225 InstrStage<3, [A9_DRegsVFP], 0, Reserved>, 1226 InstrStage<3, [A9_NPipe], 0>, 1227 InstrStage<3, [A9_LSUnit]>], 1228 [1, 1, 1, 1, 2]>, 1229 // 1230 // VST3lnu 1231 InstrItinData<IIC_VST3lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1232 InstrStage<1, [A9_MUX0], 0>, 1233 InstrStage<1, [A9_DRegsN], 0, Required>, 1234 InstrStage<3, [A9_DRegsVFP], 0, Reserved>, 1235 InstrStage<3, [A9_NPipe], 0>, 1236 InstrStage<3, [A9_LSUnit]>], 1237 [2, 1, 1, 1, 1, 1, 2]>, 1238 // 1239 // VST4 1240 InstrItinData<IIC_VST4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1241 InstrStage<1, [A9_MUX0], 0>, 1242 InstrStage<1, [A9_DRegsN], 0, Required>, 1243 InstrStage<2, [A9_DRegsVFP], 0, Reserved>, 1244 InstrStage<2, [A9_NPipe], 0>, 1245 InstrStage<2, [A9_LSUnit]>], 1246 [1, 1, 1, 1, 2, 2]>, 1247 // 1248 // VST4u 1249 InstrItinData<IIC_VST4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1250 InstrStage<1, [A9_MUX0], 0>, 1251 InstrStage<1, [A9_DRegsN], 0, Required>, 1252 InstrStage<2, [A9_DRegsVFP], 0, Reserved>, 1253 InstrStage<2, [A9_NPipe], 0>, 1254 InstrStage<2, [A9_LSUnit]>], 1255 [2, 1, 1, 1, 1, 1, 2, 2]>, 1256 // 1257 // VST4ln 1258 InstrItinData<IIC_VST4ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1259 InstrStage<1, [A9_MUX0], 0>, 1260 InstrStage<1, [A9_DRegsN], 0, Required>, 1261 InstrStage<2, [A9_DRegsVFP], 0, Reserved>, 1262 InstrStage<2, [A9_NPipe], 0>, 1263 InstrStage<2, [A9_LSUnit]>], 1264 [1, 1, 1, 1, 2, 2]>, 1265 // 1266 // VST4lnu 1267 InstrItinData<IIC_VST4lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1268 InstrStage<1, [A9_MUX0], 0>, 1269 InstrStage<1, [A9_DRegsN], 0, Required>, 1270 InstrStage<2, [A9_DRegsVFP], 0, Reserved>, 1271 InstrStage<2, [A9_NPipe], 0>, 1272 InstrStage<2, [A9_LSUnit]>], 1273 [2, 
1, 1, 1, 1, 1, 2, 2]>, 1274 1275 // 1276 // Double-register Integer Unary 1277 InstrItinData<IIC_VUNAiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1278 InstrStage<1, [A9_MUX0], 0>, 1279 InstrStage<1, [A9_DRegsN], 0, Required>, 1280 // Extra latency cycles since wbck is 6 cycles 1281 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1282 InstrStage<1, [A9_NPipe]>], 1283 [4, 2]>, 1284 // 1285 // Quad-register Integer Unary 1286 InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1287 InstrStage<1, [A9_MUX0], 0>, 1288 InstrStage<1, [A9_DRegsN], 0, Required>, 1289 // Extra latency cycles since wbck is 6 cycles 1290 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1291 InstrStage<1, [A9_NPipe]>], 1292 [4, 2]>, 1293 // 1294 // Double-register Integer Q-Unary 1295 InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1296 InstrStage<1, [A9_MUX0], 0>, 1297 InstrStage<1, [A9_DRegsN], 0, Required>, 1298 // Extra latency cycles since wbck is 6 cycles 1299 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1300 InstrStage<1, [A9_NPipe]>], 1301 [4, 1]>, 1302 // 1303 // Quad-register Integer Q-Unary 1304 InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1305 InstrStage<1, [A9_MUX0], 0>, 1306 InstrStage<1, [A9_DRegsN], 0, Required>, 1307 // Extra latency cycles since wbck is 6 cycles 1308 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1309 InstrStage<1, [A9_NPipe]>], 1310 [4, 1]>, 1311 // 1312 // Double-register Integer Binary 1313 InstrItinData<IIC_VBINiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1314 InstrStage<1, [A9_MUX0], 0>, 1315 InstrStage<1, [A9_DRegsN], 0, Required>, 1316 // Extra latency cycles since wbck is 6 cycles 1317 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1318 InstrStage<1, [A9_NPipe]>], 1319 [3, 2, 2]>, 1320 // 1321 // Quad-register Integer Binary 1322 InstrItinData<IIC_VBINiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1323 InstrStage<1, [A9_MUX0], 0>, 1324 InstrStage<1, [A9_DRegsN], 0, Required>, 1325 // Extra latency 
cycles since wbck is 6 cycles 1326 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1327 InstrStage<1, [A9_NPipe]>], 1328 [3, 2, 2]>, 1329 // 1330 // Double-register Integer Subtract 1331 InstrItinData<IIC_VSUBiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1332 InstrStage<1, [A9_MUX0], 0>, 1333 InstrStage<1, [A9_DRegsN], 0, Required>, 1334 // Extra latency cycles since wbck is 6 cycles 1335 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1336 InstrStage<1, [A9_NPipe]>], 1337 [3, 2, 1]>, 1338 // 1339 // Quad-register Integer Subtract 1340 InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1341 InstrStage<1, [A9_MUX0], 0>, 1342 InstrStage<1, [A9_DRegsN], 0, Required>, 1343 // Extra latency cycles since wbck is 6 cycles 1344 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1345 InstrStage<1, [A9_NPipe]>], 1346 [3, 2, 1]>, 1347 // 1348 // Double-register Integer Shift 1349 InstrItinData<IIC_VSHLiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1350 InstrStage<1, [A9_MUX0], 0>, 1351 InstrStage<1, [A9_DRegsN], 0, Required>, 1352 // Extra latency cycles since wbck is 6 cycles 1353 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1354 InstrStage<1, [A9_NPipe]>], 1355 [3, 1, 1]>, 1356 // 1357 // Quad-register Integer Shift 1358 InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1359 InstrStage<1, [A9_MUX0], 0>, 1360 InstrStage<1, [A9_DRegsN], 0, Required>, 1361 // Extra latency cycles since wbck is 6 cycles 1362 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1363 InstrStage<1, [A9_NPipe]>], 1364 [3, 1, 1]>, 1365 // 1366 // Double-register Integer Shift (4 cycle) 1367 InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1368 InstrStage<1, [A9_MUX0], 0>, 1369 InstrStage<1, [A9_DRegsN], 0, Required>, 1370 // Extra latency cycles since wbck is 6 cycles 1371 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1372 InstrStage<1, [A9_NPipe]>], 1373 [4, 1, 1]>, 1374 // 1375 // Quad-register Integer Shift (4 cycle) 1376 InstrItinData<IIC_VSHLi4Q, [InstrStage<1, 
[A9_Issue0, A9_Issue1], 0>, 1377 InstrStage<1, [A9_MUX0], 0>, 1378 InstrStage<1, [A9_DRegsN], 0, Required>, 1379 // Extra latency cycles since wbck is 6 cycles 1380 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1381 InstrStage<1, [A9_NPipe]>], 1382 [4, 1, 1]>, 1383 // 1384 // Double-register Integer Binary (4 cycle) 1385 InstrItinData<IIC_VBINi4D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1386 InstrStage<1, [A9_MUX0], 0>, 1387 InstrStage<1, [A9_DRegsN], 0, Required>, 1388 // Extra latency cycles since wbck is 6 cycles 1389 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1390 InstrStage<1, [A9_NPipe]>], 1391 [4, 2, 2]>, 1392 // 1393 // Quad-register Integer Binary (4 cycle) 1394 InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1395 InstrStage<1, [A9_MUX0], 0>, 1396 InstrStage<1, [A9_DRegsN], 0, Required>, 1397 // Extra latency cycles since wbck is 6 cycles 1398 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1399 InstrStage<1, [A9_NPipe]>], 1400 [4, 2, 2]>, 1401 // 1402 // Double-register Integer Subtract (4 cycle) 1403 InstrItinData<IIC_VSUBi4D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1404 InstrStage<1, [A9_MUX0], 0>, 1405 InstrStage<1, [A9_DRegsN], 0, Required>, 1406 // Extra latency cycles since wbck is 6 cycles 1407 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1408 InstrStage<1, [A9_NPipe]>], 1409 [4, 2, 1]>, 1410 // 1411 // Quad-register Integer Subtract (4 cycle) 1412 InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1413 InstrStage<1, [A9_MUX0], 0>, 1414 InstrStage<1, [A9_DRegsN], 0, Required>, 1415 // Extra latency cycles since wbck is 6 cycles 1416 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1417 InstrStage<1, [A9_NPipe]>], 1418 [4, 2, 1]>, 1419 1420 // 1421 // Double-register Integer Count 1422 InstrItinData<IIC_VCNTiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1423 InstrStage<1, [A9_MUX0], 0>, 1424 InstrStage<1, [A9_DRegsN], 0, Required>, 1425 // Extra latency cycles since wbck is 6 cycles 1426 InstrStage<7, [A9_DRegsVFP], 0, 
Reserved>, 1427 InstrStage<1, [A9_NPipe]>], 1428 [3, 2, 2]>, 1429 // 1430 // Quad-register Integer Count 1431 // Result written in N3, but that is relative to the last cycle of multicycle, 1432 // so we use 4 for those cases 1433 InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1434 InstrStage<1, [A9_MUX0], 0>, 1435 InstrStage<1, [A9_DRegsN], 0, Required>, 1436 // Extra latency cycles since wbck is 7 cycles 1437 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1438 InstrStage<2, [A9_NPipe]>], 1439 [4, 2, 2]>, 1440 // 1441 // Double-register Absolute Difference and Accumulate 1442 InstrItinData<IIC_VABAD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1443 InstrStage<1, [A9_MUX0], 0>, 1444 InstrStage<1, [A9_DRegsN], 0, Required>, 1445 // Extra latency cycles since wbck is 6 cycles 1446 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1447 InstrStage<1, [A9_NPipe]>], 1448 [6, 3, 2, 1]>, 1449 // 1450 // Quad-register Absolute Difference and Accumulate 1451 InstrItinData<IIC_VABAQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1452 InstrStage<1, [A9_MUX0], 0>, 1453 InstrStage<1, [A9_DRegsN], 0, Required>, 1454 // Extra latency cycles since wbck is 6 cycles 1455 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1456 InstrStage<2, [A9_NPipe]>], 1457 [6, 3, 2, 1]>, 1458 // 1459 // Double-register Integer Pair Add Long 1460 InstrItinData<IIC_VPALiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1461 InstrStage<1, [A9_MUX0], 0>, 1462 InstrStage<1, [A9_DRegsN], 0, Required>, 1463 // Extra latency cycles since wbck is 6 cycles 1464 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1465 InstrStage<1, [A9_NPipe]>], 1466 [6, 3, 1]>, 1467 // 1468 // Quad-register Integer Pair Add Long 1469 InstrItinData<IIC_VPALiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1470 InstrStage<1, [A9_MUX0], 0>, 1471 InstrStage<1, [A9_DRegsN], 0, Required>, 1472 // Extra latency cycles since wbck is 6 cycles 1473 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1474 InstrStage<2, [A9_NPipe]>], 1475 [6, 3, 1]>, 1476 1477 // 
1478 // Double-register Integer Multiply (.8, .16) 1479 InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1480 InstrStage<1, [A9_MUX0], 0>, 1481 InstrStage<1, [A9_DRegsN], 0, Required>, 1482 // Extra latency cycles since wbck is 6 cycles 1483 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1484 InstrStage<1, [A9_NPipe]>], 1485 [6, 2, 2]>, 1486 // 1487 // Quad-register Integer Multiply (.8, .16) 1488 InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1489 InstrStage<1, [A9_MUX0], 0>, 1490 InstrStage<1, [A9_DRegsN], 0, Required>, 1491 // Extra latency cycles since wbck is 7 cycles 1492 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1493 InstrStage<2, [A9_NPipe]>], 1494 [7, 2, 2]>, 1495 1496 // 1497 // Double-register Integer Multiply (.32) 1498 InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1499 InstrStage<1, [A9_MUX0], 0>, 1500 InstrStage<1, [A9_DRegsN], 0, Required>, 1501 // Extra latency cycles since wbck is 7 cycles 1502 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1503 InstrStage<2, [A9_NPipe]>], 1504 [7, 2, 1]>, 1505 // 1506 // Quad-register Integer Multiply (.32) 1507 InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1508 InstrStage<1, [A9_MUX0], 0>, 1509 InstrStage<1, [A9_DRegsN], 0, Required>, 1510 // Extra latency cycles since wbck is 9 cycles 1511 InstrStage<10, [A9_DRegsVFP], 0, Reserved>, 1512 InstrStage<4, [A9_NPipe]>], 1513 [9, 2, 1]>, 1514 // 1515 // Double-register Integer Multiply-Accumulate (.8, .16) 1516 InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1517 InstrStage<1, [A9_MUX0], 0>, 1518 InstrStage<1, [A9_DRegsN], 0, Required>, 1519 // Extra latency cycles since wbck is 6 cycles 1520 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1521 InstrStage<1, [A9_NPipe]>], 1522 [6, 3, 2, 2]>, 1523 // 1524 // Double-register Integer Multiply-Accumulate (.32) 1525 InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1526 InstrStage<1, [A9_MUX0], 
0>, 1527 InstrStage<1, [A9_DRegsN], 0, Required>, 1528 // Extra latency cycles since wbck is 7 cycles 1529 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1530 InstrStage<2, [A9_NPipe]>], 1531 [7, 3, 2, 1]>, 1532 // 1533 // Quad-register Integer Multiply-Accumulate (.8, .16) 1534 InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1535 InstrStage<1, [A9_MUX0], 0>, 1536 InstrStage<1, [A9_DRegsN], 0, Required>, 1537 // Extra latency cycles since wbck is 7 cycles 1538 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1539 InstrStage<2, [A9_NPipe]>], 1540 [7, 3, 2, 2]>, 1541 // 1542 // Quad-register Integer Multiply-Accumulate (.32) 1543 InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1544 InstrStage<1, [A9_MUX0], 0>, 1545 InstrStage<1, [A9_DRegsN], 0, Required>, 1546 // Extra latency cycles since wbck is 9 cycles 1547 InstrStage<10, [A9_DRegsVFP], 0, Reserved>, 1548 InstrStage<4, [A9_NPipe]>], 1549 [9, 3, 2, 1]>, 1550 1551 // 1552 // Move 1553 InstrItinData<IIC_VMOV, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1554 InstrStage<1, [A9_MUX0], 0>, 1555 InstrStage<1, [A9_DRegsN], 0, Required>, 1556 InstrStage<1, [A9_DRegsVFP], 0, Reserved>, 1557 InstrStage<1, [A9_NPipe]>], 1558 [1,1]>, 1559 // 1560 // Move Immediate 1561 InstrItinData<IIC_VMOVImm, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1562 InstrStage<1, [A9_MUX0], 0>, 1563 InstrStage<1, [A9_DRegsN], 0, Required>, 1564 // Extra latency cycles since wbck is 6 cycles 1565 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1566 InstrStage<1, [A9_NPipe]>], 1567 [3]>, 1568 // 1569 // Double-register Permute Move 1570 InstrItinData<IIC_VMOVD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1571 InstrStage<1, [A9_MUX0], 0>, 1572 InstrStage<1, [A9_DRegsN], 0, Required>, 1573 // Extra latency cycles since wbck is 6 cycles 1574 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1575 InstrStage<1, [A9_NPipe]>], 1576 [2, 1]>, 1577 // 1578 // Quad-register Permute Move 1579 InstrItinData<IIC_VMOVQ, [InstrStage<1, 
[A9_Issue0, A9_Issue1], 0>, 1580 InstrStage<1, [A9_MUX0], 0>, 1581 InstrStage<1, [A9_DRegsN], 0, Required>, 1582 // Extra latency cycles since wbck is 6 cycles 1583 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1584 InstrStage<1, [A9_NPipe]>], 1585 [2, 1]>, 1586 // 1587 // Integer to Single-precision Move 1588 InstrItinData<IIC_VMOVIS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1589 InstrStage<1, [A9_MUX0], 0>, 1590 InstrStage<1, [A9_DRegsN], 0, Required>, 1591 InstrStage<3, [A9_DRegsVFP], 0, Reserved>, 1592 InstrStage<1, [A9_NPipe]>], 1593 [1, 1]>, 1594 // 1595 // Integer to Double-precision Move 1596 InstrItinData<IIC_VMOVID , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1597 InstrStage<1, [A9_MUX0], 0>, 1598 InstrStage<1, [A9_DRegsN], 0, Required>, 1599 InstrStage<3, [A9_DRegsVFP], 0, Reserved>, 1600 InstrStage<1, [A9_NPipe]>], 1601 [1, 1, 1]>, 1602 // 1603 // Single-precision to Integer Move 1604 InstrItinData<IIC_VMOVSI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1605 InstrStage<1, [A9_MUX0], 0>, 1606 InstrStage<1, [A9_DRegsN], 0, Required>, 1607 InstrStage<3, [A9_DRegsVFP], 0, Reserved>, 1608 InstrStage<1, [A9_NPipe]>], 1609 [2, 1]>, 1610 // 1611 // Double-precision to Integer Move 1612 InstrItinData<IIC_VMOVDI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1613 InstrStage<1, [A9_MUX0], 0>, 1614 InstrStage<1, [A9_DRegsN], 0, Required>, 1615 InstrStage<3, [A9_DRegsVFP], 0, Reserved>, 1616 InstrStage<1, [A9_NPipe]>], 1617 [2, 2, 1]>, 1618 // 1619 // Integer to Lane Move 1620 InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1621 InstrStage<1, [A9_MUX0], 0>, 1622 InstrStage<1, [A9_DRegsN], 0, Required>, 1623 InstrStage<4, [A9_DRegsVFP], 0, Reserved>, 1624 InstrStage<2, [A9_NPipe]>], 1625 [3, 1, 1]>, 1626 1627 // 1628 // Vector narrow move 1629 InstrItinData<IIC_VMOVN, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1630 InstrStage<1, [A9_MUX0], 0>, 1631 InstrStage<1, [A9_DRegsN], 0, Required>, 1632 // Extra latency cycles since wbck is 6 cycles 1633 
InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1634 InstrStage<1, [A9_NPipe]>], 1635 [3, 1]>, 1636 // 1637 // Double-register FP Unary 1638 InstrItinData<IIC_VUNAD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1639 InstrStage<1, [A9_MUX0], 0>, 1640 InstrStage<1, [A9_DRegsN], 0, Required>, 1641 // Extra latency cycles since wbck is 6 cycles 1642 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1643 InstrStage<1, [A9_NPipe]>], 1644 [5, 2]>, 1645 // 1646 // Quad-register FP Unary 1647 // Result written in N5, but that is relative to the last cycle of multicycle, 1648 // so we use 6 for those cases 1649 InstrItinData<IIC_VUNAQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1650 InstrStage<1, [A9_MUX0], 0>, 1651 InstrStage<1, [A9_DRegsN], 0, Required>, 1652 // Extra latency cycles since wbck is 7 cycles 1653 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1654 InstrStage<2, [A9_NPipe]>], 1655 [6, 2]>, 1656 // 1657 // Double-register FP Binary 1658 // FIXME: We're using this itin for many instructions and [2, 2] here is too 1659 // optimistic. 1660 InstrItinData<IIC_VBIND, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1661 InstrStage<1, [A9_MUX0], 0>, 1662 InstrStage<1, [A9_DRegsN], 0, Required>, 1663 // Extra latency cycles since wbck is 6 cycles 1664 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1665 InstrStage<1, [A9_NPipe]>], 1666 [5, 2, 2]>, 1667 1668 // 1669 // VPADD, etc. 
1670 InstrItinData<IIC_VPBIND, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1671 InstrStage<1, [A9_MUX0], 0>, 1672 InstrStage<1, [A9_DRegsN], 0, Required>, 1673 // Extra latency cycles since wbck is 6 cycles 1674 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1675 InstrStage<1, [A9_NPipe]>], 1676 [5, 1, 1]>, 1677 // 1678 // Double-register FP VMUL 1679 InstrItinData<IIC_VFMULD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1680 InstrStage<1, [A9_MUX0], 0>, 1681 InstrStage<1, [A9_DRegsN], 0, Required>, 1682 // Extra latency cycles since wbck is 6 cycles 1683 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1684 InstrStage<1, [A9_NPipe]>], 1685 [5, 2, 1]>, 1686 // 1687 // Quad-register FP Binary 1688 // Result written in N5, but that is relative to the last cycle of multicycle, 1689 // so we use 6 for those cases 1690 // FIXME: We're using this itin for many instructions and [2, 2] here is too 1691 // optimistic. 1692 InstrItinData<IIC_VBINQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1693 InstrStage<1, [A9_MUX0], 0>, 1694 InstrStage<1, [A9_DRegsN], 0, Required>, 1695 // Extra latency cycles since wbck is 7 cycles 1696 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1697 InstrStage<2, [A9_NPipe]>], 1698 [6, 2, 2]>, 1699 // 1700 // Quad-register FP VMUL 1701 InstrItinData<IIC_VFMULQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1702 InstrStage<1, [A9_MUX0], 0>, 1703 InstrStage<1, [A9_DRegsN], 0, Required>, 1704 // Extra latency cycles since wbck is 7 cycles 1705 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1706 InstrStage<1, [A9_NPipe]>], 1707 [6, 2, 1]>, 1708 // 1709 // Double-register FP Multiply-Accumulate 1710 InstrItinData<IIC_VMACD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1711 InstrStage<1, [A9_MUX0], 0>, 1712 InstrStage<1, [A9_DRegsN], 0, Required>, 1713 // Extra latency cycles since wbck is 7 cycles 1714 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1715 InstrStage<2, [A9_NPipe]>], 1716 [6, 3, 2, 1]>, 1717 // 1718 // Quad-register FP Multiply-Accumulate 1719 // Result written in N9, 
but that is relative to the last cycle of multicycle, 1720 // so we use 10 for those cases 1721 InstrItinData<IIC_VMACQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1722 InstrStage<1, [A9_MUX0], 0>, 1723 InstrStage<1, [A9_DRegsN], 0, Required>, 1724 // Extra latency cycles since wbck is 9 cycles 1725 InstrStage<10, [A9_DRegsVFP], 0, Reserved>, 1726 InstrStage<4, [A9_NPipe]>], 1727 [8, 4, 2, 1]>, 1728 // 1729 // Double-register Fused FP Multiple-Accumulate 1730 InstrItinData<IIC_VFMACD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1731 InstrStage<1, [A9_MUX0], 0>, 1732 InstrStage<1, [A9_DRegsN], 0, Required>, 1733 // Extra latency cycles since wbck is 7 cycles 1734 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1735 InstrStage<2, [A9_NPipe]>], 1736 [6, 3, 2, 1]>, 1737 // 1738 // Quad-register Fused FP Multiple-Accumulate 1739 // Result written in N9, but that is relative to the last cycle of multicycle, 1740 // so we use 10 for those cases 1741 InstrItinData<IIC_VFMACQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1742 InstrStage<1, [A9_MUX0], 0>, 1743 InstrStage<1, [A9_DRegsN], 0, Required>, 1744 // Extra latency cycles since wbck is 9 cycles 1745 InstrStage<10, [A9_DRegsVFP], 0, Reserved>, 1746 InstrStage<4, [A9_NPipe]>], 1747 [8, 4, 2, 1]>, 1748 // 1749 // Double-register Reciprical Step 1750 InstrItinData<IIC_VRECSD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1751 InstrStage<1, [A9_MUX0], 0>, 1752 InstrStage<1, [A9_DRegsN], 0, Required>, 1753 // Extra latency cycles since wbck is 10 cycles 1754 InstrStage<11, [A9_DRegsVFP], 0, Reserved>, 1755 InstrStage<1, [A9_NPipe]>], 1756 [9, 2, 2]>, 1757 // 1758 // Quad-register Reciprical Step 1759 InstrItinData<IIC_VRECSQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1760 InstrStage<1, [A9_MUX0], 0>, 1761 InstrStage<1, [A9_DRegsN], 0, Required>, 1762 // Extra latency cycles since wbck is 11 cycles 1763 InstrStage<12, [A9_DRegsVFP], 0, Reserved>, 1764 InstrStage<2, [A9_NPipe]>], 1765 [10, 2, 2]>, 1766 // 1767 // Double-register 
Permute 1768 InstrItinData<IIC_VPERMD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1769 InstrStage<1, [A9_MUX0], 0>, 1770 InstrStage<1, [A9_DRegsN], 0, Required>, 1771 // Extra latency cycles since wbck is 6 cycles 1772 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1773 InstrStage<1, [A9_NPipe]>], 1774 [2, 2, 1, 1]>, 1775 // 1776 // Quad-register Permute 1777 // Result written in N2, but that is relative to the last cycle of multicycle, 1778 // so we use 3 for those cases 1779 InstrItinData<IIC_VPERMQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1780 InstrStage<1, [A9_MUX0], 0>, 1781 InstrStage<1, [A9_DRegsN], 0, Required>, 1782 // Extra latency cycles since wbck is 7 cycles 1783 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1784 InstrStage<2, [A9_NPipe]>], 1785 [3, 3, 1, 1]>, 1786 // 1787 // Quad-register Permute (3 cycle issue) 1788 // Result written in N2, but that is relative to the last cycle of multicycle, 1789 // so we use 4 for those cases 1790 InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1791 InstrStage<1, [A9_MUX0], 0>, 1792 InstrStage<1, [A9_DRegsN], 0, Required>, 1793 // Extra latency cycles since wbck is 8 cycles 1794 InstrStage<9, [A9_DRegsVFP], 0, Reserved>, 1795 InstrStage<3, [A9_NPipe]>], 1796 [4, 4, 1, 1]>, 1797 1798 // 1799 // Double-register VEXT 1800 InstrItinData<IIC_VEXTD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1801 InstrStage<1, [A9_MUX0], 0>, 1802 InstrStage<1, [A9_DRegsN], 0, Required>, 1803 // Extra latency cycles since wbck is 6 cycles 1804 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1805 InstrStage<1, [A9_NPipe]>], 1806 [2, 1, 1]>, 1807 // 1808 // Quad-register VEXT 1809 InstrItinData<IIC_VEXTQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1810 InstrStage<1, [A9_MUX0], 0>, 1811 InstrStage<1, [A9_DRegsN], 0, Required>, 1812 // Extra latency cycles since wbck is 7 cycles 1813 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1814 InstrStage<2, [A9_NPipe]>], 1815 [3, 1, 2]>, 1816 // 1817 // VTB 1818 InstrItinData<IIC_VTB1, 
[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1819 InstrStage<1, [A9_MUX0], 0>, 1820 InstrStage<1, [A9_DRegsN], 0, Required>, 1821 // Extra latency cycles since wbck is 7 cycles 1822 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1823 InstrStage<2, [A9_NPipe]>], 1824 [3, 2, 1]>, 1825 InstrItinData<IIC_VTB2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1826 InstrStage<1, [A9_MUX0], 0>, 1827 InstrStage<2, [A9_DRegsN], 0, Required>, 1828 // Extra latency cycles since wbck is 7 cycles 1829 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1830 InstrStage<2, [A9_NPipe]>], 1831 [3, 2, 2, 1]>, 1832 InstrItinData<IIC_VTB3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1833 InstrStage<1, [A9_MUX0], 0>, 1834 InstrStage<2, [A9_DRegsN], 0, Required>, 1835 // Extra latency cycles since wbck is 8 cycles 1836 InstrStage<9, [A9_DRegsVFP], 0, Reserved>, 1837 InstrStage<3, [A9_NPipe]>], 1838 [4, 2, 2, 3, 1]>, 1839 InstrItinData<IIC_VTB4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1840 InstrStage<1, [A9_MUX0], 0>, 1841 InstrStage<1, [A9_DRegsN], 0, Required>, 1842 // Extra latency cycles since wbck is 8 cycles 1843 InstrStage<9, [A9_DRegsVFP], 0, Reserved>, 1844 InstrStage<3, [A9_NPipe]>], 1845 [4, 2, 2, 3, 3, 1]>, 1846 // 1847 // VTBX 1848 InstrItinData<IIC_VTBX1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1849 InstrStage<1, [A9_MUX0], 0>, 1850 InstrStage<1, [A9_DRegsN], 0, Required>, 1851 // Extra latency cycles since wbck is 7 cycles 1852 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1853 InstrStage<2, [A9_NPipe]>], 1854 [3, 1, 2, 1]>, 1855 InstrItinData<IIC_VTBX2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1856 InstrStage<1, [A9_MUX0], 0>, 1857 InstrStage<1, [A9_DRegsN], 0, Required>, 1858 // Extra latency cycles since wbck is 7 cycles 1859 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1860 InstrStage<2, [A9_NPipe]>], 1861 [3, 1, 2, 2, 1]>, 1862 InstrItinData<IIC_VTBX3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1863 InstrStage<1, [A9_MUX0], 0>, 1864 InstrStage<1, [A9_DRegsN], 0, Required>, 1865 // Extra 
// latency cycles since wbck is 8 cycles
                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
                               InstrStage<3, [A9_NPipe]>],
                              [4, 1, 2, 2, 3, 1]>,
  InstrItinData<IIC_VTBX4,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1, [A9_MUX0], 0>,
                               InstrStage<1, [A9_DRegsN], 0, Required>,
                               // Extra latency cycles since wbck is 8 cycles
                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
                               InstrStage<2, [A9_NPipe]>],
                              [4, 1, 2, 2, 3, 3, 1]>
]>;

// ===---------------------------------------------------------------------===//
// Everything below describes the simpler per-operand machine model, which is
// what MachineScheduler consumes and which is meant to eventually replace the
// itineraries above.

// Holder for a list of WriteSequence records. Index ranges of `Writes` are
// sliced out below when the load-multiple SchedWriteVariants are built.
// SchedModel is left unset here; records defined inside a
// `let SchedModel = ... in` scope receive their value from that scope.
class A9WriteLMOpsListType<list<WriteSequence> writes> {
  list <WriteSequence> Writes = writes;
  SchedMachineModel SchedModel = ?;
}

// Cortex-A9 machine model for scheduling and other instruction cost heuristics.
def CortexA9Model : SchedMachineModel {
  // 2 micro-ops are dispatched per cycle.
  let IssueWidth = 2;
  // Based on available renamed registers.
  let MicroOpBufferSize = 56;
  // Optimistic load latency assuming bypass. This is overridden by
  // OperandCycles if the Itineraries are queried instead.
  let LoadLatency = 2;
  // Based on estimate of pipeline depth.
  let MispredictPenalty = 8;

  let Itineraries = CortexA9Itineraries;

  // FIXME: Many vector operations were never given an itinerary. We
  // haven't mapped these to the new model either.
  let CompleteModel = 0;
}

//===----------------------------------------------------------------------===//
// Define each kind of processor resource and number available.
//
// The AGU unit has BufferSize=1 so that the latency between operations
// that use it are considered to stall other operations.
//
// The FP unit has BufferSize=0 so that it is a hard dispatch
// hazard. No instruction may be dispatched while the unit is reserved.

let SchedModel = CortexA9Model in {

// Processor resources; the template argument is the number of units.
def A9UnitALU : ProcResource<2>;
def A9UnitMul : ProcResource<1> {
  let Super = A9UnitALU;
}
def A9UnitAGU : ProcResource<1> {
  let BufferSize = 1;
}
def A9UnitLS  : ProcResource<1>;
def A9UnitFP  : ProcResource<1> {
  let BufferSize = 0;
}
def A9UnitB   : ProcResource<1>;

//===----------------------------------------------------------------------===//
// Define scheduler read/write types with their resources and latency on A9.

// Consumes an issue slot but no processor resource. Useful when every
// other write associated with the operand has NumMicroOps = 0.
def A9WriteIssue : SchedWriteRes<[]> {
  let Latency = 0;
}

// Write an integer register.
def A9WriteI : SchedWriteRes<[A9UnitALU]>;
// Write an integer shifted-by register.
def A9WriteIsr : SchedWriteRes<[A9UnitALU]> {
  let Latency = 2;
}

// Basic ALU.
def A9WriteALU : SchedWriteRes<[A9UnitALU]>;
// ALU with operand shifted by immediate.
def : WriteRes<WriteALUsi, [A9UnitALU]> {
  let Latency = 2;
}
// ALU with operand shifted by register.
def A9WriteALUsr : SchedWriteRes<[A9UnitALU]> {
  let Latency = 3;
}

// Multiplication. The *Hi writes carry NumMicroOps = 0, so they add no
// micro-op of their own.
def A9WriteM : SchedWriteRes<[A9UnitMul, A9UnitMul]> {
  let Latency = 4;
}
def A9WriteMHi : SchedWriteRes<[A9UnitMul]> {
  let Latency = 5;
  let NumMicroOps = 0;
}
def A9WriteM16 : SchedWriteRes<[A9UnitMul]> {
  let Latency = 3;
}
def A9WriteM16Hi : SchedWriteRes<[A9UnitMul]> {
  let Latency = 4;
  let NumMicroOps = 0;
}

// Floating-point
// Only one FP or AGU instruction may issue per cycle. We model this
// by having FP instructions consume the AGU resource.
def A9WriteF : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 4;
}
def A9WriteFMov : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 1;
}
def A9WriteFMulS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 5;
}
def A9WriteFMulD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 6;
}
def A9WriteFMAS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 8;
}
def A9WriteFMAD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 9;
}
def A9WriteFDivS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 15;
}
def A9WriteFDivD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 25;
}
def A9WriteFSqrtS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 17;
}
def A9WriteFSqrtD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 32;
}

// NEON has an odd mix of latencies, so these write types are simply named
// after their latency. (No latency-8 type is defined.)
def A9WriteV1 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 1;
}
def A9WriteV2 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 2;
}
def A9WriteV3 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 3;
}
def A9WriteV4 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 4;
}
def A9WriteV5 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 5;
}
def A9WriteV6 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 6;
}
def A9WriteV7 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 7;
}
def A9WriteV9 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 9;
}
def A9WriteV10 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 10;
}

// Reserve A9UnitFP for 2 consecutive cycles.
// ResourceCycles carries one entry per listed ProcResource: A9UnitFP is held
// for 2 cycles and A9UnitAGU for 1. The AGU entry is spelled out so the list
// length matches ProcResources; TableGen versions that validate this reject a
// shorter list, and versions that pad missing entries pad with 1, so the
// reservation is the same either way.
def A9Write2V4 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 4;
  let ResourceCycles = [2, 1];
}
def A9Write2V7 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 7;
  let ResourceCycles = [2, 1];
}
def A9Write2V9 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 9;
  let ResourceCycles = [2, 1];
}

// Branches don't have a def operand but still consume resources.
def A9WriteB : SchedWriteRes<[A9UnitB]>;

// Address generation.
def A9WriteAdr : SchedWriteRes<[A9UnitAGU]> { let NumMicroOps = 0; }

// Load Integer.
def A9WriteL : SchedWriteRes<[A9UnitLS]> { let Latency = 3; }
// Load the upper 32-bits using the same micro-op.
def A9WriteLHi : SchedWriteRes<[]> { let Latency = 3;
                                     let NumMicroOps = 0; }
// Offset shifted by register.
def A9WriteLsi : SchedWriteRes<[A9UnitLS]> { let Latency = 4; }
// Load (and zero extend) a byte.
def A9WriteLb : SchedWriteRes<[A9UnitLS]> { let Latency = 4; }
def A9WriteLbsi : SchedWriteRes<[A9UnitLS]> { let Latency = 5; }

// Load or Store Float, aligned.
def A9WriteLSfp : SchedWriteRes<[A9UnitLS, A9UnitFP]> { let Latency = 1; }

// Store Integer.
def A9WriteS : SchedWriteRes<[A9UnitLS]>;

//===----------------------------------------------------------------------===//
// Define resources dynamically for load multiple variants.

// Define helpers for extra latency without consuming resources.
// A9WriteCycle1 is the one-cycle unit; A9WriteCycle2..8 repeat it N times.
def A9WriteCycle1 : SchedWriteRes<[]> { let Latency = 1; let NumMicroOps = 0; }
foreach NumCycles = 2-8 in {
def A9WriteCycle#NumCycles : WriteSequence<[A9WriteCycle1], NumCycles>;
} // foreach NumCycles

// Define address generation sequences and predicates for 8 flavors of LDMs.
foreach NumAddr = 1-8 in {

// A9WriteAdr1-8: A9WriteAdr repeated NumAddr times, giving additive
// latency for instructions that generate multiple loads or stores.
def A9WriteAdr#NumAddr : WriteSequence<[A9WriteAdr], NumAddr>;

// Predicate that selects the LDM flavor from the number of memory
// addresses; the count is halved (rounding up) before the comparison.
def A9LMAdr#NumAddr#Pred :
  SchedPredicate<"(TII->getNumLDMAddresses(MI)+1)/2 == "#NumAddr>;

} // foreach NumAddr

// Fall-back for unknown LDMs.
def A9LMUnknownPred : SchedPredicate<"TII->getNumLDMAddresses(MI) == 0">;

// LDM/VLDM/VLDn address generation latency & resources.
// The matching A9WriteAdrN sequence is chosen dynamically by predicate.
def A9WriteLMAdr : SchedWriteVariant<[
  SchedVar<A9LMAdr1Pred, [A9WriteAdr1]>,
  SchedVar<A9LMAdr2Pred, [A9WriteAdr2]>,
  SchedVar<A9LMAdr3Pred, [A9WriteAdr3]>,
  SchedVar<A9LMAdr4Pred, [A9WriteAdr4]>,
  SchedVar<A9LMAdr5Pred, [A9WriteAdr5]>,
  SchedVar<A9LMAdr6Pred, [A9WriteAdr6]>,
  SchedVar<A9LMAdr7Pred, [A9WriteAdr7]>,
  SchedVar<A9LMAdr8Pred, [A9WriteAdr8]>,
  // For unknown LDM/VLDM/VSTM, assume 2 32-bit registers.
  SchedVar<A9LMUnknownPred, [A9WriteAdr2]>
]>;

// Define LDM Resources.
// These take no issue resource, so they can be combined with other
// writes like WriteB.
// A9WriteLMLo takes a single LS resource and 2 cycles.
def A9WriteLMLo : SchedWriteRes<[A9UnitLS]> {
  let Latency = 2;
  let NumMicroOps = 0;
}
// Assuming aligned access, the upper half of each pair is free with
// the same latency.
def A9WriteLMHi : SchedWriteRes<[]> {
  let Latency = 2;
  let NumMicroOps = 0;
}
// Each A9WriteL#N variant adds N cycles of latency without consuming
// additional resources.
foreach NumAddr = 1-8 in {
def A9WriteL#NumAddr : WriteSequence<
  [A9WriteLMLo, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;
def A9WriteL#NumAddr#Hi : WriteSequence<
  [A9WriteLMHi, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;
}

//===----------------------------------------------------------------------===//
// LDM: Load multiple into 32-bit integer registers.

// Lo/Hi write pairs in address order; A9WriteLM below slices prefixes of
// this list by index range.
def A9WriteLMOpsList : A9WriteLMOpsListType<[
  A9WriteL1, A9WriteL1Hi,
  A9WriteL2, A9WriteL2Hi,
  A9WriteL3, A9WriteL3Hi,
  A9WriteL4, A9WriteL4Hi,
  A9WriteL5, A9WriteL5Hi,
  A9WriteL6, A9WriteL6Hi,
  A9WriteL7, A9WriteL7Hi,
  A9WriteL8, A9WriteL8Hi
]>;

// A9WriteLM variants expand into a pair of writes for each 64-bit
// value loaded. When the number of registers is odd, the last
// A9WriteLnHi is naturally ignored because the instruction has no
// following def operands. These variants take no issue resource, so
// they may need to be part of a WriteSequence that includes A9WriteIssue.
def A9WriteLM : SchedWriteVariant<[
  SchedVar<A9LMAdr1Pred, A9WriteLMOpsList.Writes[0-1]>,
  SchedVar<A9LMAdr2Pred, A9WriteLMOpsList.Writes[0-3]>,
  SchedVar<A9LMAdr3Pred, A9WriteLMOpsList.Writes[0-5]>,
  SchedVar<A9LMAdr4Pred, A9WriteLMOpsList.Writes[0-7]>,
  SchedVar<A9LMAdr5Pred, A9WriteLMOpsList.Writes[0-9]>,
  SchedVar<A9LMAdr6Pred, A9WriteLMOpsList.Writes[0-11]>,
  SchedVar<A9LMAdr7Pred, A9WriteLMOpsList.Writes[0-13]>,
  SchedVar<A9LMAdr8Pred, A9WriteLMOpsList.Writes[0-15]>,
  // For unknown LDMs, define the maximum number of writes, but only
  // make the first two consume resources.
  SchedVar<A9LMUnknownPred, [
    A9WriteL1,   A9WriteL1Hi,
    A9WriteL2,   A9WriteL2Hi,
    A9WriteL3Hi, A9WriteL3Hi,
    A9WriteL4Hi, A9WriteL4Hi,
    A9WriteL5Hi, A9WriteL5Hi,
    A9WriteL6Hi, A9WriteL6Hi,
    A9WriteL7Hi, A9WriteL7Hi,
    A9WriteL8Hi, A9WriteL8Hi
  ]>
]> {
  let Variadic = 1;
}

//===----------------------------------------------------------------------===//
// VFP Load/Store Multiple Variants, and NEON VLDn/VSTn support.

// A9WriteLfpOp is the same as A9WriteLSfp but takes no issue resources,
// so it can be used in WriteSequences for single-issue instructions that
// encapsulate multiple loads.
def A9WriteLfpOp : SchedWriteRes<[A9UnitLS, A9UnitFP]> {
  let Latency = 1;
  let NumMicroOps = 0;
}

foreach NumAddr = 1-8 in {

// Helper for A9WriteLfp1-8: A sequence of fp loads with no micro-ops.
def A9WriteLfp#NumAddr#Seq : WriteSequence<[A9WriteLfpOp], NumAddr>;

// A9WriteLfp1-8 definitions are statically expanded into a sequence of
// A9WriteLfpOps with additive latency that takes a single issue slot.
// Used directly to describe NEON VLDn.
def A9WriteLfp#NumAddr : WriteSequence<
  [A9WriteIssue, !cast<SchedWrite>("A9WriteLfp"#NumAddr#Seq)]>;

// A9WriteLfp1-8Mov prepends A9WriteF, adding its latency and FP resource
// for permuting loaded values.
def A9WriteLfp#NumAddr#Mov : WriteSequence<
  [A9WriteF, !cast<SchedWrite>("A9WriteLfp"#NumAddr#Seq)]>;

} // foreach NumAddr

// Define VLDM/VSTM PreRA resources.
// A9WriteLMfpPreRA are dynamically expanded into the correct
// A9WriteLfp1-8 sequence based on a predicate. This supports the
// preRA VLDM variants in which all 64-bit loads are written to the
// same tuple of either single or double precision registers.
def A9WriteLMfpPreRA : SchedWriteVariant<[
  SchedVar<A9LMAdr1Pred, [A9WriteLfp1]>,
  SchedVar<A9LMAdr2Pred, [A9WriteLfp2]>,
  SchedVar<A9LMAdr3Pred, [A9WriteLfp3]>,
  SchedVar<A9LMAdr4Pred, [A9WriteLfp4]>,
  SchedVar<A9LMAdr5Pred, [A9WriteLfp5]>,
  SchedVar<A9LMAdr6Pred, [A9WriteLfp6]>,
  SchedVar<A9LMAdr7Pred, [A9WriteLfp7]>,
  SchedVar<A9LMAdr8Pred, [A9WriteLfp8]>,
  // For unknown VLDM/VSTM PreRA, assume 2xS registers.
  SchedVar<A9LMUnknownPred, [A9WriteLfp2]>
]>;

// Define VLDM/VSTM PostRA Resources.
// A9WriteLMfpLo takes a LS and FP resource and one issue slot but no latency.
def A9WriteLMfpLo : SchedWriteRes<[A9UnitLS, A9UnitFP]> {
  let Latency = 0;
}

foreach NumAddr = 1-8 in {

// Each A9WriteLMfp#N variant adds N cycles of latency without consuming
// additional resources.
def A9WriteLMfp#NumAddr : WriteSequence<
  [A9WriteLMfpLo, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;

// Assuming aligned access, the upper half of each pair is free with
// the same latency.
def A9WriteLMfp#NumAddr#Hi : WriteSequence<
  [A9WriteLMHi, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;

} // foreach NumAddr

// VLDM PostRA Variants. These variants expand A9WriteLMfpPostRA into a
// pair of writes for each 64-bit data loaded. When the number of
// registers is odd, the last WriteLMfpnHi is naturally ignored because
// the instruction has no following def operands.

// Backing list for A9WriteLMfpPostRA. The trailing comments give the list
// indices that the Writes[...] slices below refer to.
def A9WriteLMfpPostRAOpsList : A9WriteLMOpsListType<[
  A9WriteLMfp1, A9WriteLMfp2,       // 0-1
  A9WriteLMfp3, A9WriteLMfp4,       // 2-3
  A9WriteLMfp5, A9WriteLMfp6,       // 4-5
  A9WriteLMfp7, A9WriteLMfp8,       // 6-7
  A9WriteLMfp1Hi,                   // 8-8
  A9WriteLMfp2Hi, A9WriteLMfp2Hi,   // 9-10
  A9WriteLMfp3Hi, A9WriteLMfp3Hi,   // 11-12
  A9WriteLMfp4Hi, A9WriteLMfp4Hi,   // 13-14
  A9WriteLMfp5Hi, A9WriteLMfp5Hi,   // 15-16
  A9WriteLMfp6Hi, A9WriteLMfp6Hi,   // 17-18
  A9WriteLMfp7Hi, A9WriteLMfp7Hi,   // 19-20
  A9WriteLMfp8Hi, A9WriteLMfp8Hi    // 21-22
]>;

def A9WriteLMfpPostRA : SchedWriteVariant<[
  SchedVar<A9LMAdr1Pred, A9WriteLMfpPostRAOpsList.Writes[0-0, 8-8]>,
  SchedVar<A9LMAdr2Pred, A9WriteLMfpPostRAOpsList.Writes[0-1, 9-10]>,
  SchedVar<A9LMAdr3Pred, A9WriteLMfpPostRAOpsList.Writes[0-2, 10-12]>,
  SchedVar<A9LMAdr4Pred, A9WriteLMfpPostRAOpsList.Writes[0-3, 11-14]>,
  SchedVar<A9LMAdr5Pred, A9WriteLMfpPostRAOpsList.Writes[0-4, 12-16]>,
  SchedVar<A9LMAdr6Pred, A9WriteLMfpPostRAOpsList.Writes[0-5, 13-18]>,
  SchedVar<A9LMAdr7Pred, A9WriteLMfpPostRAOpsList.Writes[0-6, 14-20]>,
  SchedVar<A9LMAdr8Pred, A9WriteLMfpPostRAOpsList.Writes[0-7, 15-22]>,
  // For unknown LDMs, define the maximum number of writes, but only
  // make the first two consume resources. We are optimizing for the case
  // where the operands are DPRs, and this determines the first eight
  // types. The remaining eight types are filled to cover the case
  // where the operands are SPRs.
  SchedVar<A9LMUnknownPred, [
    A9WriteLMfp1,   A9WriteLMfp2,
    A9WriteLMfp3Hi, A9WriteLMfp4Hi,
    A9WriteLMfp5Hi, A9WriteLMfp6Hi,
    A9WriteLMfp7Hi, A9WriteLMfp8Hi,
    A9WriteLMfp5Hi, A9WriteLMfp5Hi,
    A9WriteLMfp6Hi, A9WriteLMfp6Hi,
    A9WriteLMfp7Hi, A9WriteLMfp7Hi,
    A9WriteLMfp8Hi, A9WriteLMfp8Hi
  ]>
]> {
  let Variadic = 1;
}

// Distinguish between our multiple MI-level forms of the same
// VLDM/VSTM instructions, keyed on whether operand 0 is a virtual or a
// physical register.
def A9PreRA : SchedPredicate<
  "TargetRegisterInfo::isVirtualRegister(MI->getOperand(0).getReg())">;
def A9PostRA : SchedPredicate<
  "TargetRegisterInfo::isPhysicalRegister(MI->getOperand(0).getReg())">;

// VLDM represents all destination registers as a single register
// tuple, unlike LDM. So the number of write operands is not variadic.
def A9WriteLMfp : SchedWriteVariant<[
  SchedVar<A9PreRA,  [A9WriteLMfpPreRA]>,
  SchedVar<A9PostRA, [A9WriteLMfpPostRA]>
]>;

//===----------------------------------------------------------------------===//
// Resources for other (non-LDM/VLDM) Variants.

// These mov immediate writers are unconditionally expanded with
// additive latency.
def A9WriteI2   : WriteSequence<[A9WriteI, A9WriteI]>;
def A9WriteI2pc : WriteSequence<[A9WriteI, A9WriteI, WriteALU]>;
def A9WriteI2ld : WriteSequence<[A9WriteI, A9WriteI, A9WriteL]>;

// Some ALU operations can read loaded integer values one cycle early.
def A9ReadALU : SchedReadAdvance<1, [
  A9WriteL, A9WriteLHi, A9WriteLsi, A9WriteLb, A9WriteLbsi,
  A9WriteL1, A9WriteL2, A9WriteL3, A9WriteL4,
  A9WriteL5, A9WriteL6, A9WriteL7, A9WriteL8,
  A9WriteL1Hi, A9WriteL2Hi, A9WriteL3Hi, A9WriteL4Hi,
  A9WriteL5Hi, A9WriteL6Hi, A9WriteL7Hi, A9WriteL8Hi
]>;

// Read types for operands that are unconditionally read in cycle N
// after the instruction issues, decreases producer latency by N-1.
def A9Read2 : SchedReadAdvance<1>;
def A9Read3 : SchedReadAdvance<2>;
def A9Read4 : SchedReadAdvance<3>;

//===----------------------------------------------------------------------===//
// Map itinerary classes to scheduler read/write resources per operand.
//
// For ARM, we piggyback scheduler resources on the Itinerary classes
// to avoid perturbing the existing instruction definitions.

// This table follows the ARM Cortex-A9 Technical Reference Manuals,
// mostly in order.

// Moves, MVN and conditional moves.
def : ItinRW<[WriteALU],
             [IIC_iMOVi, IIC_iMOVr, IIC_iMOVsi,
              IIC_iMVNi, IIC_iMVNsi,
              IIC_iCMOVi, IIC_iCMOVr, IIC_iCMOVsi]>;
def : ItinRW<[WriteALU, A9ReadALU], [IIC_iMVNr]>;
def : ItinRW<[A9WriteIsr], [IIC_iMOVsr, IIC_iMVNsr, IIC_iCMOVsr]>;

// Two-instruction immediate moves.
def : ItinRW<[A9WriteI2],   [IIC_iMOVix2, IIC_iCMOVix2]>;
def : ItinRW<[A9WriteI2pc], [IIC_iMOVix2addpc]>;
def : ItinRW<[A9WriteI2ld], [IIC_iMOVix2ld]>;

// Integer ALU, bitwise, test, compare and extend classes.
def : ItinRW<[WriteALU],
             [IIC_iBITi, IIC_iBITr, IIC_iUNAr, IIC_iTSTi, IIC_iTSTr]>;
def : ItinRW<[WriteALU, A9ReadALU], [IIC_iALUi, IIC_iCMPi, IIC_iCMPsi]>;
def : ItinRW<[WriteALU, A9ReadALU, A9ReadALU], [IIC_iALUr, IIC_iCMPr]>;
def : ItinRW<[WriteALUsi],
             [IIC_iBITsi, IIC_iUNAsi, IIC_iEXTr, IIC_iTSTsi]>;
def : ItinRW<[WriteALUsi, A9ReadALU], [IIC_iALUsi]>;
def : ItinRW<[WriteALUsi, ReadDefault, A9ReadALU], [IIC_iALUsir]>; // RSB
def : ItinRW<[A9WriteALUsr],
             [IIC_iBITsr, IIC_iTSTsr, IIC_iEXTAr, IIC_iEXTAsr]>;
def : ItinRW<[A9WriteALUsr, A9ReadALU], [IIC_iALUsr, IIC_iCMPsr]>;

// A9WriteMHi ignored for MUL32.
def : ItinRW<[A9WriteM, A9WriteMHi],
             [IIC_iMUL32, IIC_iMAC32,
              IIC_iMUL64, IIC_iMAC64]>;
// FIXME: SMLALxx needs itin classes
def : ItinRW<[A9WriteM16, A9WriteM16Hi], [IIC_iMUL16, IIC_iMAC16]>;

// TODO: For floating-point ops, we model the pipeline forwarding
// latencies here. WAW latencies are sometimes longer.

// VFP moves, unary ops, compares and status transfers.
def : ItinRW<[A9WriteFMov],
             [IIC_fpSTAT, IIC_fpMOVIS, IIC_fpMOVID, IIC_fpMOVSI,
              IIC_fpUNA32, IIC_fpUNA64,
              IIC_fpCMP32, IIC_fpCMP64]>;
def : ItinRW<[A9WriteFMov, A9WriteFMov], [IIC_fpMOVDI]>;
// VFP conversions and add/sub.
def : ItinRW<[A9WriteF],
             [IIC_fpCVTSD, IIC_fpCVTDS, IIC_fpCVTSH, IIC_fpCVTHS,
              IIC_fpCVTIS, IIC_fpCVTID, IIC_fpCVTSI, IIC_fpCVTDI,
              IIC_fpALU32, IIC_fpALU64]>;
def : ItinRW<[A9WriteFMulS],  [IIC_fpMUL32]>;
def : ItinRW<[A9WriteFMulD],  [IIC_fpMUL64]>;
def : ItinRW<[A9WriteFMAS],   [IIC_fpMAC32]>;
def : ItinRW<[A9WriteFMAD],   [IIC_fpMAC64]>;
def : ItinRW<[A9WriteFDivS],  [IIC_fpDIV32]>;
def : ItinRW<[A9WriteFDivD],  [IIC_fpDIV64]>;
def : ItinRW<[A9WriteFSqrtS], [IIC_fpSQRT32]>;
def : ItinRW<[A9WriteFSqrtD], [IIC_fpSQRT64]>;

def : ItinRW<[A9WriteB], [IIC_Br]>;

// A9 PLD is processed in a dedicated unit.
def : ItinRW<[], [IIC_Preload]>;

// Note: We must assume that loads are aligned, since the machine
// model cannot know this statically and A9 ignores alignment hints.

// A9WriteAdr consumes AGU regardless of address writeback. But its
// latency is only relevant for users of an updated address.
def : ItinRW<[A9WriteL, A9WriteAdr],
             [IIC_iLoad_i, IIC_iLoad_r,
              IIC_iLoad_iu, IIC_iLoad_ru]>;
def : ItinRW<[A9WriteLsi, A9WriteAdr], [IIC_iLoad_si, IIC_iLoad_siu]>;
def : ItinRW<[A9WriteLb, A9WriteAdr2],
             [IIC_iLoad_bh_i, IIC_iLoad_bh_r,
              IIC_iLoad_bh_iu, IIC_iLoad_bh_ru]>;
def : ItinRW<[A9WriteLbsi, A9WriteAdr2],
             [IIC_iLoad_bh_si, IIC_iLoad_bh_siu]>;
def : ItinRW<[A9WriteL, A9WriteLHi, A9WriteAdr],
             [IIC_iLoad_d_i, IIC_iLoad_d_r,
              IIC_iLoad_d_ru]>;
// Store either has no def operands, or the one def for address writeback.
def : ItinRW<[A9WriteAdr, A9WriteS],
             [IIC_iStore_i, IIC_iStore_r,
              IIC_iStore_iu, IIC_iStore_ru,
              IIC_iStore_d_i, IIC_iStore_d_r,
              IIC_iStore_d_ru]>;
def : ItinRW<[A9WriteAdr2, A9WriteS],
             [IIC_iStore_si, IIC_iStore_siu,
              IIC_iStore_bh_i, IIC_iStore_bh_r,
              IIC_iStore_bh_iu, IIC_iStore_bh_ru]>;
def : ItinRW<[A9WriteAdr3, A9WriteS],
             [IIC_iStore_bh_si, IIC_iStore_bh_siu]>;

// A9WriteLM will be expanded into a separate write for each def
// operand. Address generation consumes resources, but A9WriteLMAdr
// is listed after all def operands, so has no effective latency.
//
// Note: A9WriteLM expands into an even number of def operands. The
// actual number of def operands may be less by one.
def : ItinRW<[A9WriteLM, A9WriteLMAdr, A9WriteIssue],
             [IIC_iLoad_m, IIC_iPop]>;

// Load multiple with address writeback has an extra def operand in
// front of the loaded registers.
//
// Reuse the load-multiple variants for store-multiple because the
// resources are identical. For stores only the address writeback
// has a def operand so the WriteL latencies are unused.
def : ItinRW<[A9WriteLMAdr, A9WriteLM, A9WriteIssue],
             [IIC_iLoad_mu,
              IIC_iStore_m,
              IIC_iStore_mu]>;
def : ItinRW<[A9WriteLM, A9WriteLMAdr, A9WriteB],
             [IIC_iLoad_mBr, IIC_iPop_Br]>;
def : ItinRW<[A9WriteL, A9WriteAdr, WriteALU], [IIC_iLoadiALU]>;

def : ItinRW<[A9WriteLSfp, A9WriteAdr], [IIC_fpLoad32, IIC_fpLoad64]>;

def : ItinRW<[A9WriteLMfp, A9WriteLMAdr], [IIC_fpLoad_m]>;
def : ItinRW<[A9WriteLMAdr, A9WriteLMfp], [IIC_fpLoad_mu]>;
def : ItinRW<[A9WriteAdr, A9WriteLSfp],
             [IIC_fpStore32, IIC_fpStore64,
              IIC_fpStore_m, IIC_fpStore_mu]>;

// Note: Unlike VLDM, VLD1 expects the writeback operand after the
// normal writes.
def : ItinRW<[A9WriteLfp1, A9WriteAdr1],
             [IIC_VLD1, IIC_VLD1u,
              IIC_VLD1x2, IIC_VLD1x2u]>;
def : ItinRW<[A9WriteLfp2, A9WriteAdr2],
             [IIC_VLD1x3, IIC_VLD1x3u,
              IIC_VLD1x4, IIC_VLD1x4u,
              IIC_VLD4dup, IIC_VLD4dupu]>;
def : ItinRW<[A9WriteLfp1Mov, A9WriteAdr1],
             [IIC_VLD1dup, IIC_VLD1dupu,
              IIC_VLD2, IIC_VLD2u,
              IIC_VLD2dup, IIC_VLD2dupu]>;
def : ItinRW<[A9WriteLfp2Mov, A9WriteAdr1],
             [IIC_VLD1ln, IIC_VLD1lnu,
              IIC_VLD2x2, IIC_VLD2x2u,
              IIC_VLD2ln, IIC_VLD2lnu]>;
def : ItinRW<[A9WriteLfp3Mov, A9WriteAdr3],
             [IIC_VLD3, IIC_VLD3u,
              IIC_VLD3dup, IIC_VLD3dupu]>;
def : ItinRW<[A9WriteLfp4Mov, A9WriteAdr4],
             [IIC_VLD4, IIC_VLD4u,
              IIC_VLD4ln, IIC_VLD4lnu]>;
def : ItinRW<[A9WriteLfp5Mov, A9WriteAdr5], [IIC_VLD3ln, IIC_VLD3lnu]>;

// Vector stores use similar resources to vector loads, so use the
// same write types. The address write must be first for stores with
// address writeback.
def : ItinRW<[A9WriteAdr1, A9WriteLfp1],
             [IIC_VST1, IIC_VST1u,
              IIC_VST1x2, IIC_VST1x2u,
              IIC_VST1ln, IIC_VST1lnu,
              IIC_VST2, IIC_VST2u,
              IIC_VST2x2, IIC_VST2x2u,
              IIC_VST2ln, IIC_VST2lnu]>;
def : ItinRW<[A9WriteAdr2, A9WriteLfp2],
             [IIC_VST1x3, IIC_VST1x3u,
              IIC_VST1x4, IIC_VST1x4u,
              IIC_VST3, IIC_VST3u,
              IIC_VST3ln, IIC_VST3lnu,
              IIC_VST4, IIC_VST4u,
              IIC_VST4ln, IIC_VST4lnu]>;

// NEON moves.
def : ItinRW<[A9WriteV2], [IIC_VMOVSI, IIC_VMOVDI, IIC_VMOVD, IIC_VMOVQ]>;
def : ItinRW<[A9WriteV1], [IIC_VMOV, IIC_VMOVIS, IIC_VMOVID]>;
def : ItinRW<[A9WriteV3], [IIC_VMOVISL, IIC_VMOVN]>;

// NEON integer arithmetic
//
// VADD/VAND/VORR/VEOR/VBIC/VORN/VBIT/VBIF/VBSL
def : ItinRW<[A9WriteV3, A9Read2, A9Read2], [IIC_VBINiD, IIC_VBINiQ]>;
// VSUB/VMVN/VCLSD/VCLZD/VCNTD
def : ItinRW<[A9WriteV3, A9Read2], [IIC_VSUBiD, IIC_VSUBiQ, IIC_VCNTiD]>;
// VADDL/VSUBL/VNEG are mapped later under IIC_SHLi.
// ...
// VHADD/VRHADD/VQADD/VTST/VADH/VRADH
def : ItinRW<[A9WriteV4, A9Read2, A9Read2], [IIC_VBINi4D, IIC_VBINi4Q]>;

// VSBH/VRSBH/VHSUB/VQSUB/VABD/VCEQ/VCGE/VCGT/VMAX/VMIN/VPMAX/VPMIN/VABDL
def : ItinRW<[A9WriteV4, A9Read2], [IIC_VSUBi4D, IIC_VSUBi4Q]>;
// VQNEG/VQABS
def : ItinRW<[A9WriteV4], [IIC_VQUNAiD, IIC_VQUNAiQ]>;
// VABS
def : ItinRW<[A9WriteV4, A9Read2], [IIC_VUNAiD, IIC_VUNAiQ]>;
// VPADD/VPADDL are mapped later under IIC_SHLi.
// ...
// VCLSQ/VCLZQ/VCNTQ, takes two cycles.
def : ItinRW<[A9Write2V4, A9Read3], [IIC_VCNTiQ]>;
// VMOVimm/VMVNimm/VORRimm/VBICimm
def : ItinRW<[A9WriteV3], [IIC_VMOVImm]>;
def : ItinRW<[A9WriteV6, A9Read3, A9Read2], [IIC_VABAD, IIC_VABAQ]>;
def : ItinRW<[A9WriteV6, A9Read3], [IIC_VPALiD, IIC_VPALiQ]>;

// NEON integer multiply
//
// Note: these don't quite match the timing docs, but they do match
// the original A9 itinerary.
def : ItinRW<[A9WriteV6, A9Read2, A9Read2],          [IIC_VMULi16D]>;
def : ItinRW<[A9WriteV7, A9Read2, A9Read2],          [IIC_VMULi16Q]>;
def : ItinRW<[A9Write2V7, A9Read2],                  [IIC_VMULi32D]>;
def : ItinRW<[A9Write2V9, A9Read2],                  [IIC_VMULi32Q]>;
def : ItinRW<[A9WriteV6, A9Read3, A9Read2, A9Read2], [IIC_VMACi16D]>;
def : ItinRW<[A9WriteV7, A9Read3, A9Read2, A9Read2], [IIC_VMACi16Q]>;
def : ItinRW<[A9Write2V7, A9Read3, A9Read2],         [IIC_VMACi32D]>;
def : ItinRW<[A9Write2V9, A9Read3, A9Read2],         [IIC_VMACi32Q]>;

// NEON integer shift
// TODO: Q,Q,Q shifts should actually reserve FP for 2 cycles.
def : ItinRW<[A9WriteV3], [IIC_VSHLiD,  IIC_VSHLiQ]>;
def : ItinRW<[A9WriteV4], [IIC_VSHLi4D, IIC_VSHLi4Q]>;

// NEON permute
def : ItinRW<[A9WriteV2, A9WriteV2],
             [IIC_VPERMD, IIC_VPERMQ, IIC_VEXTD]>;
def : ItinRW<[A9WriteV3, A9WriteV4, ReadDefault, A9Read2],
             [IIC_VPERMQ3, IIC_VEXTQ]>;
// Table lookups (VTB1..VTB4).
def : ItinRW<[A9WriteV3, A9Read2],
             [IIC_VTB1]>;
def : ItinRW<[A9WriteV3, A9Read2, A9Read2],
             [IIC_VTB2]>;
def : ItinRW<[A9WriteV4, A9Read2, A9Read2, A9Read3],
             [IIC_VTB3]>;
def : ItinRW<[A9WriteV4, A9Read2, A9Read2, A9Read3, A9Read3],
             [IIC_VTB4]>;
// Table lookup extensions (VTBX1..VTBX4): same as the VTB entries with a
// leading ReadDefault.
def : ItinRW<[A9WriteV3, ReadDefault, A9Read2],
             [IIC_VTBX1]>;
def : ItinRW<[A9WriteV3, ReadDefault, A9Read2, A9Read2],
             [IIC_VTBX2]>;
def : ItinRW<[A9WriteV4, ReadDefault, A9Read2, A9Read2, A9Read3],
             [IIC_VTBX3]>;
def : ItinRW<[A9WriteV4, ReadDefault, A9Read2, A9Read2, A9Read3, A9Read3],
             [IIC_VTBX4]>;

// NEON floating-point
def : ItinRW<[A9WriteV5,  A9Read2, A9Read2], [IIC_VBIND]>;
def : ItinRW<[A9WriteV6,  A9Read2, A9Read2], [IIC_VBINQ]>;
def : ItinRW<[A9WriteV5,  A9Read2],          [IIC_VUNAD, IIC_VFMULD]>;
def : ItinRW<[A9WriteV6,  A9Read2],          [IIC_VUNAQ, IIC_VFMULQ]>;
def : ItinRW<[A9WriteV9,  A9Read3, A9Read2], [IIC_VMACD, IIC_VFMACD]>;
def : ItinRW<[A9WriteV10, A9Read3, A9Read2], [IIC_VMACQ, IIC_VFMACQ]>;
def : ItinRW<[A9WriteV9,  A9Read2, A9Read2], [IIC_VRECSD]>;
def : ItinRW<[A9WriteV10, A9Read2, A9Read2], [IIC_VRECSQ]>;

// Map SchedRWs that are identical for cortexa9 to existing resources.
// ALU writes and reads are aliased onto the A9-specific types.
def : SchedAlias<WriteALU,    A9WriteALU>;
def : SchedAlias<WriteALUsr,  A9WriteALUsr>;
def : SchedAlias<WriteALUSsr, A9WriteALUsr>;
def : SchedAlias<ReadALU,     A9ReadALU>;
def : SchedAlias<ReadALUsr,   A9ReadALU>;

// Bitwise logical operations, grouped by operand form (ri/rr, rsi, rsr).
def : InstRW<[WriteALU],
      (instregex "ANDri", "ORRri", "EORri", "BICri",
                 "ANDrr", "ORRrr", "EORrr", "BICrr")>;
def : InstRW<[WriteALUsi],
      (instregex "ANDrsi", "ORRrsi", "EORrsi", "BICrsi")>;
def : InstRW<[WriteALUsr],
      (instregex "ANDrsr", "ORRrsr", "EORrsr", "BICrsr")>;

// Every compare form is aliased to the plain ALU write.
def : SchedAlias<WriteCMP,   A9WriteALU>;
def : SchedAlias<WriteCMPsi, A9WriteALU>;
def : SchedAlias<WriteCMPsr, A9WriteALU>;

// Moves.
def : InstRW<[A9WriteIsr],
      (instregex "MOVsr", "MOVsi", "MVNsr", "MOVCCsi", "MOVCCsr")>;
def : InstRW<[WriteALU, A9ReadALU],
      (instregex "MVNr")>;
def : InstRW<[A9WriteI2],
      (instregex "MOVCCi32imm", "MOVi32imm", "MOV_ga_dyn")>;
def : InstRW<[A9WriteI2pc],
      (instregex "MOV_ga_pcrel")>;
def : InstRW<[A9WriteI2ld],
      (instregex "MOV_ga_pcrel_ldr")>;

def : InstRW<[WriteALU],
      (instregex "SEL")>;

def : InstRW<[WriteALUsi],
      (instregex "BFC", "BFI", "UBFX", "SBFX")>;

// Multiplies with a single result write.
def : InstRW<[A9WriteM],
      (instregex "MUL", "MULv5", "SMMUL", "SMMULR",
                 "MLA", "MLAv5", "MLS",
                 "SMMLA", "SMMLAR", "SMMLS", "SMMLSR")>;
// Multiplies with two result writes (A9WriteM plus A9WriteMHi).
def : InstRW<[A9WriteM, A9WriteMHi],
      (instregex "SMULL", "SMULLv5", "UMULL", "UMULLv5",
                 "SMLAL$", "UMLAL", "UMAAL",
                 "SMLALv5", "UMLALv5", "UMAALv5",
                 "SMLALBB", "SMLALBT", "SMLALTB", "SMLALTT")>;
// FIXME: These instructions used to have NoItinerary. Just copied the one from above.
// Dual 16-bit multiply / multiply-accumulate instructions, listed as
// base-name / X-variant pairs. They reuse the A9WriteM + A9WriteMHi mapping.
def : InstRW< [A9WriteM, A9WriteMHi],
      (instregex "SMLAD", "SMLADX", "SMLALD", "SMLALDX", "SMLSD", "SMLSDX",
      "SMLSLD", "SMLSLDX", "SMUAD", "SMUADX", "SMUSD", "SMUSDX")>;

// 16-bit multiplies and multiply-accumulates.
def : InstRW<[A9WriteM16, A9WriteM16Hi],
      (instregex "SMULBB", "SMULBT", "SMULTB", "SMULTT", "SMULWB", "SMULWT")>;
def : InstRW<[A9WriteM16, A9WriteM16Hi],
      (instregex "SMLABB", "SMLABT", "SMLATB", "SMLATT", "SMLAWB", "SMLAWT")>;

// Loads. Word loads use A9WriteL / A9WriteLsi; the byte and halfword
// forms listed below use A9WriteLb / A9WriteLbsi.
def : InstRW<[A9WriteL], (instregex "LDRi12", "PICLDR$")>;
def : InstRW<[A9WriteLsi], (instregex "LDRrs")>;
def : InstRW<[A9WriteLb],
      (instregex "LDRBi12", "PICLDRH", "PICLDRB", "PICLDRSH", "PICLDRSB",
      "LDRH", "LDRSH", "LDRSB")>;
// NOTE(review): LDRBrs is assumed to be the intended byte counterpart of
// LDRrs (mirroring LDRi12 / LDRBi12 above); a second "LDRrs" entry here
// would overlap the A9WriteLsi mapping. Confirm against ARMInstrInfo.td.
def : InstRW<[A9WriteLbsi], (instregex "LDRBrs")>;

// WriteDiv lists no resources and has zero latency.
def : WriteRes<WriteDiv, []> { let Latency = 0; }

// All branch writes go to A9UnitB.
def : WriteRes<WriteBr, [A9UnitB]>;
def : WriteRes<WriteBrL, [A9UnitB]>;
def : WriteRes<WriteBrTbl, [A9UnitB]>;
// WritePreLd lists no resources.
def : WriteRes<WritePreLd, []>;
def : SchedAlias<WriteCvtFP, A9WriteF>;
// WriteNoop: no resources, zero latency, zero micro-ops.
def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }
} // SchedModel = CortexA9Model
