1 //=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines the itinerary class data for the ARM Cortex A9 processors. 11 // 12 //===----------------------------------------------------------------------===// 13 14 // ===---------------------------------------------------------------------===// 15 // This section contains legacy support for itineraries. This is 16 // required until SD and PostRA schedulers are replaced by MachineScheduler. 17 18 // 19 // Ad-hoc scheduling information derived from pretty vague "Cortex-A9 Technical 20 // Reference Manual". 21 // 22 // Functional units 23 def A9_Issue0 : FuncUnit; // Issue 0 24 def A9_Issue1 : FuncUnit; // Issue 1 25 def A9_Branch : FuncUnit; // Branch 26 def A9_ALU0 : FuncUnit; // ALU / MUL pipeline 0 27 def A9_ALU1 : FuncUnit; // ALU pipeline 1 28 def A9_AGU : FuncUnit; // Address generation unit for ld / st 29 def A9_NPipe : FuncUnit; // NEON pipeline 30 def A9_MUX0 : FuncUnit; // AGU + NEON/FPU multiplexer 31 def A9_LSUnit : FuncUnit; // L/S Unit 32 def A9_DRegsVFP: FuncUnit; // FP register set, VFP side 33 def A9_DRegsN : FuncUnit; // FP register set, NEON side 34 35 // Bypasses 36 def A9_LdBypass : Bypass; 37 38 def CortexA9Itineraries : ProcessorItineraries< 39 [A9_Issue0, A9_Issue1, A9_Branch, A9_ALU0, A9_ALU1, A9_AGU, A9_NPipe, A9_MUX0, 40 A9_LSUnit, A9_DRegsVFP, A9_DRegsN], 41 [A9_LdBypass], [ 42 // Two fully-pipelined integer ALU pipelines 43 44 // 45 // Move instructions, unconditional 46 InstrItinData<IIC_iMOVi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 47 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>, 48 InstrItinData<IIC_iMOVr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 49 InstrStage<1, [A9_ALU0, 
A9_ALU1]>], [1, 1]>, 50 InstrItinData<IIC_iMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 51 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>, 52 InstrItinData<IIC_iMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 53 InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>, 54 InstrItinData<IIC_iMOVix2 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 55 InstrStage<1, [A9_ALU0, A9_ALU1]>, 56 InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>, 57 InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 58 InstrStage<1, [A9_ALU0, A9_ALU1]>, 59 InstrStage<1, [A9_ALU0, A9_ALU1]>, 60 InstrStage<1, [A9_ALU0, A9_ALU1]>], [3]>, 61 InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 62 InstrStage<1, [A9_ALU0, A9_ALU1]>, 63 InstrStage<1, [A9_ALU0, A9_ALU1]>, 64 InstrStage<1, [A9_MUX0], 0>, 65 InstrStage<1, [A9_AGU], 0>, 66 InstrStage<1, [A9_LSUnit]>], [5]>, 67 // 68 // MVN instructions 69 InstrItinData<IIC_iMVNi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 70 InstrStage<1, [A9_ALU0, A9_ALU1]>], 71 [1]>, 72 InstrItinData<IIC_iMVNr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 73 InstrStage<1, [A9_ALU0, A9_ALU1]>], 74 [1, 1], [NoBypass, A9_LdBypass]>, 75 InstrItinData<IIC_iMVNsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 76 InstrStage<2, [A9_ALU0, A9_ALU1]>], 77 [2, 1]>, 78 InstrItinData<IIC_iMVNsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 79 InstrStage<3, [A9_ALU0, A9_ALU1]>], 80 [3, 1, 1]>, 81 // 82 // No operand cycles 83 InstrItinData<IIC_iALUx , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 84 InstrStage<1, [A9_ALU0, A9_ALU1]>]>, 85 // 86 // Binary Instructions that produce a result 87 InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 88 InstrStage<1, [A9_ALU0, A9_ALU1]>], 89 [1, 1], [NoBypass, A9_LdBypass]>, 90 InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 91 InstrStage<1, [A9_ALU0, A9_ALU1]>], 92 [1, 1, 1], [NoBypass, A9_LdBypass, A9_LdBypass]>, 93 InstrItinData<IIC_iALUsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 
94 InstrStage<2, [A9_ALU0, A9_ALU1]>], 95 [2, 1, 1], [NoBypass, A9_LdBypass, NoBypass]>, 96 InstrItinData<IIC_iALUsir,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 97 InstrStage<2, [A9_ALU0, A9_ALU1]>], 98 [2, 1, 1], [NoBypass, NoBypass, A9_LdBypass]>, 99 InstrItinData<IIC_iALUsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 100 InstrStage<3, [A9_ALU0, A9_ALU1]>], 101 [3, 1, 1, 1], 102 [NoBypass, A9_LdBypass, NoBypass, NoBypass]>, 103 // 104 // Bitwise Instructions that produce a result 105 InstrItinData<IIC_iBITi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 106 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>, 107 InstrItinData<IIC_iBITr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 108 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>, 109 InstrItinData<IIC_iBITsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 110 InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>, 111 InstrItinData<IIC_iBITsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 112 InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>, 113 // 114 // Unary Instructions that produce a result 115 116 // CLZ, RBIT, etc. 
117 InstrItinData<IIC_iUNAr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 118 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>, 119 120 // BFC, BFI, UBFX, SBFX 121 InstrItinData<IIC_iUNAsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 122 InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1]>, 123 124 // 125 // Zero and sign extension instructions 126 InstrItinData<IIC_iEXTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 127 InstrStage<1, [A9_ALU0, A9_ALU1]>], [2, 1]>, 128 InstrItinData<IIC_iEXTAr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 129 InstrStage<2, [A9_ALU0, A9_ALU1]>], [3, 1, 1]>, 130 InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 131 InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>, 132 // 133 // Compare instructions 134 InstrItinData<IIC_iCMPi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 135 InstrStage<1, [A9_ALU0, A9_ALU1]>], 136 [1], [A9_LdBypass]>, 137 InstrItinData<IIC_iCMPr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 138 InstrStage<1, [A9_ALU0, A9_ALU1]>], 139 [1, 1], [A9_LdBypass, A9_LdBypass]>, 140 InstrItinData<IIC_iCMPsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 141 InstrStage<2, [A9_ALU0, A9_ALU1]>], 142 [1, 1], [A9_LdBypass, NoBypass]>, 143 InstrItinData<IIC_iCMPsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 144 InstrStage<3, [A9_ALU0, A9_ALU1]>], 145 [1, 1, 1], [A9_LdBypass, NoBypass, NoBypass]>, 146 // 147 // Test instructions 148 InstrItinData<IIC_iTSTi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 149 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>, 150 InstrItinData<IIC_iTSTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 151 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>, 152 InstrItinData<IIC_iTSTsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 153 InstrStage<2, [A9_ALU0, A9_ALU1]>], [1, 1]>, 154 InstrItinData<IIC_iTSTsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 155 InstrStage<3, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>, 156 // 157 // Move instructions, conditional 158 // FIXME: Correctly model the extra input dep on the destination. 
159 InstrItinData<IIC_iCMOVi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 160 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>, 161 InstrItinData<IIC_iCMOVr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 162 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>, 163 InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 164 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>, 165 InstrItinData<IIC_iCMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 166 InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>, 167 InstrItinData<IIC_iCMOVix2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 168 InstrStage<1, [A9_ALU0, A9_ALU1]>, 169 InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 170 InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>, 171 172 // Integer multiply pipeline 173 // 174 InstrItinData<IIC_iMUL16 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 175 InstrStage<2, [A9_ALU0]>], [3, 1, 1]>, 176 InstrItinData<IIC_iMAC16 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 177 InstrStage<2, [A9_ALU0]>], 178 [3, 1, 1, 1]>, 179 InstrItinData<IIC_iMUL32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 180 InstrStage<2, [A9_ALU0]>], [4, 1, 1]>, 181 InstrItinData<IIC_iMAC32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 182 InstrStage<2, [A9_ALU0]>], 183 [4, 1, 1, 1]>, 184 InstrItinData<IIC_iMUL64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 185 InstrStage<3, [A9_ALU0]>], [4, 5, 1, 1]>, 186 InstrItinData<IIC_iMAC64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 187 InstrStage<3, [A9_ALU0]>], 188 [4, 5, 1, 1]>, 189 // Integer load pipeline 190 // FIXME: The timings are some rough approximations 191 // 192 // Immediate offset 193 InstrItinData<IIC_iLoad_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 194 InstrStage<1, [A9_MUX0], 0>, 195 InstrStage<1, [A9_AGU], 0>, 196 InstrStage<1, [A9_LSUnit]>], 197 [3, 1], [A9_LdBypass]>, 198 InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 199 InstrStage<1, [A9_MUX0], 0>, 200 InstrStage<2, [A9_AGU], 0>, 201 InstrStage<1, [A9_LSUnit]>], 202 [4, 1], [A9_LdBypass]>, 203 // 
FIXME: If address is 64-bit aligned, AGU cycles is 1. 204 InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 205 InstrStage<1, [A9_MUX0], 0>, 206 InstrStage<2, [A9_AGU], 0>, 207 InstrStage<1, [A9_LSUnit]>], 208 [3, 3, 1], [A9_LdBypass]>, 209 // 210 // Register offset 211 InstrItinData<IIC_iLoad_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 212 InstrStage<1, [A9_MUX0], 0>, 213 InstrStage<1, [A9_AGU], 0>, 214 InstrStage<1, [A9_LSUnit]>], 215 [3, 1, 1], [A9_LdBypass]>, 216 InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 217 InstrStage<1, [A9_MUX0], 0>, 218 InstrStage<2, [A9_AGU], 0>, 219 InstrStage<1, [A9_LSUnit]>], 220 [4, 1, 1], [A9_LdBypass]>, 221 InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 222 InstrStage<1, [A9_MUX0], 0>, 223 InstrStage<2, [A9_AGU], 0>, 224 InstrStage<1, [A9_LSUnit]>], 225 [3, 3, 1, 1], [A9_LdBypass]>, 226 // 227 // Scaled register offset 228 InstrItinData<IIC_iLoad_si , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 229 InstrStage<1, [A9_MUX0], 0>, 230 InstrStage<1, [A9_AGU], 0>, 231 InstrStage<1, [A9_LSUnit], 0>], 232 [4, 1, 1], [A9_LdBypass]>, 233 InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 234 InstrStage<1, [A9_MUX0], 0>, 235 InstrStage<2, [A9_AGU], 0>, 236 InstrStage<1, [A9_LSUnit]>], 237 [5, 1, 1], [A9_LdBypass]>, 238 // 239 // Immediate offset with update 240 InstrItinData<IIC_iLoad_iu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 241 InstrStage<1, [A9_MUX0], 0>, 242 InstrStage<1, [A9_AGU], 0>, 243 InstrStage<1, [A9_LSUnit]>], 244 [3, 2, 1], [A9_LdBypass]>, 245 InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 246 InstrStage<1, [A9_MUX0], 0>, 247 InstrStage<2, [A9_AGU], 0>, 248 InstrStage<1, [A9_LSUnit]>], 249 [4, 3, 1], [A9_LdBypass]>, 250 // 251 // Register offset with update 252 InstrItinData<IIC_iLoad_ru , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 253 InstrStage<1, [A9_MUX0], 0>, 254 InstrStage<1, [A9_AGU], 0>, 
255 InstrStage<1, [A9_LSUnit]>], 256 [3, 2, 1, 1], [A9_LdBypass]>, 257 InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 258 InstrStage<1, [A9_MUX0], 0>, 259 InstrStage<2, [A9_AGU], 0>, 260 InstrStage<1, [A9_LSUnit]>], 261 [4, 3, 1, 1], [A9_LdBypass]>, 262 InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 263 InstrStage<1, [A9_MUX0], 0>, 264 InstrStage<2, [A9_AGU], 0>, 265 InstrStage<1, [A9_LSUnit]>], 266 [3, 3, 1, 1], [A9_LdBypass]>, 267 // 268 // Scaled register offset with update 269 InstrItinData<IIC_iLoad_siu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 270 InstrStage<1, [A9_MUX0], 0>, 271 InstrStage<1, [A9_AGU], 0>, 272 InstrStage<1, [A9_LSUnit]>], 273 [4, 3, 1, 1], [A9_LdBypass]>, 274 InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 275 InstrStage<1, [A9_MUX0], 0>, 276 InstrStage<2, [A9_AGU], 0>, 277 InstrStage<1, [A9_LSUnit]>], 278 [5, 4, 1, 1], [A9_LdBypass]>, 279 // 280 // Load multiple, def is the 5th operand. 281 // FIXME: This assumes 3 to 4 registers. 282 InstrItinData<IIC_iLoad_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 283 InstrStage<1, [A9_MUX0], 0>, 284 InstrStage<2, [A9_AGU], 1>, 285 InstrStage<2, [A9_LSUnit]>], 286 [1, 1, 1, 1, 3], 287 [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass], 288 -1>, // dynamic uops 289 // 290 // Load multiple + update, defs are the 1st and 5th operands. 
291 InstrItinData<IIC_iLoad_mu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 292 InstrStage<1, [A9_MUX0], 0>, 293 InstrStage<2, [A9_AGU], 1>, 294 InstrStage<2, [A9_LSUnit]>], 295 [2, 1, 1, 1, 3], 296 [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass], 297 -1>, // dynamic uops 298 // 299 // Load multiple plus branch 300 InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 301 InstrStage<1, [A9_MUX0], 0>, 302 InstrStage<1, [A9_AGU], 1>, 303 InstrStage<2, [A9_LSUnit]>, 304 InstrStage<1, [A9_Branch]>], 305 [1, 2, 1, 1, 3], 306 [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass], 307 -1>, // dynamic uops 308 // 309 // Pop, def is the 3rd operand. 310 InstrItinData<IIC_iPop , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 311 InstrStage<1, [A9_MUX0], 0>, 312 InstrStage<2, [A9_AGU], 1>, 313 InstrStage<2, [A9_LSUnit]>], 314 [1, 1, 3], 315 [NoBypass, NoBypass, A9_LdBypass], 316 -1>, // dynamic uops 317 // 318 // Pop + branch, def is the 3rd operand. 319 InstrItinData<IIC_iPop_Br, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 320 InstrStage<1, [A9_MUX0], 0>, 321 InstrStage<2, [A9_AGU], 1>, 322 InstrStage<2, [A9_LSUnit]>, 323 InstrStage<1, [A9_Branch]>], 324 [1, 1, 3], 325 [NoBypass, NoBypass, A9_LdBypass], 326 -1>, // dynamic uops 327 // 328 // iLoadi + iALUr for t2LDRpci_pic. 
329 InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 330 InstrStage<1, [A9_MUX0], 0>, 331 InstrStage<1, [A9_AGU], 0>, 332 InstrStage<1, [A9_LSUnit]>, 333 InstrStage<1, [A9_ALU0, A9_ALU1]>], 334 [2, 1]>, 335 336 // Integer store pipeline 337 /// 338 // Immediate offset 339 InstrItinData<IIC_iStore_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 340 InstrStage<1, [A9_MUX0], 0>, 341 InstrStage<1, [A9_AGU], 0>, 342 InstrStage<1, [A9_LSUnit]>], [1, 1]>, 343 InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 344 InstrStage<1, [A9_MUX0], 0>, 345 InstrStage<2, [A9_AGU], 1>, 346 InstrStage<1, [A9_LSUnit]>], [1, 1]>, 347 // FIXME: If address is 64-bit aligned, AGU cycles is 1. 348 InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 349 InstrStage<1, [A9_MUX0], 0>, 350 InstrStage<2, [A9_AGU], 1>, 351 InstrStage<1, [A9_LSUnit]>], [1, 1]>, 352 // 353 // Register offset 354 InstrItinData<IIC_iStore_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 355 InstrStage<1, [A9_MUX0], 0>, 356 InstrStage<1, [A9_AGU], 0>, 357 InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>, 358 InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 359 InstrStage<1, [A9_MUX0], 0>, 360 InstrStage<2, [A9_AGU], 1>, 361 InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>, 362 InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 363 InstrStage<1, [A9_MUX0], 0>, 364 InstrStage<2, [A9_AGU], 1>, 365 InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>, 366 // 367 // Scaled register offset 368 InstrItinData<IIC_iStore_si , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 369 InstrStage<1, [A9_MUX0], 0>, 370 InstrStage<1, [A9_AGU], 0>, 371 InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>, 372 InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 373 InstrStage<1, [A9_MUX0], 0>, 374 InstrStage<2, [A9_AGU], 1>, 375 InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>, 376 // 377 // Immediate offset with update 378 InstrItinData<IIC_iStore_iu , [InstrStage<1, 
[A9_Issue0, A9_Issue1], 0>, 379 InstrStage<1, [A9_MUX0], 0>, 380 InstrStage<1, [A9_AGU], 0>, 381 InstrStage<1, [A9_LSUnit]>], [2, 1, 1]>, 382 InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 383 InstrStage<1, [A9_MUX0], 0>, 384 InstrStage<2, [A9_AGU], 1>, 385 InstrStage<1, [A9_LSUnit]>], [3, 1, 1]>, 386 // 387 // Register offset with update 388 InstrItinData<IIC_iStore_ru , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 389 InstrStage<1, [A9_MUX0], 0>, 390 InstrStage<1, [A9_AGU], 0>, 391 InstrStage<1, [A9_LSUnit]>], 392 [2, 1, 1, 1]>, 393 InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 394 InstrStage<1, [A9_MUX0], 0>, 395 InstrStage<2, [A9_AGU], 1>, 396 InstrStage<1, [A9_LSUnit]>], 397 [3, 1, 1, 1]>, 398 InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 399 InstrStage<1, [A9_MUX0], 0>, 400 InstrStage<2, [A9_AGU], 1>, 401 InstrStage<1, [A9_LSUnit]>], 402 [3, 1, 1, 1]>, 403 // 404 // Scaled register offset with update 405 InstrItinData<IIC_iStore_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 406 InstrStage<1, [A9_MUX0], 0>, 407 InstrStage<1, [A9_AGU], 0>, 408 InstrStage<1, [A9_LSUnit]>], 409 [2, 1, 1, 1]>, 410 InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 411 InstrStage<1, [A9_MUX0], 0>, 412 InstrStage<2, [A9_AGU], 1>, 413 InstrStage<1, [A9_LSUnit]>], 414 [3, 1, 1, 1]>, 415 // 416 // Store multiple 417 InstrItinData<IIC_iStore_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 418 InstrStage<1, [A9_MUX0], 0>, 419 InstrStage<1, [A9_AGU], 0>, 420 InstrStage<2, [A9_LSUnit]>], 421 [], [], -1>, // dynamic uops 422 // 423 // Store multiple + update 424 InstrItinData<IIC_iStore_mu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 425 InstrStage<1, [A9_MUX0], 0>, 426 InstrStage<1, [A9_AGU], 0>, 427 InstrStage<2, [A9_LSUnit]>], 428 [2], [], -1>, // dynamic uops 429 // 430 // Preload 431 InstrItinData<IIC_Preload, [InstrStage<1, [A9_Issue0, A9_Issue1]>], [1, 1]>, 432 433 // Branch 434 
// 435 // no delay slots, so the latency of a branch is unimportant 436 InstrItinData<IIC_Br , [InstrStage<1, [A9_Issue0], 0>, 437 InstrStage<1, [A9_Issue1], 0>, 438 InstrStage<1, [A9_Branch]>]>, 439 440 // VFP and NEON share the same register file. This means that every VFP 441 // instruction should wait for full completion of the consecutive NEON 442 // instruction and vice-versa. We model this behavior with two artificial FUs: 443 // DRegsVFP and DRegsN. 444 // 445 // Every VFP instruction: 446 // - Acquires DRegsVFP resource for 1 cycle 447 // - Reserves DRegsN resource for the whole duration (including time to 448 // register file writeback!). 449 // Every NEON instruction does the same but with FUs swapped. 450 // 451 // Since the reserved FU cannot be acquired, this models precisely 452 // "cross-domain" stalls. 453 454 // VFP 455 // Issue through integer pipeline, and execute in NEON unit. 456 457 // FP Special Register to Integer Register File Move 458 InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 459 InstrStage<1, [A9_MUX0], 0>, 460 InstrStage<1, [A9_DRegsVFP], 0, Required>, 461 InstrStage<2, [A9_DRegsN], 0, Reserved>, 462 InstrStage<1, [A9_NPipe]>], 463 [1]>, 464 // 465 // Single-precision FP Unary 466 InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 467 InstrStage<1, [A9_MUX0], 0>, 468 InstrStage<1, [A9_DRegsVFP], 0, Required>, 469 // Extra latency cycles since wbck is 2 cycles 470 InstrStage<3, [A9_DRegsN], 0, Reserved>, 471 InstrStage<1, [A9_NPipe]>], 472 [1, 1]>, 473 // 474 // Double-precision FP Unary 475 InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 476 InstrStage<1, [A9_MUX0], 0>, 477 InstrStage<1, [A9_DRegsVFP], 0, Required>, 478 // Extra latency cycles since wbck is 2 cycles 479 InstrStage<3, [A9_DRegsN], 0, Reserved>, 480 InstrStage<1, [A9_NPipe]>], 481 [1, 1]>, 482 483 // 484 // Single-precision FP Compare 485 InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_Issue0, 
A9_Issue1], 0>, 486 InstrStage<1, [A9_MUX0], 0>, 487 InstrStage<1, [A9_DRegsVFP], 0, Required>, 488 // Extra latency cycles since wbck is 4 cycles 489 InstrStage<5, [A9_DRegsN], 0, Reserved>, 490 InstrStage<1, [A9_NPipe]>], 491 [1, 1]>, 492 // 493 // Double-precision FP Compare 494 InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 495 InstrStage<1, [A9_MUX0], 0>, 496 InstrStage<1, [A9_DRegsVFP], 0, Required>, 497 // Extra latency cycles since wbck is 4 cycles 498 InstrStage<5, [A9_DRegsN], 0, Reserved>, 499 InstrStage<1, [A9_NPipe]>], 500 [1, 1]>, 501 // 502 // Single to Double FP Convert 503 InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 504 InstrStage<1, [A9_MUX0], 0>, 505 InstrStage<1, [A9_DRegsVFP], 0, Required>, 506 InstrStage<5, [A9_DRegsN], 0, Reserved>, 507 InstrStage<1, [A9_NPipe]>], 508 [4, 1]>, 509 // 510 // Double to Single FP Convert 511 InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 512 InstrStage<1, [A9_MUX0], 0>, 513 InstrStage<1, [A9_DRegsVFP], 0, Required>, 514 InstrStage<5, [A9_DRegsN], 0, Reserved>, 515 InstrStage<1, [A9_NPipe]>], 516 [4, 1]>, 517 518 // 519 // Single to Half FP Convert 520 InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 521 InstrStage<1, [A9_MUX0], 0>, 522 InstrStage<1, [A9_DRegsVFP], 0, Required>, 523 InstrStage<5, [A9_DRegsN], 0, Reserved>, 524 InstrStage<1, [A9_NPipe]>], 525 [4, 1]>, 526 // 527 // Half to Single FP Convert 528 InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 529 InstrStage<1, [A9_MUX0], 0>, 530 InstrStage<1, [A9_DRegsVFP], 0, Required>, 531 InstrStage<3, [A9_DRegsN], 0, Reserved>, 532 InstrStage<1, [A9_NPipe]>], 533 [2, 1]>, 534 535 // 536 // Single-Precision FP to Integer Convert 537 InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 538 InstrStage<1, [A9_MUX0], 0>, 539 InstrStage<1, [A9_DRegsVFP], 0, Required>, 540 InstrStage<5, [A9_DRegsN], 0, Reserved>, 541 InstrStage<1, 
[A9_NPipe]>], 542 [4, 1]>, 543 // 544 // Double-Precision FP to Integer Convert 545 InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 546 InstrStage<1, [A9_MUX0], 0>, 547 InstrStage<1, [A9_DRegsVFP], 0, Required>, 548 InstrStage<5, [A9_DRegsN], 0, Reserved>, 549 InstrStage<1, [A9_NPipe]>], 550 [4, 1]>, 551 // 552 // Integer to Single-Precision FP Convert 553 InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 554 InstrStage<1, [A9_MUX0], 0>, 555 InstrStage<1, [A9_DRegsVFP], 0, Required>, 556 InstrStage<5, [A9_DRegsN], 0, Reserved>, 557 InstrStage<1, [A9_NPipe]>], 558 [4, 1]>, 559 // 560 // Integer to Double-Precision FP Convert 561 InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 562 InstrStage<1, [A9_MUX0], 0>, 563 InstrStage<1, [A9_DRegsVFP], 0, Required>, 564 InstrStage<5, [A9_DRegsN], 0, Reserved>, 565 InstrStage<1, [A9_NPipe]>], 566 [4, 1]>, 567 // 568 // Single-precision FP ALU 569 InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 570 InstrStage<1, [A9_MUX0], 0>, 571 InstrStage<1, [A9_DRegsVFP], 0, Required>, 572 InstrStage<5, [A9_DRegsN], 0, Reserved>, 573 InstrStage<1, [A9_NPipe]>], 574 [4, 1, 1]>, 575 // 576 // Double-precision FP ALU 577 InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 578 InstrStage<1, [A9_MUX0], 0>, 579 InstrStage<1, [A9_DRegsVFP], 0, Required>, 580 InstrStage<5, [A9_DRegsN], 0, Reserved>, 581 InstrStage<1, [A9_NPipe]>], 582 [4, 1, 1]>, 583 // 584 // Single-precision FP Multiply 585 InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 586 InstrStage<1, [A9_MUX0], 0>, 587 InstrStage<1, [A9_DRegsVFP], 0, Required>, 588 InstrStage<6, [A9_DRegsN], 0, Reserved>, 589 InstrStage<1, [A9_NPipe]>], 590 [5, 1, 1]>, 591 // 592 // Double-precision FP Multiply 593 InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 594 InstrStage<1, [A9_MUX0], 0>, 595 InstrStage<1, [A9_DRegsVFP], 0, Required>, 596 InstrStage<7, 
[A9_DRegsN], 0, Reserved>, 597 InstrStage<2, [A9_NPipe]>], 598 [6, 1, 1]>, 599 // 600 // Single-precision FP MAC 601 InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 602 InstrStage<1, [A9_MUX0], 0>, 603 InstrStage<1, [A9_DRegsVFP], 0, Required>, 604 InstrStage<9, [A9_DRegsN], 0, Reserved>, 605 InstrStage<1, [A9_NPipe]>], 606 [8, 1, 1, 1]>, 607 // 608 // Double-precision FP MAC 609 InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 610 InstrStage<1, [A9_MUX0], 0>, 611 InstrStage<1, [A9_DRegsVFP], 0, Required>, 612 InstrStage<10, [A9_DRegsN], 0, Reserved>, 613 InstrStage<2, [A9_NPipe]>], 614 [9, 1, 1, 1]>, 615 // 616 // Single-precision Fused FP MAC 617 InstrItinData<IIC_fpFMAC32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 618 InstrStage<1, [A9_MUX0], 0>, 619 InstrStage<1, [A9_DRegsVFP], 0, Required>, 620 InstrStage<9, [A9_DRegsN], 0, Reserved>, 621 InstrStage<1, [A9_NPipe]>], 622 [8, 1, 1, 1]>, 623 // 624 // Double-precision Fused FP MAC 625 InstrItinData<IIC_fpFMAC64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 626 InstrStage<1, [A9_MUX0], 0>, 627 InstrStage<1, [A9_DRegsVFP], 0, Required>, 628 InstrStage<10, [A9_DRegsN], 0, Reserved>, 629 InstrStage<2, [A9_NPipe]>], 630 [9, 1, 1, 1]>, 631 // 632 // Single-precision FP DIV 633 InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 634 InstrStage<1, [A9_MUX0], 0>, 635 InstrStage<1, [A9_DRegsVFP], 0, Required>, 636 InstrStage<16, [A9_DRegsN], 0, Reserved>, 637 InstrStage<10, [A9_NPipe]>], 638 [15, 1, 1]>, 639 // 640 // Double-precision FP DIV 641 InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 642 InstrStage<1, [A9_MUX0], 0>, 643 InstrStage<1, [A9_DRegsVFP], 0, Required>, 644 InstrStage<26, [A9_DRegsN], 0, Reserved>, 645 InstrStage<20, [A9_NPipe]>], 646 [25, 1, 1]>, 647 // 648 // Single-precision FP SQRT 649 InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 650 InstrStage<1, [A9_MUX0], 0>, 651 InstrStage<1, 
[A9_DRegsVFP], 0, Required>, 652 InstrStage<18, [A9_DRegsN], 0, Reserved>, 653 InstrStage<13, [A9_NPipe]>], 654 [17, 1]>, 655 // 656 // Double-precision FP SQRT 657 InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 658 InstrStage<1, [A9_MUX0], 0>, 659 InstrStage<1, [A9_DRegsVFP], 0, Required>, 660 InstrStage<33, [A9_DRegsN], 0, Reserved>, 661 InstrStage<28, [A9_NPipe]>], 662 [32, 1]>, 663 664 // 665 // Integer to Single-precision Move 666 InstrItinData<IIC_fpMOVIS, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 667 InstrStage<1, [A9_MUX0], 0>, 668 InstrStage<1, [A9_DRegsVFP], 0, Required>, 669 // Extra 1 latency cycle since wbck is 2 cycles 670 InstrStage<3, [A9_DRegsN], 0, Reserved>, 671 InstrStage<1, [A9_NPipe]>], 672 [1, 1]>, 673 // 674 // Integer to Double-precision Move 675 InstrItinData<IIC_fpMOVID, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 676 InstrStage<1, [A9_MUX0], 0>, 677 InstrStage<1, [A9_DRegsVFP], 0, Required>, 678 // Extra 1 latency cycle since wbck is 2 cycles 679 InstrStage<3, [A9_DRegsN], 0, Reserved>, 680 InstrStage<1, [A9_NPipe]>], 681 [1, 1, 1]>, 682 // 683 // Single-precision to Integer Move 684 // 685 // On A9 move-from-VFP is free to issue with no stall if other VFP 686 // operations are in flight. I assume it still can't dual-issue though. 687 InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 688 InstrStage<1, [A9_MUX0], 0>], 689 [2, 1]>, 690 // 691 // Double-precision to Integer Move 692 // 693 // On A9 move-from-VFP is free to issue with no stall if other VFP 694 // operations are in flight. I assume it still can't dual-issue though. 
695 InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 696 InstrStage<1, [A9_MUX0], 0>], 697 [2, 1, 1]>, 698 // 699 // Single-precision FP Load 700 InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 701 InstrStage<1, [A9_MUX0], 0>, 702 InstrStage<1, [A9_DRegsVFP], 0, Required>, 703 InstrStage<2, [A9_DRegsN], 0, Reserved>, 704 InstrStage<1, [A9_NPipe], 0>, 705 InstrStage<1, [A9_LSUnit]>], 706 [1, 1]>, 707 // 708 // Double-precision FP Load 709 // FIXME: Result latency is 1 if address is 64-bit aligned. 710 InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 711 InstrStage<1, [A9_MUX0], 0>, 712 InstrStage<1, [A9_DRegsVFP], 0, Required>, 713 InstrStage<2, [A9_DRegsN], 0, Reserved>, 714 InstrStage<1, [A9_NPipe], 0>, 715 InstrStage<1, [A9_LSUnit]>], 716 [2, 1]>, 717 // 718 // FP Load Multiple 719 // FIXME: assumes 2 doubles which requires 2 LS cycles. 720 InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 721 InstrStage<1, [A9_MUX0], 0>, 722 InstrStage<1, [A9_DRegsVFP], 0, Required>, 723 InstrStage<2, [A9_DRegsN], 0, Reserved>, 724 InstrStage<1, [A9_NPipe], 0>, 725 InstrStage<2, [A9_LSUnit]>], 726 [1, 1, 1, 1], [], -1>, // dynamic uops 727 // 728 // FP Load Multiple + update 729 // FIXME: assumes 2 doubles which requires 2 LS cycles. 
730 InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 731 InstrStage<1, [A9_MUX0], 0>, 732 InstrStage<1, [A9_DRegsVFP], 0, Required>, 733 InstrStage<2, [A9_DRegsN], 0, Reserved>, 734 InstrStage<1, [A9_NPipe], 0>, 735 InstrStage<2, [A9_LSUnit]>], 736 [2, 1, 1, 1], [], -1>, // dynamic uops 737 // 738 // Single-precision FP Store 739 InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 740 InstrStage<1, [A9_MUX0], 0>, 741 InstrStage<1, [A9_DRegsVFP], 0, Required>, 742 InstrStage<2, [A9_DRegsN], 0, Reserved>, 743 InstrStage<1, [A9_NPipe], 0>, 744 InstrStage<1, [A9_LSUnit]>], 745 [1, 1]>, 746 // 747 // Double-precision FP Store 748 InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 749 InstrStage<1, [A9_MUX0], 0>, 750 InstrStage<1, [A9_DRegsVFP], 0, Required>, 751 InstrStage<2, [A9_DRegsN], 0, Reserved>, 752 InstrStage<1, [A9_NPipe], 0>, 753 InstrStage<1, [A9_LSUnit]>], 754 [1, 1]>, 755 // 756 // FP Store Multiple 757 // FIXME: assumes 2 doubles which requires 2 LS cycles. 758 InstrItinData<IIC_fpStore_m,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 759 InstrStage<1, [A9_MUX0], 0>, 760 InstrStage<1, [A9_DRegsVFP], 0, Required>, 761 InstrStage<2, [A9_DRegsN], 0, Reserved>, 762 InstrStage<1, [A9_NPipe], 0>, 763 InstrStage<2, [A9_LSUnit]>], 764 [1, 1, 1, 1], [], -1>, // dynamic uops 765 // 766 // FP Store Multiple + update 767 // FIXME: assumes 2 doubles which requires 2 LS cycles. 
768 InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 769 InstrStage<1, [A9_MUX0], 0>, 770 InstrStage<1, [A9_DRegsVFP], 0, Required>, 771 InstrStage<2, [A9_DRegsN], 0, Reserved>, 772 InstrStage<1, [A9_NPipe], 0>, 773 InstrStage<2, [A9_LSUnit]>], 774 [2, 1, 1, 1], [], -1>, // dynamic uops 775 // NEON 776 // VLD1 777 InstrItinData<IIC_VLD1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 778 InstrStage<1, [A9_MUX0], 0>, 779 InstrStage<1, [A9_DRegsN], 0, Required>, 780 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 781 InstrStage<1, [A9_NPipe], 0>, 782 InstrStage<1, [A9_LSUnit]>], 783 [1, 1]>, 784 // VLD1x2 785 InstrItinData<IIC_VLD1x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 786 InstrStage<1, [A9_MUX0], 0>, 787 InstrStage<1, [A9_DRegsN], 0, Required>, 788 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 789 InstrStage<1, [A9_NPipe], 0>, 790 InstrStage<1, [A9_LSUnit]>], 791 [1, 1, 1]>, 792 // VLD1x3 793 InstrItinData<IIC_VLD1x3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 794 InstrStage<1, [A9_MUX0], 0>, 795 InstrStage<1, [A9_DRegsN], 0, Required>, 796 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 797 InstrStage<2, [A9_NPipe], 0>, 798 InstrStage<2, [A9_LSUnit]>], 799 [1, 1, 2, 1]>, 800 // VLD1x4 801 InstrItinData<IIC_VLD1x4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 802 InstrStage<1, [A9_MUX0], 0>, 803 InstrStage<1, [A9_DRegsN], 0, Required>, 804 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 805 InstrStage<2, [A9_NPipe], 0>, 806 InstrStage<2, [A9_LSUnit]>], 807 [1, 1, 2, 2, 1]>, 808 // VLD1u 809 InstrItinData<IIC_VLD1u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 810 InstrStage<1, [A9_MUX0], 0>, 811 InstrStage<1, [A9_DRegsN], 0, Required>, 812 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 813 InstrStage<1, [A9_NPipe], 0>, 814 InstrStage<1, [A9_LSUnit]>], 815 [1, 2, 1]>, 816 // VLD1x2u 817 InstrItinData<IIC_VLD1x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 818 InstrStage<1, [A9_MUX0], 0>, 819 InstrStage<1, [A9_DRegsN], 0, Required>, 820 InstrStage<7, [A9_DRegsVFP], 
0, Reserved>, 821 InstrStage<1, [A9_NPipe], 0>, 822 InstrStage<1, [A9_LSUnit]>], 823 [1, 1, 2, 1]>, 824 // VLD1x3u 825 InstrItinData<IIC_VLD1x3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 826 InstrStage<1, [A9_MUX0], 0>, 827 InstrStage<1, [A9_DRegsN], 0, Required>, 828 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 829 InstrStage<2, [A9_NPipe], 0>, 830 InstrStage<2, [A9_LSUnit]>], 831 [1, 1, 2, 2, 1]>, 832 // VLD1x4u 833 InstrItinData<IIC_VLD1x4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 834 InstrStage<1, [A9_MUX0], 0>, 835 InstrStage<1, [A9_DRegsN], 0, Required>, 836 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 837 InstrStage<2, [A9_NPipe], 0>, 838 InstrStage<2, [A9_LSUnit]>], 839 [1, 1, 2, 2, 2, 1]>, 840 // 841 // VLD1ln 842 InstrItinData<IIC_VLD1ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 843 InstrStage<1, [A9_MUX0], 0>, 844 InstrStage<1, [A9_DRegsN], 0, Required>, 845 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 846 InstrStage<2, [A9_NPipe], 0>, 847 InstrStage<2, [A9_LSUnit]>], 848 [3, 1, 1, 1]>, 849 // 850 // VLD1lnu 851 InstrItinData<IIC_VLD1lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 852 InstrStage<1, [A9_MUX0], 0>, 853 InstrStage<1, [A9_DRegsN], 0, Required>, 854 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 855 InstrStage<2, [A9_NPipe], 0>, 856 InstrStage<2, [A9_LSUnit]>], 857 [3, 2, 1, 1, 1, 1]>, 858 // 859 // VLD1dup 860 InstrItinData<IIC_VLD1dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 861 InstrStage<1, [A9_MUX0], 0>, 862 InstrStage<1, [A9_DRegsN], 0, Required>, 863 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 864 InstrStage<1, [A9_NPipe], 0>, 865 InstrStage<1, [A9_LSUnit]>], 866 [2, 1]>, 867 // 868 // VLD1dupu 869 InstrItinData<IIC_VLD1dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 870 InstrStage<1, [A9_MUX0], 0>, 871 InstrStage<1, [A9_DRegsN], 0, Required>, 872 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 873 InstrStage<1, [A9_NPipe], 0>, 874 InstrStage<1, [A9_LSUnit]>], 875 [2, 2, 1, 1]>, 876 // 877 // VLD2 878 InstrItinData<IIC_VLD2, [InstrStage<1, 
[A9_Issue0, A9_Issue1], 0>, 879 InstrStage<1, [A9_MUX0], 0>, 880 InstrStage<1, [A9_DRegsN], 0, Required>, 881 // Extra latency cycles since wbck is 7 cycles 882 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 883 InstrStage<1, [A9_NPipe], 0>, 884 InstrStage<1, [A9_LSUnit]>], 885 [2, 2, 1]>, 886 // 887 // VLD2x2 888 InstrItinData<IIC_VLD2x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 889 InstrStage<1, [A9_MUX0], 0>, 890 InstrStage<1, [A9_DRegsN], 0, Required>, 891 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 892 InstrStage<2, [A9_NPipe], 0>, 893 InstrStage<2, [A9_LSUnit]>], 894 [2, 3, 2, 3, 1]>, 895 // 896 // VLD2ln 897 InstrItinData<IIC_VLD2ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 898 InstrStage<1, [A9_MUX0], 0>, 899 InstrStage<1, [A9_DRegsN], 0, Required>, 900 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 901 InstrStage<2, [A9_NPipe], 0>, 902 InstrStage<2, [A9_LSUnit]>], 903 [3, 3, 1, 1, 1, 1]>, 904 // 905 // VLD2u 906 InstrItinData<IIC_VLD2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 907 InstrStage<1, [A9_MUX0], 0>, 908 InstrStage<1, [A9_DRegsN], 0, Required>, 909 // Extra latency cycles since wbck is 7 cycles 910 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 911 InstrStage<1, [A9_NPipe], 0>, 912 InstrStage<1, [A9_LSUnit]>], 913 [2, 2, 2, 1, 1, 1]>, 914 // 915 // VLD2x2u 916 InstrItinData<IIC_VLD2x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 917 InstrStage<1, [A9_MUX0], 0>, 918 InstrStage<1, [A9_DRegsN], 0, Required>, 919 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 920 InstrStage<2, [A9_NPipe], 0>, 921 InstrStage<2, [A9_LSUnit]>], 922 [2, 3, 2, 3, 2, 1]>, 923 // 924 // VLD2lnu 925 InstrItinData<IIC_VLD2lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 926 InstrStage<1, [A9_MUX0], 0>, 927 InstrStage<1, [A9_DRegsN], 0, Required>, 928 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 929 InstrStage<2, [A9_NPipe], 0>, 930 InstrStage<2, [A9_LSUnit]>], 931 [3, 3, 2, 1, 1, 1, 1, 1]>, 932 // 933 // VLD2dup 934 InstrItinData<IIC_VLD2dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 935 
InstrStage<1, [A9_MUX0], 0>, 936 InstrStage<1, [A9_DRegsN], 0, Required>, 937 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 938 InstrStage<1, [A9_NPipe], 0>, 939 InstrStage<1, [A9_LSUnit]>], 940 [2, 2, 1]>, 941 // 942 // VLD2dupu 943 InstrItinData<IIC_VLD2dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 944 InstrStage<1, [A9_MUX0], 0>, 945 InstrStage<1, [A9_DRegsN], 0, Required>, 946 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 947 InstrStage<1, [A9_NPipe], 0>, 948 InstrStage<1, [A9_LSUnit]>], 949 [2, 2, 2, 1, 1]>, 950 // 951 // VLD3 952 InstrItinData<IIC_VLD3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 953 InstrStage<1, [A9_MUX0], 0>, 954 InstrStage<1, [A9_DRegsN], 0, Required>, 955 InstrStage<9,[A9_DRegsVFP], 0, Reserved>, 956 InstrStage<3, [A9_NPipe], 0>, 957 InstrStage<3, [A9_LSUnit]>], 958 [3, 3, 4, 1]>, 959 // 960 // VLD3ln 961 InstrItinData<IIC_VLD3ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 962 InstrStage<1, [A9_MUX0], 0>, 963 InstrStage<1, [A9_DRegsN], 0, Required>, 964 InstrStage<11,[A9_DRegsVFP], 0, Reserved>, 965 InstrStage<5, [A9_NPipe], 0>, 966 InstrStage<5, [A9_LSUnit]>], 967 [5, 5, 6, 1, 1, 1, 1, 2]>, 968 // 969 // VLD3u 970 InstrItinData<IIC_VLD3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 971 InstrStage<1, [A9_MUX0], 0>, 972 InstrStage<1, [A9_DRegsN], 0, Required>, 973 InstrStage<9,[A9_DRegsVFP], 0, Reserved>, 974 InstrStage<3, [A9_NPipe], 0>, 975 InstrStage<3, [A9_LSUnit]>], 976 [3, 3, 4, 2, 1]>, 977 // 978 // VLD3lnu 979 InstrItinData<IIC_VLD3lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 980 InstrStage<1, [A9_MUX0], 0>, 981 InstrStage<1, [A9_DRegsN], 0, Required>, 982 InstrStage<11,[A9_DRegsVFP], 0, Reserved>, 983 InstrStage<5, [A9_NPipe], 0>, 984 InstrStage<5, [A9_LSUnit]>], 985 [5, 5, 6, 2, 1, 1, 1, 1, 1, 2]>, 986 // 987 // VLD3dup 988 InstrItinData<IIC_VLD3dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 989 InstrStage<1, [A9_MUX0], 0>, 990 InstrStage<1, [A9_DRegsN], 0, Required>, 991 InstrStage<9, [A9_DRegsVFP], 0, Reserved>, 992 
InstrStage<3, [A9_NPipe], 0>, 993 InstrStage<3, [A9_LSUnit]>], 994 [3, 3, 4, 1]>, 995 // 996 // VLD3dupu 997 InstrItinData<IIC_VLD3dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 998 InstrStage<1, [A9_MUX0], 0>, 999 InstrStage<1, [A9_DRegsN], 0, Required>, 1000 InstrStage<9, [A9_DRegsVFP], 0, Reserved>, 1001 InstrStage<3, [A9_NPipe], 0>, 1002 InstrStage<3, [A9_LSUnit]>], 1003 [3, 3, 4, 2, 1, 1]>, 1004 // 1005 // VLD4 1006 InstrItinData<IIC_VLD4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1007 InstrStage<1, [A9_MUX0], 0>, 1008 InstrStage<1, [A9_DRegsN], 0, Required>, 1009 InstrStage<9,[A9_DRegsVFP], 0, Reserved>, 1010 InstrStage<3, [A9_NPipe], 0>, 1011 InstrStage<3, [A9_LSUnit]>], 1012 [3, 3, 4, 4, 1]>, 1013 // 1014 // VLD4ln 1015 InstrItinData<IIC_VLD4ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1016 InstrStage<1, [A9_MUX0], 0>, 1017 InstrStage<1, [A9_DRegsN], 0, Required>, 1018 InstrStage<10,[A9_DRegsVFP], 0, Reserved>, 1019 InstrStage<4, [A9_NPipe], 0>, 1020 InstrStage<4, [A9_LSUnit]>], 1021 [4, 4, 5, 5, 1, 1, 1, 1, 2, 2]>, 1022 // 1023 // VLD4u 1024 InstrItinData<IIC_VLD4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1025 InstrStage<1, [A9_MUX0], 0>, 1026 InstrStage<1, [A9_DRegsN], 0, Required>, 1027 InstrStage<9,[A9_DRegsVFP], 0, Reserved>, 1028 InstrStage<3, [A9_NPipe], 0>, 1029 InstrStage<3, [A9_LSUnit]>], 1030 [3, 3, 4, 4, 2, 1]>, 1031 // 1032 // VLD4lnu 1033 InstrItinData<IIC_VLD4lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1034 InstrStage<1, [A9_MUX0], 0>, 1035 InstrStage<1, [A9_DRegsN], 0, Required>, 1036 InstrStage<10,[A9_DRegsVFP], 0, Reserved>, 1037 InstrStage<4, [A9_NPipe], 0>, 1038 InstrStage<4, [A9_LSUnit]>], 1039 [4, 4, 5, 5, 2, 1, 1, 1, 1, 1, 2, 2]>, 1040 // 1041 // VLD4dup 1042 InstrItinData<IIC_VLD4dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1043 InstrStage<1, [A9_MUX0], 0>, 1044 InstrStage<1, [A9_DRegsN], 0, Required>, 1045 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1046 InstrStage<2, [A9_NPipe], 0>, 1047 InstrStage<2, [A9_LSUnit]>], 
1048 [2, 2, 3, 3, 1]>, 1049 // 1050 // VLD4dupu 1051 InstrItinData<IIC_VLD4dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1052 InstrStage<1, [A9_MUX0], 0>, 1053 InstrStage<1, [A9_DRegsN], 0, Required>, 1054 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1055 InstrStage<2, [A9_NPipe], 0>, 1056 InstrStage<2, [A9_LSUnit]>], 1057 [2, 2, 3, 3, 2, 1, 1]>, 1058 // 1059 // VST1 1060 InstrItinData<IIC_VST1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1061 InstrStage<1, [A9_MUX0], 0>, 1062 InstrStage<1, [A9_DRegsN], 0, Required>, 1063 InstrStage<1, [A9_DRegsVFP], 0, Reserved>, 1064 InstrStage<1, [A9_NPipe], 0>, 1065 InstrStage<1, [A9_LSUnit]>], 1066 [1, 1, 1]>, 1067 // 1068 // VST1x2 1069 InstrItinData<IIC_VST1x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1070 InstrStage<1, [A9_MUX0], 0>, 1071 InstrStage<1, [A9_DRegsN], 0, Required>, 1072 InstrStage<1, [A9_DRegsVFP], 0, Reserved>, 1073 InstrStage<1, [A9_NPipe], 0>, 1074 InstrStage<1, [A9_LSUnit]>], 1075 [1, 1, 1, 1]>, 1076 // 1077 // VST1x3 1078 InstrItinData<IIC_VST1x3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1079 InstrStage<1, [A9_MUX0], 0>, 1080 InstrStage<1, [A9_DRegsN], 0, Required>, 1081 InstrStage<2, [A9_DRegsVFP], 0, Reserved>, 1082 InstrStage<2, [A9_NPipe], 0>, 1083 InstrStage<2, [A9_LSUnit]>], 1084 [1, 1, 1, 1, 2]>, 1085 // 1086 // VST1x4 1087 InstrItinData<IIC_VST1x4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1088 InstrStage<1, [A9_MUX0], 0>, 1089 InstrStage<1, [A9_DRegsN], 0, Required>, 1090 InstrStage<2, [A9_DRegsVFP], 0, Reserved>, 1091 InstrStage<2, [A9_NPipe], 0>, 1092 InstrStage<2, [A9_LSUnit]>], 1093 [1, 1, 1, 1, 2, 2]>, 1094 // 1095 // VST1u 1096 InstrItinData<IIC_VST1u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1097 InstrStage<1, [A9_MUX0], 0>, 1098 InstrStage<1, [A9_DRegsN], 0, Required>, 1099 InstrStage<1, [A9_DRegsVFP], 0, Reserved>, 1100 InstrStage<1, [A9_NPipe], 0>, 1101 InstrStage<1, [A9_LSUnit]>], 1102 [2, 1, 1, 1, 1]>, 1103 // 1104 // VST1x2u 1105 InstrItinData<IIC_VST1x2u, [InstrStage<1, 
[A9_Issue0, A9_Issue1], 0>, 1106 InstrStage<1, [A9_MUX0], 0>, 1107 InstrStage<1, [A9_DRegsN], 0, Required>, 1108 InstrStage<1, [A9_DRegsVFP], 0, Reserved>, 1109 InstrStage<1, [A9_NPipe], 0>, 1110 InstrStage<1, [A9_LSUnit]>], 1111 [2, 1, 1, 1, 1, 1]>, 1112 // 1113 // VST1x3u 1114 InstrItinData<IIC_VST1x3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1115 InstrStage<1, [A9_MUX0], 0>, 1116 InstrStage<1, [A9_DRegsN], 0, Required>, 1117 InstrStage<2, [A9_DRegsVFP], 0, Reserved>, 1118 InstrStage<2, [A9_NPipe], 0>, 1119 InstrStage<2, [A9_LSUnit]>], 1120 [2, 1, 1, 1, 1, 1, 2]>, 1121 // 1122 // VST1x4u 1123 InstrItinData<IIC_VST1x4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1124 InstrStage<1, [A9_MUX0], 0>, 1125 InstrStage<1, [A9_DRegsN], 0, Required>, 1126 InstrStage<2, [A9_DRegsVFP], 0, Reserved>, 1127 InstrStage<2, [A9_NPipe], 0>, 1128 InstrStage<2, [A9_LSUnit]>], 1129 [2, 1, 1, 1, 1, 1, 2, 2]>, 1130 // 1131 // VST1ln 1132 InstrItinData<IIC_VST1ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1133 InstrStage<1, [A9_MUX0], 0>, 1134 InstrStage<1, [A9_DRegsN], 0, Required>, 1135 InstrStage<1, [A9_DRegsVFP], 0, Reserved>, 1136 InstrStage<1, [A9_NPipe], 0>, 1137 InstrStage<1, [A9_LSUnit]>], 1138 [1, 1, 1]>, 1139 // 1140 // VST1lnu 1141 InstrItinData<IIC_VST1lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1142 InstrStage<1, [A9_MUX0], 0>, 1143 InstrStage<1, [A9_DRegsN], 0, Required>, 1144 InstrStage<1, [A9_DRegsVFP], 0, Reserved>, 1145 InstrStage<1, [A9_NPipe], 0>, 1146 InstrStage<1, [A9_LSUnit]>], 1147 [2, 1, 1, 1, 1]>, 1148 // 1149 // VST2 1150 InstrItinData<IIC_VST2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1151 InstrStage<1, [A9_MUX0], 0>, 1152 InstrStage<1, [A9_DRegsN], 0, Required>, 1153 InstrStage<1, [A9_DRegsVFP], 0, Reserved>, 1154 InstrStage<1, [A9_NPipe], 0>, 1155 InstrStage<1, [A9_LSUnit]>], 1156 [1, 1, 1, 1]>, 1157 // 1158 // VST2x2 1159 InstrItinData<IIC_VST2x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1160 InstrStage<1, [A9_MUX0], 0>, 1161 InstrStage<1, 
[A9_DRegsN], 0, Required>, 1162 InstrStage<3, [A9_DRegsVFP], 0, Reserved>, 1163 InstrStage<3, [A9_NPipe], 0>, 1164 InstrStage<3, [A9_LSUnit]>], 1165 [1, 1, 1, 1, 2, 2]>, 1166 // 1167 // VST2u 1168 InstrItinData<IIC_VST2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1169 InstrStage<1, [A9_MUX0], 0>, 1170 InstrStage<1, [A9_DRegsN], 0, Required>, 1171 InstrStage<1, [A9_DRegsVFP], 0, Reserved>, 1172 InstrStage<1, [A9_NPipe], 0>, 1173 InstrStage<1, [A9_LSUnit]>], 1174 [2, 1, 1, 1, 1, 1]>, 1175 // 1176 // VST2x2u 1177 InstrItinData<IIC_VST2x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1178 InstrStage<1, [A9_MUX0], 0>, 1179 InstrStage<1, [A9_DRegsN], 0, Required>, 1180 InstrStage<3, [A9_DRegsVFP], 0, Reserved>, 1181 InstrStage<3, [A9_NPipe], 0>, 1182 InstrStage<3, [A9_LSUnit]>], 1183 [2, 1, 1, 1, 1, 1, 2, 2]>, 1184 // 1185 // VST2ln 1186 InstrItinData<IIC_VST2ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1187 InstrStage<1, [A9_MUX0], 0>, 1188 InstrStage<1, [A9_DRegsN], 0, Required>, 1189 InstrStage<1, [A9_DRegsVFP], 0, Reserved>, 1190 InstrStage<1, [A9_NPipe], 0>, 1191 InstrStage<1, [A9_LSUnit]>], 1192 [1, 1, 1, 1]>, 1193 // 1194 // VST2lnu 1195 InstrItinData<IIC_VST2lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1196 InstrStage<1, [A9_MUX0], 0>, 1197 InstrStage<1, [A9_DRegsN], 0, Required>, 1198 InstrStage<1, [A9_DRegsVFP], 0, Reserved>, 1199 InstrStage<1, [A9_NPipe], 0>, 1200 InstrStage<1, [A9_LSUnit]>], 1201 [2, 1, 1, 1, 1, 1]>, 1202 // 1203 // VST3 1204 InstrItinData<IIC_VST3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1205 InstrStage<1, [A9_MUX0], 0>, 1206 InstrStage<1, [A9_DRegsN], 0, Required>, 1207 InstrStage<2, [A9_DRegsVFP], 0, Reserved>, 1208 InstrStage<2, [A9_NPipe], 0>, 1209 InstrStage<2, [A9_LSUnit]>], 1210 [1, 1, 1, 1, 2]>, 1211 // 1212 // VST3u 1213 InstrItinData<IIC_VST3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1214 InstrStage<1, [A9_MUX0], 0>, 1215 InstrStage<1, [A9_DRegsN], 0, Required>, 1216 InstrStage<2, [A9_DRegsVFP], 0, Reserved>, 1217 
InstrStage<2, [A9_NPipe], 0>, 1218 InstrStage<2, [A9_LSUnit]>], 1219 [2, 1, 1, 1, 1, 1, 2]>, 1220 // 1221 // VST3ln 1222 InstrItinData<IIC_VST3ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1223 InstrStage<1, [A9_MUX0], 0>, 1224 InstrStage<1, [A9_DRegsN], 0, Required>, 1225 InstrStage<3, [A9_DRegsVFP], 0, Reserved>, 1226 InstrStage<3, [A9_NPipe], 0>, 1227 InstrStage<3, [A9_LSUnit]>], 1228 [1, 1, 1, 1, 2]>, 1229 // 1230 // VST3lnu 1231 InstrItinData<IIC_VST3lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1232 InstrStage<1, [A9_MUX0], 0>, 1233 InstrStage<1, [A9_DRegsN], 0, Required>, 1234 InstrStage<3, [A9_DRegsVFP], 0, Reserved>, 1235 InstrStage<3, [A9_NPipe], 0>, 1236 InstrStage<3, [A9_LSUnit]>], 1237 [2, 1, 1, 1, 1, 1, 2]>, 1238 // 1239 // VST4 1240 InstrItinData<IIC_VST4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1241 InstrStage<1, [A9_MUX0], 0>, 1242 InstrStage<1, [A9_DRegsN], 0, Required>, 1243 InstrStage<2, [A9_DRegsVFP], 0, Reserved>, 1244 InstrStage<2, [A9_NPipe], 0>, 1245 InstrStage<2, [A9_LSUnit]>], 1246 [1, 1, 1, 1, 2, 2]>, 1247 // 1248 // VST4u 1249 InstrItinData<IIC_VST4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1250 InstrStage<1, [A9_MUX0], 0>, 1251 InstrStage<1, [A9_DRegsN], 0, Required>, 1252 InstrStage<2, [A9_DRegsVFP], 0, Reserved>, 1253 InstrStage<2, [A9_NPipe], 0>, 1254 InstrStage<2, [A9_LSUnit]>], 1255 [2, 1, 1, 1, 1, 1, 2, 2]>, 1256 // 1257 // VST4ln 1258 InstrItinData<IIC_VST4ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1259 InstrStage<1, [A9_MUX0], 0>, 1260 InstrStage<1, [A9_DRegsN], 0, Required>, 1261 InstrStage<2, [A9_DRegsVFP], 0, Reserved>, 1262 InstrStage<2, [A9_NPipe], 0>, 1263 InstrStage<2, [A9_LSUnit]>], 1264 [1, 1, 1, 1, 2, 2]>, 1265 // 1266 // VST4lnu 1267 InstrItinData<IIC_VST4lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1268 InstrStage<1, [A9_MUX0], 0>, 1269 InstrStage<1, [A9_DRegsN], 0, Required>, 1270 InstrStage<2, [A9_DRegsVFP], 0, Reserved>, 1271 InstrStage<2, [A9_NPipe], 0>, 1272 InstrStage<2, [A9_LSUnit]>], 1273 [2, 
1, 1, 1, 1, 1, 2, 2]>, 1274 1275 // 1276 // Double-register Integer Unary 1277 InstrItinData<IIC_VUNAiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1278 InstrStage<1, [A9_MUX0], 0>, 1279 InstrStage<1, [A9_DRegsN], 0, Required>, 1280 // Extra latency cycles since wbck is 6 cycles 1281 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1282 InstrStage<1, [A9_NPipe]>], 1283 [4, 2]>, 1284 // 1285 // Quad-register Integer Unary 1286 InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1287 InstrStage<1, [A9_MUX0], 0>, 1288 InstrStage<1, [A9_DRegsN], 0, Required>, 1289 // Extra latency cycles since wbck is 6 cycles 1290 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1291 InstrStage<1, [A9_NPipe]>], 1292 [4, 2]>, 1293 // 1294 // Double-register Integer Q-Unary 1295 InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1296 InstrStage<1, [A9_MUX0], 0>, 1297 InstrStage<1, [A9_DRegsN], 0, Required>, 1298 // Extra latency cycles since wbck is 6 cycles 1299 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1300 InstrStage<1, [A9_NPipe]>], 1301 [4, 1]>, 1302 // 1303 // Quad-register Integer Q-Unary 1304 InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1305 InstrStage<1, [A9_MUX0], 0>, 1306 InstrStage<1, [A9_DRegsN], 0, Required>, 1307 // Extra latency cycles since wbck is 6 cycles 1308 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1309 InstrStage<1, [A9_NPipe]>], 1310 [4, 1]>, 1311 // 1312 // Double-register Integer Binary 1313 InstrItinData<IIC_VBINiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1314 InstrStage<1, [A9_MUX0], 0>, 1315 InstrStage<1, [A9_DRegsN], 0, Required>, 1316 // Extra latency cycles since wbck is 6 cycles 1317 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1318 InstrStage<1, [A9_NPipe]>], 1319 [3, 2, 2]>, 1320 // 1321 // Quad-register Integer Binary 1322 InstrItinData<IIC_VBINiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1323 InstrStage<1, [A9_MUX0], 0>, 1324 InstrStage<1, [A9_DRegsN], 0, Required>, 1325 // Extra latency
cycles since wbck is 6 cycles 1326 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1327 InstrStage<1, [A9_NPipe]>], 1328 [3, 2, 2]>, 1329 // 1330 // Double-register Integer Subtract 1331 InstrItinData<IIC_VSUBiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1332 InstrStage<1, [A9_MUX0], 0>, 1333 InstrStage<1, [A9_DRegsN], 0, Required>, 1334 // Extra latency cycles since wbck is 6 cycles 1335 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1336 InstrStage<1, [A9_NPipe]>], 1337 [3, 2, 1]>, 1338 // 1339 // Quad-register Integer Subtract 1340 InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1341 InstrStage<1, [A9_MUX0], 0>, 1342 InstrStage<1, [A9_DRegsN], 0, Required>, 1343 // Extra latency cycles since wbck is 6 cycles 1344 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1345 InstrStage<1, [A9_NPipe]>], 1346 [3, 2, 1]>, 1347 // 1348 // Double-register Integer Shift 1349 InstrItinData<IIC_VSHLiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1350 InstrStage<1, [A9_MUX0], 0>, 1351 InstrStage<1, [A9_DRegsN], 0, Required>, 1352 // Extra latency cycles since wbck is 6 cycles 1353 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1354 InstrStage<1, [A9_NPipe]>], 1355 [3, 1, 1]>, 1356 // 1357 // Quad-register Integer Shift 1358 InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1359 InstrStage<1, [A9_MUX0], 0>, 1360 InstrStage<1, [A9_DRegsN], 0, Required>, 1361 // Extra latency cycles since wbck is 6 cycles 1362 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1363 InstrStage<1, [A9_NPipe]>], 1364 [3, 1, 1]>, 1365 // 1366 // Double-register Integer Shift (4 cycle) 1367 InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1368 InstrStage<1, [A9_MUX0], 0>, 1369 InstrStage<1, [A9_DRegsN], 0, Required>, 1370 // Extra latency cycles since wbck is 6 cycles 1371 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1372 InstrStage<1, [A9_NPipe]>], 1373 [4, 1, 1]>, 1374 // 1375 // Quad-register Integer Shift (4 cycle) 1376 InstrItinData<IIC_VSHLi4Q, [InstrStage<1, 
[A9_Issue0, A9_Issue1], 0>, 1377 InstrStage<1, [A9_MUX0], 0>, 1378 InstrStage<1, [A9_DRegsN], 0, Required>, 1379 // Extra latency cycles since wbck is 6 cycles 1380 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1381 InstrStage<1, [A9_NPipe]>], 1382 [4, 1, 1]>, 1383 // 1384 // Double-register Integer Binary (4 cycle) 1385 InstrItinData<IIC_VBINi4D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1386 InstrStage<1, [A9_MUX0], 0>, 1387 InstrStage<1, [A9_DRegsN], 0, Required>, 1388 // Extra latency cycles since wbck is 6 cycles 1389 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1390 InstrStage<1, [A9_NPipe]>], 1391 [4, 2, 2]>, 1392 // 1393 // Quad-register Integer Binary (4 cycle) 1394 InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1395 InstrStage<1, [A9_MUX0], 0>, 1396 InstrStage<1, [A9_DRegsN], 0, Required>, 1397 // Extra latency cycles since wbck is 6 cycles 1398 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1399 InstrStage<1, [A9_NPipe]>], 1400 [4, 2, 2]>, 1401 // 1402 // Double-register Integer Subtract (4 cycle) 1403 InstrItinData<IIC_VSUBi4D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1404 InstrStage<1, [A9_MUX0], 0>, 1405 InstrStage<1, [A9_DRegsN], 0, Required>, 1406 // Extra latency cycles since wbck is 6 cycles 1407 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1408 InstrStage<1, [A9_NPipe]>], 1409 [4, 2, 1]>, 1410 // 1411 // Quad-register Integer Subtract (4 cycle) 1412 InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1413 InstrStage<1, [A9_MUX0], 0>, 1414 InstrStage<1, [A9_DRegsN], 0, Required>, 1415 // Extra latency cycles since wbck is 6 cycles 1416 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1417 InstrStage<1, [A9_NPipe]>], 1418 [4, 2, 1]>, 1419 1420 // 1421 // Double-register Integer Count 1422 InstrItinData<IIC_VCNTiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1423 InstrStage<1, [A9_MUX0], 0>, 1424 InstrStage<1, [A9_DRegsN], 0, Required>, 1425 // Extra latency cycles since wbck is 6 cycles 1426 InstrStage<7, [A9_DRegsVFP], 0, 
Reserved>, 1427 InstrStage<1, [A9_NPipe]>], 1428 [3, 2, 2]>, 1429 // 1430 // Quad-register Integer Count 1431 // Result written in N3, but that is relative to the last cycle of multicycle, 1432 // so we use 4 for those cases 1433 InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1434 InstrStage<1, [A9_MUX0], 0>, 1435 InstrStage<1, [A9_DRegsN], 0, Required>, 1436 // Extra latency cycles since wbck is 7 cycles 1437 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1438 InstrStage<2, [A9_NPipe]>], 1439 [4, 2, 2]>, 1440 // 1441 // Double-register Absolute Difference and Accumulate 1442 InstrItinData<IIC_VABAD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1443 InstrStage<1, [A9_MUX0], 0>, 1444 InstrStage<1, [A9_DRegsN], 0, Required>, 1445 // Extra latency cycles since wbck is 6 cycles 1446 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1447 InstrStage<1, [A9_NPipe]>], 1448 [6, 3, 2, 1]>, 1449 // 1450 // Quad-register Absolute Difference and Accumulate 1451 InstrItinData<IIC_VABAQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1452 InstrStage<1, [A9_MUX0], 0>, 1453 InstrStage<1, [A9_DRegsN], 0, Required>, 1454 // Extra latency cycles since wbck is 6 cycles 1455 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1456 InstrStage<2, [A9_NPipe]>], 1457 [6, 3, 2, 1]>, 1458 // 1459 // Double-register Integer Pair Add Long 1460 InstrItinData<IIC_VPALiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1461 InstrStage<1, [A9_MUX0], 0>, 1462 InstrStage<1, [A9_DRegsN], 0, Required>, 1463 // Extra latency cycles since wbck is 6 cycles 1464 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1465 InstrStage<1, [A9_NPipe]>], 1466 [6, 3, 1]>, 1467 // 1468 // Quad-register Integer Pair Add Long 1469 InstrItinData<IIC_VPALiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1470 InstrStage<1, [A9_MUX0], 0>, 1471 InstrStage<1, [A9_DRegsN], 0, Required>, 1472 // Extra latency cycles since wbck is 6 cycles 1473 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1474 InstrStage<2, [A9_NPipe]>], 1475 [6, 3, 1]>, 1476 1477 // 
1478 // Double-register Integer Multiply (.8, .16) 1479 InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1480 InstrStage<1, [A9_MUX0], 0>, 1481 InstrStage<1, [A9_DRegsN], 0, Required>, 1482 // Extra latency cycles since wbck is 6 cycles 1483 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1484 InstrStage<1, [A9_NPipe]>], 1485 [6, 2, 2]>, 1486 // 1487 // Quad-register Integer Multiply (.8, .16) 1488 InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1489 InstrStage<1, [A9_MUX0], 0>, 1490 InstrStage<1, [A9_DRegsN], 0, Required>, 1491 // Extra latency cycles since wbck is 7 cycles 1492 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1493 InstrStage<2, [A9_NPipe]>], 1494 [7, 2, 2]>, 1495 1496 // 1497 // Double-register Integer Multiply (.32) 1498 InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1499 InstrStage<1, [A9_MUX0], 0>, 1500 InstrStage<1, [A9_DRegsN], 0, Required>, 1501 // Extra latency cycles since wbck is 7 cycles 1502 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1503 InstrStage<2, [A9_NPipe]>], 1504 [7, 2, 1]>, 1505 // 1506 // Quad-register Integer Multiply (.32) 1507 InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1508 InstrStage<1, [A9_MUX0], 0>, 1509 InstrStage<1, [A9_DRegsN], 0, Required>, 1510 // Extra latency cycles since wbck is 9 cycles 1511 InstrStage<10, [A9_DRegsVFP], 0, Reserved>, 1512 InstrStage<4, [A9_NPipe]>], 1513 [9, 2, 1]>, 1514 // 1515 // Double-register Integer Multiply-Accumulate (.8, .16) 1516 InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1517 InstrStage<1, [A9_MUX0], 0>, 1518 InstrStage<1, [A9_DRegsN], 0, Required>, 1519 // Extra latency cycles since wbck is 6 cycles 1520 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1521 InstrStage<1, [A9_NPipe]>], 1522 [6, 3, 2, 2]>, 1523 // 1524 // Double-register Integer Multiply-Accumulate (.32) 1525 InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1526 InstrStage<1, [A9_MUX0], 
0>, 1527 InstrStage<1, [A9_DRegsN], 0, Required>, 1528 // Extra latency cycles since wbck is 7 cycles 1529 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1530 InstrStage<2, [A9_NPipe]>], 1531 [7, 3, 2, 1]>, 1532 // 1533 // Quad-register Integer Multiply-Accumulate (.8, .16) 1534 InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1535 InstrStage<1, [A9_MUX0], 0>, 1536 InstrStage<1, [A9_DRegsN], 0, Required>, 1537 // Extra latency cycles since wbck is 7 cycles 1538 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1539 InstrStage<2, [A9_NPipe]>], 1540 [7, 3, 2, 2]>, 1541 // 1542 // Quad-register Integer Multiply-Accumulate (.32) 1543 InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1544 InstrStage<1, [A9_MUX0], 0>, 1545 InstrStage<1, [A9_DRegsN], 0, Required>, 1546 // Extra latency cycles since wbck is 9 cycles 1547 InstrStage<10, [A9_DRegsVFP], 0, Reserved>, 1548 InstrStage<4, [A9_NPipe]>], 1549 [9, 3, 2, 1]>, 1550 1551 // 1552 // Move 1553 InstrItinData<IIC_VMOV, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1554 InstrStage<1, [A9_MUX0], 0>, 1555 InstrStage<1, [A9_DRegsN], 0, Required>, 1556 InstrStage<1, [A9_DRegsVFP], 0, Reserved>, 1557 InstrStage<1, [A9_NPipe]>], 1558 [1,1]>, 1559 // 1560 // Move Immediate 1561 InstrItinData<IIC_VMOVImm, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1562 InstrStage<1, [A9_MUX0], 0>, 1563 InstrStage<1, [A9_DRegsN], 0, Required>, 1564 // Extra latency cycles since wbck is 6 cycles 1565 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1566 InstrStage<1, [A9_NPipe]>], 1567 [3]>, 1568 // 1569 // Double-register Permute Move 1570 InstrItinData<IIC_VMOVD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1571 InstrStage<1, [A9_MUX0], 0>, 1572 InstrStage<1, [A9_DRegsN], 0, Required>, 1573 // Extra latency cycles since wbck is 6 cycles 1574 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1575 InstrStage<1, [A9_NPipe]>], 1576 [2, 1]>, 1577 // 1578 // Quad-register Permute Move 1579 InstrItinData<IIC_VMOVQ, [InstrStage<1, 
[A9_Issue0, A9_Issue1], 0>, 1580 InstrStage<1, [A9_MUX0], 0>, 1581 InstrStage<1, [A9_DRegsN], 0, Required>, 1582 // Extra latency cycles since wbck is 6 cycles 1583 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1584 InstrStage<1, [A9_NPipe]>], 1585 [2, 1]>, 1586 // 1587 // Integer to Single-precision Move 1588 InstrItinData<IIC_VMOVIS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1589 InstrStage<1, [A9_MUX0], 0>, 1590 InstrStage<1, [A9_DRegsN], 0, Required>, 1591 InstrStage<3, [A9_DRegsVFP], 0, Reserved>, 1592 InstrStage<1, [A9_NPipe]>], 1593 [1, 1]>, 1594 // 1595 // Integer to Double-precision Move 1596 InstrItinData<IIC_VMOVID , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1597 InstrStage<1, [A9_MUX0], 0>, 1598 InstrStage<1, [A9_DRegsN], 0, Required>, 1599 InstrStage<3, [A9_DRegsVFP], 0, Reserved>, 1600 InstrStage<1, [A9_NPipe]>], 1601 [1, 1, 1]>, 1602 // 1603 // Single-precision to Integer Move 1604 InstrItinData<IIC_VMOVSI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1605 InstrStage<1, [A9_MUX0], 0>, 1606 InstrStage<1, [A9_DRegsN], 0, Required>, 1607 InstrStage<3, [A9_DRegsVFP], 0, Reserved>, 1608 InstrStage<1, [A9_NPipe]>], 1609 [2, 1]>, 1610 // 1611 // Double-precision to Integer Move 1612 InstrItinData<IIC_VMOVDI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1613 InstrStage<1, [A9_MUX0], 0>, 1614 InstrStage<1, [A9_DRegsN], 0, Required>, 1615 InstrStage<3, [A9_DRegsVFP], 0, Reserved>, 1616 InstrStage<1, [A9_NPipe]>], 1617 [2, 2, 1]>, 1618 // 1619 // Integer to Lane Move 1620 InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1621 InstrStage<1, [A9_MUX0], 0>, 1622 InstrStage<1, [A9_DRegsN], 0, Required>, 1623 InstrStage<4, [A9_DRegsVFP], 0, Reserved>, 1624 InstrStage<2, [A9_NPipe]>], 1625 [3, 1, 1]>, 1626 1627 // 1628 // Vector narrow move 1629 InstrItinData<IIC_VMOVN, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1630 InstrStage<1, [A9_MUX0], 0>, 1631 InstrStage<1, [A9_DRegsN], 0, Required>, 1632 // Extra latency cycles since wbck is 6 cycles 1633 
InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1634 InstrStage<1, [A9_NPipe]>], 1635 [3, 1]>, 1636 // 1637 // Double-register FP Unary 1638 InstrItinData<IIC_VUNAD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1639 InstrStage<1, [A9_MUX0], 0>, 1640 InstrStage<1, [A9_DRegsN], 0, Required>, 1641 // Extra latency cycles since wbck is 6 cycles 1642 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1643 InstrStage<1, [A9_NPipe]>], 1644 [5, 2]>, 1645 // 1646 // Quad-register FP Unary 1647 // Result written in N5, but that is relative to the last cycle of multicycle, 1648 // so we use 6 for those cases 1649 InstrItinData<IIC_VUNAQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1650 InstrStage<1, [A9_MUX0], 0>, 1651 InstrStage<1, [A9_DRegsN], 0, Required>, 1652 // Extra latency cycles since wbck is 7 cycles 1653 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1654 InstrStage<2, [A9_NPipe]>], 1655 [6, 2]>, 1656 // 1657 // Double-register FP Binary 1658 // FIXME: We're using this itin for many instructions and [2, 2] here is too 1659 // optimistic. 1660 InstrItinData<IIC_VBIND, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1661 InstrStage<1, [A9_MUX0], 0>, 1662 InstrStage<1, [A9_DRegsN], 0, Required>, 1663 // Extra latency cycles since wbck is 6 cycles 1664 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1665 InstrStage<1, [A9_NPipe]>], 1666 [5, 2, 2]>, 1667 1668 // 1669 // VPADD, etc. 
1670 InstrItinData<IIC_VPBIND, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1671 InstrStage<1, [A9_MUX0], 0>, 1672 InstrStage<1, [A9_DRegsN], 0, Required>, 1673 // Extra latency cycles since wbck is 6 cycles 1674 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1675 InstrStage<1, [A9_NPipe]>], 1676 [5, 1, 1]>, 1677 // 1678 // Double-register FP VMUL 1679 InstrItinData<IIC_VFMULD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1680 InstrStage<1, [A9_MUX0], 0>, 1681 InstrStage<1, [A9_DRegsN], 0, Required>, 1682 // Extra latency cycles since wbck is 6 cycles 1683 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1684 InstrStage<1, [A9_NPipe]>], 1685 [5, 2, 1]>, 1686 // 1687 // Quad-register FP Binary 1688 // Result written in N5, but that is relative to the last cycle of multicycle, 1689 // so we use 6 for those cases 1690 // FIXME: We're using this itin for many instructions and [2, 2] here is too 1691 // optimistic. 1692 InstrItinData<IIC_VBINQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1693 InstrStage<1, [A9_MUX0], 0>, 1694 InstrStage<1, [A9_DRegsN], 0, Required>, 1695 // Extra latency cycles since wbck is 7 cycles 1696 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1697 InstrStage<2, [A9_NPipe]>], 1698 [6, 2, 2]>, 1699 // 1700 // Quad-register FP VMUL 1701 InstrItinData<IIC_VFMULQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1702 InstrStage<1, [A9_MUX0], 0>, 1703 InstrStage<1, [A9_DRegsN], 0, Required>, 1704 // Extra latency cycles since wbck is 7 cycles 1705 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1706 InstrStage<1, [A9_NPipe]>], 1707 [6, 2, 1]>, 1708 // 1709 // Double-register FP Multiply-Accumulate 1710 InstrItinData<IIC_VMACD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1711 InstrStage<1, [A9_MUX0], 0>, 1712 InstrStage<1, [A9_DRegsN], 0, Required>, 1713 // Extra latency cycles since wbck is 7 cycles 1714 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1715 InstrStage<2, [A9_NPipe]>], 1716 [6, 3, 2, 1]>, 1717 // 1718 // Quad-register FP Multiply-Accumulate 1719 // Result written in N9,
but that is relative to the last cycle of multicycle, 1720 // so we use 10 for those cases 1721 InstrItinData<IIC_VMACQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1722 InstrStage<1, [A9_MUX0], 0>, 1723 InstrStage<1, [A9_DRegsN], 0, Required>, 1724 // Extra latency cycles since wbck is 9 cycles 1725 InstrStage<10, [A9_DRegsVFP], 0, Reserved>, 1726 InstrStage<4, [A9_NPipe]>], 1727 [8, 4, 2, 1]>, 1728 // 1729 // Double-register Fused FP Multiple-Accumulate 1730 InstrItinData<IIC_VFMACD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1731 InstrStage<1, [A9_MUX0], 0>, 1732 InstrStage<1, [A9_DRegsN], 0, Required>, 1733 // Extra latency cycles since wbck is 7 cycles 1734 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1735 InstrStage<2, [A9_NPipe]>], 1736 [6, 3, 2, 1]>, 1737 // 1738 // Quad-register Fused FP Multiple-Accumulate 1739 // Result written in N9, but that is relative to the last cycle of multicycle, 1740 // so we use 10 for those cases 1741 InstrItinData<IIC_VFMACQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1742 InstrStage<1, [A9_MUX0], 0>, 1743 InstrStage<1, [A9_DRegsN], 0, Required>, 1744 // Extra latency cycles since wbck is 9 cycles 1745 InstrStage<10, [A9_DRegsVFP], 0, Reserved>, 1746 InstrStage<4, [A9_NPipe]>], 1747 [8, 4, 2, 1]>, 1748 // 1749 // Double-register Reciprical Step 1750 InstrItinData<IIC_VRECSD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1751 InstrStage<1, [A9_MUX0], 0>, 1752 InstrStage<1, [A9_DRegsN], 0, Required>, 1753 // Extra latency cycles since wbck is 10 cycles 1754 InstrStage<11, [A9_DRegsVFP], 0, Reserved>, 1755 InstrStage<1, [A9_NPipe]>], 1756 [9, 2, 2]>, 1757 // 1758 // Quad-register Reciprical Step 1759 InstrItinData<IIC_VRECSQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1760 InstrStage<1, [A9_MUX0], 0>, 1761 InstrStage<1, [A9_DRegsN], 0, Required>, 1762 // Extra latency cycles since wbck is 11 cycles 1763 InstrStage<12, [A9_DRegsVFP], 0, Reserved>, 1764 InstrStage<2, [A9_NPipe]>], 1765 [10, 2, 2]>, 1766 // 1767 // Double-register 
Permute 1768 InstrItinData<IIC_VPERMD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1769 InstrStage<1, [A9_MUX0], 0>, 1770 InstrStage<1, [A9_DRegsN], 0, Required>, 1771 // Extra latency cycles since wbck is 6 cycles 1772 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1773 InstrStage<1, [A9_NPipe]>], 1774 [2, 2, 1, 1]>, 1775 // 1776 // Quad-register Permute 1777 // Result written in N2, but that is relative to the last cycle of multicycle, 1778 // so we use 3 for those cases 1779 InstrItinData<IIC_VPERMQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1780 InstrStage<1, [A9_MUX0], 0>, 1781 InstrStage<1, [A9_DRegsN], 0, Required>, 1782 // Extra latency cycles since wbck is 7 cycles 1783 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1784 InstrStage<2, [A9_NPipe]>], 1785 [3, 3, 1, 1]>, 1786 // 1787 // Quad-register Permute (3 cycle issue) 1788 // Result written in N2, but that is relative to the last cycle of multicycle, 1789 // so we use 4 for those cases 1790 InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1791 InstrStage<1, [A9_MUX0], 0>, 1792 InstrStage<1, [A9_DRegsN], 0, Required>, 1793 // Extra latency cycles since wbck is 8 cycles 1794 InstrStage<9, [A9_DRegsVFP], 0, Reserved>, 1795 InstrStage<3, [A9_NPipe]>], 1796 [4, 4, 1, 1]>, 1797 1798 // 1799 // Double-register VEXT 1800 InstrItinData<IIC_VEXTD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1801 InstrStage<1, [A9_MUX0], 0>, 1802 InstrStage<1, [A9_DRegsN], 0, Required>, 1803 // Extra latency cycles since wbck is 6 cycles 1804 InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1805 InstrStage<1, [A9_NPipe]>], 1806 [2, 1, 1]>, 1807 // 1808 // Quad-register VEXT 1809 InstrItinData<IIC_VEXTQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1810 InstrStage<1, [A9_MUX0], 0>, 1811 InstrStage<1, [A9_DRegsN], 0, Required>, 1812 // Extra latency cycles since wbck is 7 cycles 1813 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1814 InstrStage<2, [A9_NPipe]>], 1815 [3, 1, 2]>, 1816 // 1817 // VTB 1818 InstrItinData<IIC_VTB1, 
[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1819 InstrStage<1, [A9_MUX0], 0>, 1820 InstrStage<1, [A9_DRegsN], 0, Required>, 1821 // Extra latency cycles since wbck is 7 cycles 1822 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1823 InstrStage<2, [A9_NPipe]>], 1824 [3, 2, 1]>, 1825 InstrItinData<IIC_VTB2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1826 InstrStage<1, [A9_MUX0], 0>, 1827 InstrStage<2, [A9_DRegsN], 0, Required>, 1828 // Extra latency cycles since wbck is 7 cycles 1829 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1830 InstrStage<2, [A9_NPipe]>], 1831 [3, 2, 2, 1]>, 1832 InstrItinData<IIC_VTB3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1833 InstrStage<1, [A9_MUX0], 0>, 1834 InstrStage<2, [A9_DRegsN], 0, Required>, 1835 // Extra latency cycles since wbck is 8 cycles 1836 InstrStage<9, [A9_DRegsVFP], 0, Reserved>, 1837 InstrStage<3, [A9_NPipe]>], 1838 [4, 2, 2, 3, 1]>, 1839 InstrItinData<IIC_VTB4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1840 InstrStage<1, [A9_MUX0], 0>, 1841 InstrStage<1, [A9_DRegsN], 0, Required>, 1842 // Extra latency cycles since wbck is 8 cycles 1843 InstrStage<9, [A9_DRegsVFP], 0, Reserved>, 1844 InstrStage<3, [A9_NPipe]>], 1845 [4, 2, 2, 3, 3, 1]>, 1846 // 1847 // VTBX 1848 InstrItinData<IIC_VTBX1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1849 InstrStage<1, [A9_MUX0], 0>, 1850 InstrStage<1, [A9_DRegsN], 0, Required>, 1851 // Extra latency cycles since wbck is 7 cycles 1852 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1853 InstrStage<2, [A9_NPipe]>], 1854 [3, 1, 2, 1]>, 1855 InstrItinData<IIC_VTBX2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1856 InstrStage<1, [A9_MUX0], 0>, 1857 InstrStage<1, [A9_DRegsN], 0, Required>, 1858 // Extra latency cycles since wbck is 7 cycles 1859 InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1860 InstrStage<2, [A9_NPipe]>], 1861 [3, 1, 2, 2, 1]>, 1862 InstrItinData<IIC_VTBX3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1863 InstrStage<1, [A9_MUX0], 0>, 1864 InstrStage<1, [A9_DRegsN], 0, Required>, 1865 // Extra 
latency cycles since wbck is 8 cycles 1866 InstrStage<9, [A9_DRegsVFP], 0, Reserved>, 1867 InstrStage<3, [A9_NPipe]>], 1868 [4, 1, 2, 2, 3, 1]>, 1869 InstrItinData<IIC_VTBX4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1870 InstrStage<1, [A9_MUX0], 0>, 1871 InstrStage<1, [A9_DRegsN], 0, Required>, 1872 // Extra latency cycles since wbck is 8 cycles 1873 InstrStage<9, [A9_DRegsVFP], 0, Reserved>, 1874 InstrStage<2, [A9_NPipe]>], 1875 [4, 1, 2, 2, 3, 3, 1]> 1876 ]>; 1877 1878 // ===---------------------------------------------------------------------===// 1879 // The following definitions describe the simpler per-operand machine model. 1880 // This works with MachineScheduler and will eventually replace itineraries. 1881 1882 1883 // Cortex-A9 machine model for scheduling and other instruction cost heuristics. 1884 def CortexA9Model : SchedMachineModel { 1885 let IssueWidth = 2; // 2 micro-ops are dispatched per cycle. 1886 let MicroOpBufferSize = 56; // Based on available renamed registers. 1887 let LoadLatency = 2; // Optimistic load latency assuming bypass. 1888 // This is overriden by OperandCycles if the 1889 // Itineraries are queried instead. 1890 let MispredictPenalty = 8; // Based on estimate of pipeline depth. 1891 1892 let Itineraries = CortexA9Itineraries; 1893 } 1894 1895 //===----------------------------------------------------------------------===// 1896 // Define each kind of processor resource and number available. 1897 1898 let SchedModel = CortexA9Model in { 1899 1900 def A9UnitALU : ProcResource<2>; 1901 def A9UnitMul : ProcResource<1> { let Super = A9UnitALU; } 1902 def A9UnitAGU : ProcResource<1>; 1903 def A9UnitLS : ProcResource<1>; 1904 def A9UnitFP : ProcResource<1> { let BufferSize = 0; } 1905 def A9UnitB : ProcResource<1>; 1906 1907 //===----------------------------------------------------------------------===// 1908 // Define scheduler read/write types with their resources and latency on A9. 
// A9WriteIssue uses an issue slot but lists no processor resources and has
// zero latency. It is meant for operands whose other associated writes all
// have NumMicroOps = 0.
let Latency = 0 in
def A9WriteIssue : SchedWriteRes<[]>;

// Integer register write on the ALU resource (no latency override).
def A9WriteI : SchedWriteRes<[A9UnitALU]>;
// Integer register write, shifted-by-register form.
let Latency = 2 in
def A9WriteIsr : SchedWriteRes<[A9UnitALU]>;

// Plain ALU operation (no latency override).
def A9WriteALU : SchedWriteRes<[A9UnitALU]>;
// ALU operation whose operand is shifted by an immediate. This binds the
// generic WriteALUsi type directly instead of introducing an A9 write type.
let Latency = 2 in
def : WriteRes<WriteALUsi, [A9UnitALU]>;
// ALU operation whose operand is shifted by a register.
let Latency = 3 in
def A9WriteALUsr : SchedWriteRes<[A9UnitALU]>;

// Multiplication.
// The *Hi writes carry NumMicroOps = 0, so they add no micro-op of their own.
let Latency = 4 in
def A9WriteM : SchedWriteRes<[A9UnitMul, A9UnitMul]>;
let Latency = 5, NumMicroOps = 0 in
def A9WriteMHi : SchedWriteRes<[A9UnitMul]>;
let Latency = 3 in
def A9WriteM16 : SchedWriteRes<[A9UnitMul]>;
let Latency = 4, NumMicroOps = 0 in
def A9WriteM16Hi : SchedWriteRes<[A9UnitMul]>;

// Floating-point
// Only one FP or AGU instruction may issue per cycle. We model this
// by having FP instructions consume the AGU resource.
let Latency = 4  in def A9WriteF      : SchedWriteRes<[A9UnitFP, A9UnitAGU]>;
let Latency = 1  in def A9WriteFMov   : SchedWriteRes<[A9UnitFP, A9UnitAGU]>;
// In the itinerary mapping later in this file, the S/D suffixed types are
// bound to the 32-bit and 64-bit itinerary classes respectively.
let Latency = 5  in def A9WriteFMulS  : SchedWriteRes<[A9UnitFP, A9UnitAGU]>;
let Latency = 6  in def A9WriteFMulD  : SchedWriteRes<[A9UnitFP, A9UnitAGU]>;
let Latency = 8  in def A9WriteFMAS   : SchedWriteRes<[A9UnitFP, A9UnitAGU]>;
let Latency = 9  in def A9WriteFMAD   : SchedWriteRes<[A9UnitFP, A9UnitAGU]>;
let Latency = 15 in def A9WriteFDivS  : SchedWriteRes<[A9UnitFP, A9UnitAGU]>;
let Latency = 25 in def A9WriteFDivD  : SchedWriteRes<[A9UnitFP, A9UnitAGU]>;
let Latency = 17 in def A9WriteFSqrtS : SchedWriteRes<[A9UnitFP, A9UnitAGU]>;
let Latency = 32 in def A9WriteFSqrtD : SchedWriteRes<[A9UnitFP, A9UnitAGU]>;

// NEON latencies vary widely, so each write type is simply named after its
// latency: A9WriteV<N> has Latency = N. There is no A9WriteV8.
let Latency = 1  in def A9WriteV1  : SchedWriteRes<[A9UnitFP, A9UnitAGU]>;
let Latency = 2  in def A9WriteV2  : SchedWriteRes<[A9UnitFP, A9UnitAGU]>;
let Latency = 3  in def A9WriteV3  : SchedWriteRes<[A9UnitFP, A9UnitAGU]>;
let Latency = 4  in def A9WriteV4  : SchedWriteRes<[A9UnitFP, A9UnitAGU]>;
let Latency = 5  in def A9WriteV5  : SchedWriteRes<[A9UnitFP, A9UnitAGU]>;
let Latency = 6  in def A9WriteV6  : SchedWriteRes<[A9UnitFP, A9UnitAGU]>;
let Latency = 7  in def A9WriteV7  : SchedWriteRes<[A9UnitFP, A9UnitAGU]>;
let Latency = 9  in def A9WriteV9  : SchedWriteRes<[A9UnitFP, A9UnitAGU]>;
let Latency = 10 in def A9WriteV10 : SchedWriteRes<[A9UnitFP, A9UnitAGU]>;

// Reserve A9UnitFP for 2 consecutive cycles.
// ResourceCycles is given one entry per listed resource, in order:
// A9UnitFP is held for 2 cycles and A9UnitAGU for 1. Spelling out both
// entries keeps the list the same length as the resource list instead of
// relying on the missing entry being filled in.
def A9Write2V4 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 4;
  let ResourceCycles = [2, 1];
}
def A9Write2V7 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 7;
  let ResourceCycles = [2, 1];
}
def A9Write2V9 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 9;
  let ResourceCycles = [2, 1];
}

// Branches don't have a def operand but still consume resources.
def A9WriteB   : SchedWriteRes<[A9UnitB]>;

// Address generation. Consumes the AGU but contributes no micro-op.
def A9WriteAdr : SchedWriteRes<[A9UnitAGU]> { let NumMicroOps = 0; }

// Load Integer.
def A9WriteL   : SchedWriteRes<[A9UnitLS]> { let Latency = 3; }
// Load the upper 32-bits using the same micro-op (no resources, no micro-op).
def A9WriteLHi : SchedWriteRes<[]> { let Latency = 3;
                                     let NumMicroOps = 0; }
// Offset shifted by register.
// NOTE(review): elsewhere in this file the "si" suffix denotes
// shifted-by-immediate (cf. WriteALUsi); confirm the wording above.
def A9WriteLsi : SchedWriteRes<[A9UnitLS]> { let Latency = 4; }
// Load (and zero extend) a byte.
def A9WriteLb  : SchedWriteRes<[A9UnitLS]> { let Latency = 4; }
def A9WriteLbsi : SchedWriteRes<[A9UnitLS]> { let Latency = 5; }

// Load or Store Float, aligned.
def A9WriteLSfp : SchedWriteRes<[A9UnitLS, A9UnitFP]> { let Latency = 1; }

// Store Integer.
def A9WriteS : SchedWriteRes<[A9UnitLS]>;

//===----------------------------------------------------------------------===//
// Define resources dynamically for load multiple variants.

// Define helpers for extra latency without consuming resources.
// A9WriteCycle1 is one cycle of latency with no resources and no micro-op;
// A9WriteCycle2-8 repeat it NumCycles times.
def A9WriteCycle1 : SchedWriteRes<[]> { let Latency = 1; let NumMicroOps = 0; }
foreach NumCycles = 2-8 in {
def A9WriteCycle#NumCycles : WriteSequence<[A9WriteCycle1], NumCycles>;
} // foreach NumCycles

// Define address generation sequences and predicates for 8 flavors of LDMs.
// A9WriteAdr1-8: A9WriteAdr repeated NumAddr times, giving additive latency
// for instructions that generate multiple loads or stores.
foreach NumAddr = 1-8 in {
def A9WriteAdr#NumAddr : WriteSequence<[A9WriteAdr], NumAddr>;
} // foreach NumAddr

// A9LMAdr1Pred-A9LMAdr8Pred: select the LDM flavor by comparing the number
// of memory addresses reported by TII->getNumLDMAddresses(MI).
foreach NumAddr = 1-8 in {
def A9LMAdr#NumAddr#Pred :
  SchedPredicate<"TII->getNumLDMAddresses(MI) == "#NumAddr>;
} // foreach NumAddr

// Fall-back predicate for LDMs whose address count is reported as 0.
def A9LMUnknownPred : SchedPredicate<"TII->getNumLDMAddresses(MI) == 0">;

// Address generation latency and resources for LDM/VLDM/VLDn.
// The matching A9WriteAdrN sequence is chosen at scheduling time by the
// predicates above.
def A9WriteLMAdr : SchedWriteVariant<[
  SchedVar<A9LMAdr1Pred,    [A9WriteAdr1]>,
  SchedVar<A9LMAdr2Pred,    [A9WriteAdr2]>,
  SchedVar<A9LMAdr3Pred,    [A9WriteAdr3]>,
  SchedVar<A9LMAdr4Pred,    [A9WriteAdr4]>,
  SchedVar<A9LMAdr5Pred,    [A9WriteAdr5]>,
  SchedVar<A9LMAdr6Pred,    [A9WriteAdr6]>,
  SchedVar<A9LMAdr7Pred,    [A9WriteAdr7]>,
  SchedVar<A9LMAdr8Pred,    [A9WriteAdr8]>,
  // For unknown LDM/VLDM/VSTM, assume 2 32-bit registers.
  SchedVar<A9LMUnknownPred, [A9WriteAdr2]>
]>;

// LDM resources.
// Neither write below contributes a micro-op, so they can be combined with
// other writes such as WriteB. Both have a latency of 2 cycles.
let Latency = 2, NumMicroOps = 0 in {
// A9WriteLMLo takes a single LS resource.
def A9WriteLMLo : SchedWriteRes<[A9UnitLS]>;
// Assuming aligned access, the upper half of each pair is free (no
// resources) with the same latency.
def A9WriteLMHi : SchedWriteRes<[]>;
}
// Each A9WriteL#N variant adds N cycles of latency without consuming
// additional resources.
foreach NumAddr = 1-8 in {
// Low half: the LS-consuming write followed by NumAddr latency cycles.
def A9WriteL#NumAddr : WriteSequence<
  [A9WriteLMLo, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;
// High half: the resource-free write followed by the same latency cycles.
def A9WriteL#NumAddr#Hi : WriteSequence<
  [A9WriteLMHi, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;
}

//===----------------------------------------------------------------------===//
// LDM: Load multiple into 32-bit integer registers.

// Each A9WriteLM variant lists one (Lo, Hi) pair of writes per 64-bit value
// loaded. With an odd register count the final A9WriteLnHi is simply unused
// because the instruction has no further def operand. The variants take no
// issue resource, so they may need to be combined with A9WriteIssue.
let Variadic = 1 in
def A9WriteLM : SchedWriteVariant<[
  SchedVar<A9LMAdr1Pred, [
    A9WriteL1, A9WriteL1Hi]>,
  SchedVar<A9LMAdr2Pred, [
    A9WriteL1, A9WriteL1Hi,
    A9WriteL2, A9WriteL2Hi]>,
  SchedVar<A9LMAdr3Pred, [
    A9WriteL1, A9WriteL1Hi,
    A9WriteL2, A9WriteL2Hi,
    A9WriteL3, A9WriteL3Hi]>,
  SchedVar<A9LMAdr4Pred, [
    A9WriteL1, A9WriteL1Hi,
    A9WriteL2, A9WriteL2Hi,
    A9WriteL3, A9WriteL3Hi,
    A9WriteL4, A9WriteL4Hi]>,
  SchedVar<A9LMAdr5Pred, [
    A9WriteL1, A9WriteL1Hi,
    A9WriteL2, A9WriteL2Hi,
    A9WriteL3, A9WriteL3Hi,
    A9WriteL4, A9WriteL4Hi,
    A9WriteL5, A9WriteL5Hi]>,
  SchedVar<A9LMAdr6Pred, [
    A9WriteL1, A9WriteL1Hi,
    A9WriteL2, A9WriteL2Hi,
    A9WriteL3, A9WriteL3Hi,
    A9WriteL4, A9WriteL4Hi,
    A9WriteL5, A9WriteL5Hi,
    A9WriteL6, A9WriteL6Hi]>,
  SchedVar<A9LMAdr7Pred, [
    A9WriteL1, A9WriteL1Hi,
    A9WriteL2, A9WriteL2Hi,
    A9WriteL3, A9WriteL3Hi,
    A9WriteL4, A9WriteL4Hi,
    A9WriteL5, A9WriteL5Hi,
    A9WriteL6, A9WriteL6Hi,
    A9WriteL7, A9WriteL7Hi]>,
  SchedVar<A9LMAdr8Pred, [
    A9WriteL1, A9WriteL1Hi,
    A9WriteL2, A9WriteL2Hi,
    A9WriteL3, A9WriteL3Hi,
    A9WriteL4, A9WriteL4Hi,
    A9WriteL5, A9WriteL5Hi,
    A9WriteL6, A9WriteL6Hi,
    A9WriteL7, A9WriteL7Hi,
    A9WriteL8, A9WriteL8Hi]>,
  // For unknown LDMs, define the maximum number of writes, but only
  // make the first two consume resources (the rest use the Hi form twice).
  SchedVar<A9LMUnknownPred, [
    A9WriteL1, A9WriteL1Hi,
    A9WriteL2, A9WriteL2Hi,
    A9WriteL3Hi, A9WriteL3Hi,
    A9WriteL4Hi, A9WriteL4Hi,
    A9WriteL5Hi, A9WriteL5Hi,
    A9WriteL6Hi, A9WriteL6Hi,
    A9WriteL7Hi, A9WriteL7Hi,
    A9WriteL8Hi, A9WriteL8Hi]>
]>;

//===----------------------------------------------------------------------===//
// VFP Load/Store Multiple Variants, and NEON VLDn/VSTn support.

// A9WriteLfpOp uses the same resources and latency as A9WriteLSfp but
// contributes no micro-op, so it can be used in WriteSequences for
// single-issue instructions that encapsulate multiple loads.
let Latency = 1, NumMicroOps = 0 in
def A9WriteLfpOp : SchedWriteRes<[A9UnitLS, A9UnitFP]>;

foreach NumAddr = 1-8 in {

// Helper for A9WriteLfp1-8: NumAddr fp loads in sequence, no micro-ops.
def A9WriteLfp#NumAddr#Seq : WriteSequence<[A9WriteLfpOp], NumAddr>;

// A9WriteLfp1-8: A9WriteIssue (one issue slot) followed by the helper
// sequence above, giving additive latency. Used directly to describe
// NEON VLDn.
def A9WriteLfp#NumAddr : WriteSequence<
  [A9WriteIssue, !cast<SchedWrite>("A9WriteLfp"#NumAddr#Seq)]>;

// A9WriteLfp1-8Mov: A9WriteF followed by the helper sequence, adding
// latency and an FP resource for permuting loaded values.
def A9WriteLfp#NumAddr#Mov : WriteSequence<
  [A9WriteF, !cast<SchedWrite>("A9WriteLfp"#NumAddr#Seq)]>;

} // foreach NumAddr

// VLDM/VSTM PreRA resources.
// A9WriteLMfpPreRA is expanded at scheduling time into the A9WriteLfp1-8
// sequence selected by a predicate. This supports the preRA VLDM variants
// in which all 64-bit loads are written to the same tuple of either single
// or double precision registers.
def A9WriteLMfpPreRA : SchedWriteVariant<[
  SchedVar<A9LMAdr1Pred,    [A9WriteLfp1]>,
  SchedVar<A9LMAdr2Pred,    [A9WriteLfp2]>,
  SchedVar<A9LMAdr3Pred,    [A9WriteLfp3]>,
  SchedVar<A9LMAdr4Pred,    [A9WriteLfp4]>,
  SchedVar<A9LMAdr5Pred,    [A9WriteLfp5]>,
  SchedVar<A9LMAdr6Pred,    [A9WriteLfp6]>,
  SchedVar<A9LMAdr7Pred,    [A9WriteLfp7]>,
  SchedVar<A9LMAdr8Pred,    [A9WriteLfp8]>,
  // For unknown VLDM/VSTM PreRA, assume 2xS registers.
  SchedVar<A9LMUnknownPred, [A9WriteLfp2]>
]>;

// VLDM/VSTM PostRA resources.
// A9WriteLMfpLo takes a LS and FP resource and one issue slot but no latency.
let Latency = 0 in
def A9WriteLMfpLo : SchedWriteRes<[A9UnitLS, A9UnitFP]>;

foreach NumAddr = 1-8 in {

// Each A9WriteLMfp#N variant adds N cycles of latency without consuming
// additional resources.
def A9WriteLMfp#NumAddr : WriteSequence<
  [A9WriteLMfpLo, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;

// Assuming aligned access, the upper half of each pair is free with
// the same latency.
def A9WriteLMfp#NumAddr#Hi : WriteSequence<
  [A9WriteLMHi, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;

} // foreach NumAddr

// VLDM PostRA Variants. These variants expand A9WriteLMfpPostRA into a
// pair of writes for each 64-bit data loaded. When the number of
// registers is odd, the last WriteLMfpnHi is naturally ignored because
// the instruction has no following def operands.
let Variadic = 1 in
def A9WriteLMfpPostRA : SchedWriteVariant<[
  SchedVar<A9LMAdr1Pred, [
    A9WriteLMfp1, A9WriteLMfp1Hi]>,
  SchedVar<A9LMAdr2Pred, [
    A9WriteLMfp1, A9WriteLMfp1Hi,
    A9WriteLMfp2, A9WriteLMfp2Hi]>,
  SchedVar<A9LMAdr3Pred, [
    A9WriteLMfp1, A9WriteLMfp1Hi,
    A9WriteLMfp2, A9WriteLMfp2Hi,
    A9WriteLMfp3, A9WriteLMfp3Hi]>,
  SchedVar<A9LMAdr4Pred, [
    A9WriteLMfp1, A9WriteLMfp1Hi,
    A9WriteLMfp2, A9WriteLMfp2Hi,
    A9WriteLMfp3, A9WriteLMfp3Hi,
    A9WriteLMfp4, A9WriteLMfp4Hi]>,
  SchedVar<A9LMAdr5Pred, [
    A9WriteLMfp1, A9WriteLMfp1Hi,
    A9WriteLMfp2, A9WriteLMfp2Hi,
    A9WriteLMfp3, A9WriteLMfp3Hi,
    A9WriteLMfp4, A9WriteLMfp4Hi,
    A9WriteLMfp5, A9WriteLMfp5Hi]>,
  SchedVar<A9LMAdr6Pred, [
    A9WriteLMfp1, A9WriteLMfp1Hi,
    A9WriteLMfp2, A9WriteLMfp2Hi,
    A9WriteLMfp3, A9WriteLMfp3Hi,
    A9WriteLMfp4, A9WriteLMfp4Hi,
    A9WriteLMfp5, A9WriteLMfp5Hi,
    A9WriteLMfp6, A9WriteLMfp6Hi]>,
  SchedVar<A9LMAdr7Pred, [
    A9WriteLMfp1, A9WriteLMfp1Hi,
    A9WriteLMfp2, A9WriteLMfp2Hi,
    A9WriteLMfp3, A9WriteLMfp3Hi,
    A9WriteLMfp4, A9WriteLMfp4Hi,
    A9WriteLMfp5, A9WriteLMfp5Hi,
    A9WriteLMfp6, A9WriteLMfp6Hi,
    A9WriteLMfp7, A9WriteLMfp7Hi]>,
  SchedVar<A9LMAdr8Pred, [
    A9WriteLMfp1, A9WriteLMfp1Hi,
    A9WriteLMfp2, A9WriteLMfp2Hi,
    A9WriteLMfp3, A9WriteLMfp3Hi,
    A9WriteLMfp4, A9WriteLMfp4Hi,
    A9WriteLMfp5, A9WriteLMfp5Hi,
    A9WriteLMfp6, A9WriteLMfp6Hi,
    A9WriteLMfp7, A9WriteLMfp7Hi,
    A9WriteLMfp8, A9WriteLMfp8Hi]>,
  // For unknown LDMs, define the maximum number of writes, but only
  // make the first two consume resources (the rest use the Hi form twice).
  SchedVar<A9LMUnknownPred, [
    A9WriteLMfp1, A9WriteLMfp1Hi,
    A9WriteLMfp2, A9WriteLMfp2Hi,
    A9WriteLMfp3Hi, A9WriteLMfp3Hi,
    A9WriteLMfp4Hi, A9WriteLMfp4Hi,
    A9WriteLMfp5Hi, A9WriteLMfp5Hi,
    A9WriteLMfp6Hi, A9WriteLMfp6Hi,
    A9WriteLMfp7Hi, A9WriteLMfp7Hi,
    A9WriteLMfp8Hi, A9WriteLMfp8Hi]>
]>;

// Tell apart the multiple MI-level forms of the same VLDM/VSTM
// instructions by checking whether operand 0 is a virtual or a physical
// register.
def A9PreRA : SchedPredicate<
  "TargetRegisterInfo::isVirtualRegister(MI->getOperand(0).getReg())">;
def A9PostRA : SchedPredicate<
  "TargetRegisterInfo::isPhysicalRegister(MI->getOperand(0).getReg())">;

// VLDM represents all destination registers as a single register
// tuple, unlike LDM. So the number of write operands is not variadic.
def A9WriteLMfp : SchedWriteVariant<[
  SchedVar<A9PreRA,  [A9WriteLMfpPreRA]>,
  SchedVar<A9PostRA, [A9WriteLMfpPostRA]>
]>;

//===----------------------------------------------------------------------===//
// Resources for other (non LDM/VLDM) Variants.

// Mov-immediate writers: always expanded into the listed sequence, with
// additive latency.
def A9WriteI2   : WriteSequence<[A9WriteI, A9WriteI]>;
def A9WriteI2pc : WriteSequence<[A9WriteI, A9WriteI, WriteALU]>;
def A9WriteI2ld : WriteSequence<[A9WriteI, A9WriteI, A9WriteL]>;

// Some ALU operations can read loaded integer values one cycle early.
// The advance applies only to the load write types listed here.
def A9ReadALU : SchedReadAdvance<1, [
  A9WriteL, A9WriteLHi, A9WriteLsi, A9WriteLb, A9WriteLbsi,
  A9WriteL1, A9WriteL2, A9WriteL3, A9WriteL4,
  A9WriteL5, A9WriteL6, A9WriteL7, A9WriteL8,
  A9WriteL1Hi, A9WriteL2Hi, A9WriteL3Hi, A9WriteL4Hi,
  A9WriteL5Hi, A9WriteL6Hi, A9WriteL7Hi, A9WriteL8Hi
]>;

// Read types for operands that are unconditionally read in cycle N
// after the instruction issues, decreases producer latency by N-1.
def A9Read2 : SchedReadAdvance<1>;
def A9Read3 : SchedReadAdvance<2>;
def A9Read4 : SchedReadAdvance<3>;

//===----------------------------------------------------------------------===//
// Map itinerary classes to scheduler read/write resources per operand.
//
// For ARM, the scheduler resources are attached to the existing itinerary
// classes so that the instruction definitions themselves stay untouched.

// This table follows the ARM Cortex-A9 Technical Reference Manuals,
// mostly in order.

// Moves.
def : ItinRW<[WriteALU],
             [IIC_iMOVi, IIC_iMOVr, IIC_iMOVsi,
              IIC_iMVNi, IIC_iMVNsi,
              IIC_iCMOVi, IIC_iCMOVr, IIC_iCMOVsi]>;
def : ItinRW<[WriteALU, A9ReadALU],
             [IIC_iMVNr]>;
def : ItinRW<[A9WriteIsr],
             [IIC_iMOVsr, IIC_iMVNsr, IIC_iCMOVsr]>;

// Two-instruction immediate moves.
def : ItinRW<[A9WriteI2],
             [IIC_iMOVix2, IIC_iCMOVix2]>;
def : ItinRW<[A9WriteI2pc],
             [IIC_iMOVix2addpc]>;
def : ItinRW<[A9WriteI2ld],
             [IIC_iMOVix2ld]>;

// ALU, compare, bit and extend operations.
def : ItinRW<[WriteALU],
             [IIC_iBITi, IIC_iBITr, IIC_iUNAr, IIC_iTSTi, IIC_iTSTr]>;
def : ItinRW<[WriteALU, A9ReadALU],
             [IIC_iALUi, IIC_iCMPi, IIC_iCMPsi]>;
def : ItinRW<[WriteALU, A9ReadALU, A9ReadALU],
             [IIC_iALUr, IIC_iCMPr]>;
def : ItinRW<[WriteALUsi],
             [IIC_iBITsi, IIC_iUNAsi, IIC_iEXTr, IIC_iTSTsi]>;
def : ItinRW<[WriteALUsi, A9ReadALU],
             [IIC_iALUsi]>;
def : ItinRW<[WriteALUsi, ReadDefault, A9ReadALU],
             [IIC_iALUsir]>; // RSB
def : ItinRW<[A9WriteALUsr],
             [IIC_iBITsr, IIC_iTSTsr, IIC_iEXTAr, IIC_iEXTAsr]>;
def : ItinRW<[A9WriteALUsr, A9ReadALU],
             [IIC_iALUsr, IIC_iCMPsr]>;

// A9WriteMHi ignored for MUL32.
def : ItinRW<[A9WriteM, A9WriteMHi],
             [IIC_iMUL32, IIC_iMAC32,
              IIC_iMUL64, IIC_iMAC64]>;
// FIXME: SMLALxx needs itin classes
def : ItinRW<[A9WriteM16, A9WriteM16Hi],
             [IIC_iMUL16, IIC_iMAC16]>;

// TODO: For floating-point ops, we model the pipeline forwarding
// latencies here. WAW latencies are sometimes longer.

// VFP moves, status transfers, unary ops and compares.
def : ItinRW<[A9WriteFMov],
             [IIC_fpSTAT, IIC_fpMOVIS, IIC_fpMOVID, IIC_fpMOVSI,
              IIC_fpUNA32, IIC_fpUNA64,
              IIC_fpCMP32, IIC_fpCMP64]>;
def : ItinRW<[A9WriteFMov, A9WriteFMov],
             [IIC_fpMOVDI]>;
// VFP conversions and ALU ops.
def : ItinRW<[A9WriteF],
             [IIC_fpCVTSD, IIC_fpCVTDS, IIC_fpCVTSH, IIC_fpCVTHS,
              IIC_fpCVTIS, IIC_fpCVTID, IIC_fpCVTSI, IIC_fpCVTDI,
              IIC_fpALU32, IIC_fpALU64]>;
// VFP multiply, multiply-accumulate, divide and square root.
def : ItinRW<[A9WriteFMulS],  [IIC_fpMUL32]>;
def : ItinRW<[A9WriteFMulD],  [IIC_fpMUL64]>;
def : ItinRW<[A9WriteFMAS],   [IIC_fpMAC32]>;
def : ItinRW<[A9WriteFMAD],   [IIC_fpMAC64]>;
def : ItinRW<[A9WriteFDivS],  [IIC_fpDIV32]>;
def : ItinRW<[A9WriteFDivD],  [IIC_fpDIV64]>;
def : ItinRW<[A9WriteFSqrtS], [IIC_fpSQRT32]>;
def : ItinRW<[A9WriteFSqrtD], [IIC_fpSQRT64]>;

def : ItinRW<[A9WriteB], [IIC_Br]>;

// A9 PLD is processed in a dedicated unit, so it maps to no writes.
def : ItinRW<[], [IIC_Preload]>;

// Note: We must assume that loads are aligned, since the machine
// model cannot know this statically and A9 ignores alignment hints.

// A9WriteAdr consumes the AGU regardless of address writeback, but its
// latency is only relevant for users of an updated address.
def : ItinRW<[A9WriteL, A9WriteAdr],
             [IIC_iLoad_i, IIC_iLoad_r,
              IIC_iLoad_iu, IIC_iLoad_ru]>;
def : ItinRW<[A9WriteLsi, A9WriteAdr],
             [IIC_iLoad_si, IIC_iLoad_siu]>;
def : ItinRW<[A9WriteLb, A9WriteAdr2],
             [IIC_iLoad_bh_i, IIC_iLoad_bh_r,
              IIC_iLoad_bh_iu, IIC_iLoad_bh_ru]>;
def : ItinRW<[A9WriteLbsi, A9WriteAdr2],
             [IIC_iLoad_bh_si, IIC_iLoad_bh_siu]>;
def : ItinRW<[A9WriteL, A9WriteLHi, A9WriteAdr],
             [IIC_iLoad_d_i, IIC_iLoad_d_r,
              IIC_iLoad_d_ru]>;
// Store either has no def operands, or the one def for address writeback.
def : ItinRW<[A9WriteAdr, A9WriteS],
             [IIC_iStore_i, IIC_iStore_r,
              IIC_iStore_iu, IIC_iStore_ru,
              IIC_iStore_d_i, IIC_iStore_d_r,
              IIC_iStore_d_ru]>;
def : ItinRW<[A9WriteAdr2, A9WriteS],
             [IIC_iStore_si, IIC_iStore_siu,
              IIC_iStore_bh_i, IIC_iStore_bh_r,
              IIC_iStore_bh_iu, IIC_iStore_bh_ru]>;
def : ItinRW<[A9WriteAdr3, A9WriteS],
             [IIC_iStore_bh_si, IIC_iStore_bh_siu]>;

// A9WriteLM will be expanded into a separate write for each def
// operand. Address generation consumes resources, but A9WriteLMAdr
// is listed after all def operands, so has no effective latency.
//
// Note: A9WriteLM expands into an even number of def operands. The
// actual number of def operands may be less by one.
def : ItinRW<[A9WriteLM, A9WriteLMAdr, A9WriteIssue],
             [IIC_iLoad_m, IIC_iPop]>;

// Load multiple with address writeback has an extra def operand in
// front of the loaded registers.
//
// Reuse the load-multiple variants for store-multiple because the
// resources are identical. For stores only the address writeback
// has a def operand so the WriteL latencies are unused.
def : ItinRW<[A9WriteLMAdr, A9WriteLM, A9WriteIssue],
             [IIC_iLoad_mu,
              IIC_iStore_m,
              IIC_iStore_mu]>;
def : ItinRW<[A9WriteLM, A9WriteLMAdr, A9WriteB],
             [IIC_iLoad_mBr, IIC_iPop_Br]>;
def : ItinRW<[A9WriteL, A9WriteAdr, WriteALU],
             [IIC_iLoadiALU]>;

// VFP single loads.
def : ItinRW<[A9WriteLSfp, A9WriteAdr],
             [IIC_fpLoad32, IIC_fpLoad64]>;

// VFP load multiple; with writeback the address write comes first.
def : ItinRW<[A9WriteLMfp, A9WriteLMAdr],
             [IIC_fpLoad_m]>;
def : ItinRW<[A9WriteLMAdr, A9WriteLMfp],
             [IIC_fpLoad_mu]>;
// VFP stores, single and multiple.
def : ItinRW<[A9WriteAdr, A9WriteLSfp],
             [IIC_fpStore32, IIC_fpStore64,
              IIC_fpStore_m, IIC_fpStore_mu]>;

// Note: Unlike VLDM, VLD1 expects the writeback operand after the
// normal writes.
def : ItinRW<[A9WriteLfp1, A9WriteAdr1],
             [IIC_VLD1, IIC_VLD1u,
              IIC_VLD1x2, IIC_VLD1x2u]>;
def : ItinRW<[A9WriteLfp2, A9WriteAdr2],
             [IIC_VLD1x3, IIC_VLD1x3u,
              IIC_VLD1x4, IIC_VLD1x4u,
              IIC_VLD4dup, IIC_VLD4dupu]>;
def : ItinRW<[A9WriteLfp1Mov, A9WriteAdr1],
             [IIC_VLD1dup, IIC_VLD1dupu,
              IIC_VLD2, IIC_VLD2u,
              IIC_VLD2dup, IIC_VLD2dupu]>;
def : ItinRW<[A9WriteLfp2Mov, A9WriteAdr1],
             [IIC_VLD1ln, IIC_VLD1lnu,
              IIC_VLD2x2, IIC_VLD2x2u,
              IIC_VLD2ln, IIC_VLD2lnu]>;
def : ItinRW<[A9WriteLfp3Mov, A9WriteAdr3],
             [IIC_VLD3, IIC_VLD3u,
              IIC_VLD3dup, IIC_VLD3dupu]>;
def : ItinRW<[A9WriteLfp4Mov, A9WriteAdr4],
             [IIC_VLD4, IIC_VLD4u,
              IIC_VLD4ln, IIC_VLD4lnu]>;
def : ItinRW<[A9WriteLfp5Mov, A9WriteAdr5],
             [IIC_VLD3ln, IIC_VLD3lnu]>;

// Vector stores use similar resources to vector loads, so they reuse the
// same write types. For stores with address writeback the address write
// must come first.
def : ItinRW<[A9WriteAdr1, A9WriteLfp1],
             [IIC_VST1, IIC_VST1u,
              IIC_VST1x2, IIC_VST1x2u,
              IIC_VST1ln, IIC_VST1lnu,
              IIC_VST2, IIC_VST2u,
              IIC_VST2x2, IIC_VST2x2u,
              IIC_VST2ln, IIC_VST2lnu]>;
def : ItinRW<[A9WriteAdr2, A9WriteLfp2],
             [IIC_VST1x3, IIC_VST1x3u,
              IIC_VST1x4, IIC_VST1x4u,
              IIC_VST3, IIC_VST3u,
              IIC_VST3ln, IIC_VST3lnu,
              IIC_VST4, IIC_VST4u,
              IIC_VST4ln, IIC_VST4lnu]>;

// NEON moves.
def : ItinRW<[A9WriteV2], [IIC_VMOVSI, IIC_VMOVDI, IIC_VMOVD, IIC_VMOVQ]>;
def : ItinRW<[A9WriteV1], [IIC_VMOV, IIC_VMOVIS, IIC_VMOVID]>;
def : ItinRW<[A9WriteV3], [IIC_VMOVISL, IIC_VMOVN]>;

// NEON integer arithmetic
//
// VADD/VAND/VORR/VEOR/VBIC/VORN/VBIT/VBIF/VBSL
def : ItinRW<[A9WriteV3, A9Read2, A9Read2],
             [IIC_VBINiD, IIC_VBINiQ]>;
// VSUB/VMVN/VCLSD/VCLZD/VCNTD
def : ItinRW<[A9WriteV3, A9Read2],
             [IIC_VSUBiD, IIC_VSUBiQ, IIC_VCNTiD]>;
// VADDL/VSUBL/VNEG are mapped later under IIC_SHLi.
// ...
// VHADD/VRHADD/VQADD/VTST/VADH/VRADH
def : ItinRW<[A9WriteV4, A9Read2, A9Read2],
             [IIC_VBINi4D, IIC_VBINi4Q]>;
// VSBH/VRSBH/VHSUB/VQSUB/VABD/VCEQ/VCGE/VCGT/VMAX/VMIN/VPMAX/VPMIN/VABDL
def : ItinRW<[A9WriteV4, A9Read2],
             [IIC_VSUBi4D, IIC_VSUBi4Q]>;
// VQNEG/VQABS
def : ItinRW<[A9WriteV4],
             [IIC_VQUNAiD, IIC_VQUNAiQ]>;
// VABS
def : ItinRW<[A9WriteV4, A9Read2],
             [IIC_VUNAiD, IIC_VUNAiQ]>;
// VPADD/VPADDL are mapped later under IIC_SHLi.
// ...
// VCLSQ/VCLZQ/VCNTQ, takes two cycles.
def : ItinRW<[A9Write2V4, A9Read3],
             [IIC_VCNTiQ]>;
// VMOVimm/VMVNimm/VORRimm/VBICimm
def : ItinRW<[A9WriteV3],
             [IIC_VMOVImm]>;
def : ItinRW<[A9WriteV6, A9Read3, A9Read2],
             [IIC_VABAD, IIC_VABAQ]>;
def : ItinRW<[A9WriteV6, A9Read3],
             [IIC_VPALiD, IIC_VPALiQ]>;

// NEON integer multiply
//
// Note: these don't quite match the timing docs, but they do match
// the original A9 itinerary.
def : ItinRW<[A9WriteV6, A9Read2, A9Read2],           [IIC_VMULi16D]>;
def : ItinRW<[A9WriteV7, A9Read2, A9Read2],           [IIC_VMULi16Q]>;
def : ItinRW<[A9Write2V7, A9Read2],                   [IIC_VMULi32D]>;
def : ItinRW<[A9Write2V9, A9Read2],                   [IIC_VMULi32Q]>;
def : ItinRW<[A9WriteV6, A9Read3, A9Read2, A9Read2],  [IIC_VMACi16D]>;
def : ItinRW<[A9WriteV7, A9Read3, A9Read2, A9Read2],  [IIC_VMACi16Q]>;
def : ItinRW<[A9Write2V7, A9Read3, A9Read2],          [IIC_VMACi32D]>;
def : ItinRW<[A9Write2V9, A9Read3, A9Read2],          [IIC_VMACi32Q]>;

// NEON integer shift
// TODO: Q,Q,Q shifts should actually reserve FP for 2 cycles.
// Shift itinerary classes (IIC_VSHLi*, IIC_VSHLi4*).
def : ItinRW<[A9WriteV3], [IIC_VSHLiD,  IIC_VSHLiQ]>;
def : ItinRW<[A9WriteV4], [IIC_VSHLi4D, IIC_VSHLi4Q]>;

// NEON permute
def : ItinRW<[A9WriteV2],
             [IIC_VPERMD, IIC_VPERMQ, IIC_VEXTD]>;
// This mapping lists two writes (A9WriteV3, A9WriteV4) followed by two reads.
def : ItinRW<[A9WriteV3, A9WriteV4, ReadDefault, A9Read2],
             [IIC_VPERMQ3, IIC_VEXTQ]>;
// Table lookups, one mapping per IIC_VTB<n> class.
def : ItinRW<[A9WriteV3, A9Read2],
             [IIC_VTB1]>;
def : ItinRW<[A9WriteV3, A9Read2, A9Read2],
             [IIC_VTB2]>;
def : ItinRW<[A9WriteV4, A9Read2, A9Read2, A9Read3],
             [IIC_VTB3]>;
def : ItinRW<[A9WriteV4, A9Read2, A9Read2, A9Read3, A9Read3],
             [IIC_VTB4]>;
// IIC_VTBX<n>: same as IIC_VTB<n> with a ReadDefault inserted as first read.
def : ItinRW<[A9WriteV3, ReadDefault, A9Read2],
             [IIC_VTBX1]>;
def : ItinRW<[A9WriteV3, ReadDefault, A9Read2, A9Read2],
             [IIC_VTBX2]>;
def : ItinRW<[A9WriteV4, ReadDefault, A9Read2, A9Read2, A9Read3],
             [IIC_VTBX3]>;
def : ItinRW<[A9WriteV4, ReadDefault, A9Read2, A9Read2, A9Read3, A9Read3],
             [IIC_VTBX4]>;

// NEON floating-point
def : ItinRW<[A9WriteV5,  A9Read2, A9Read2], [IIC_VBIND]>;
def : ItinRW<[A9WriteV6,  A9Read2, A9Read2], [IIC_VBINQ]>;
def : ItinRW<[A9WriteV5,  A9Read2],          [IIC_VUNAD, IIC_VFMULD]>;
def : ItinRW<[A9WriteV6,  A9Read2],          [IIC_VUNAQ, IIC_VFMULQ]>;
def : ItinRW<[A9WriteV9,  A9Read3, A9Read2], [IIC_VMACD, IIC_VFMACD]>;
def : ItinRW<[A9WriteV10, A9Read3, A9Read2], [IIC_VMACQ, IIC_VFMACQ]>;
def : ItinRW<[A9WriteV9,  A9Read2, A9Read2], [IIC_VRECSD]>;
def : ItinRW<[A9WriteV10, A9Read2, A9Read2], [IIC_VRECSQ]>;

// Map SchedRWs that are identical for cortexa9 to existing resources.
// Generic ALU write/read types are aliased to their A9-specific records.
def : SchedAlias<WriteALU,    A9WriteALU>;
def : SchedAlias<WriteALUsr,  A9WriteALUsr>;
def : SchedAlias<WriteALUSsr, A9WriteALUsr>;
def : SchedAlias<ReadALU,     A9ReadALU>;
def : SchedAlias<ReadALUsr,   A9ReadALU>;

// Bitwise logical instructions, grouped by operand form (ri/rr, rsi, rsr).
def : InstRW<[WriteALU],
             (instregex "ANDri", "ORRri", "EORri", "BICri",
                        "ANDrr", "ORRrr", "EORrr", "BICrr")>;
def : InstRW<[WriteALUsi],
             (instregex "ANDrsi", "ORRrsi", "EORrsi", "BICrsi")>;
def : InstRW<[WriteALUsr],
             (instregex "ANDrsr", "ORRrsr", "EORrsr", "BICrsr")>;


// All three compare write types resolve to the plain A9 ALU write.
def : SchedAlias<WriteCMP,   A9WriteALU>;
def : SchedAlias<WriteCMPsi, A9WriteALU>;
def : SchedAlias<WriteCMPsr, A9WriteALU>;

// Move / move-not variants. The record order below is kept as-is because
// some patterns share a prefix (e.g. "MOV_ga_pcrel" / "MOV_ga_pcrel_ldr").
def : InstRW<[A9WriteIsr],
             (instregex "MOVsr", "MOVsi", "MVNsr", "MOVCCsi", "MOVCCsr")>;
def : InstRW<[WriteALU, A9ReadALU],
             (instregex "MVNr")>;
def : InstRW<[A9WriteI2],
             (instregex "MOVCCi32imm", "MOVi32imm", "MOV_ga_dyn")>;
def : InstRW<[A9WriteI2pc],
             (instregex "MOV_ga_pcrel")>;
def : InstRW<[A9WriteI2ld],
             (instregex "MOV_ga_pcrel_ldr")>;

def : InstRW<[WriteALU],
             (instregex "SEL")>;

def : InstRW<[WriteALUsi],
             (instregex "BFC", "BFI", "UBFX", "SBFX")>;

// Multiplies producing a single result use A9WriteM alone.
def : InstRW<[A9WriteM],
             (instregex "MUL", "MULv5", "SMMUL", "SMMULR", "MLA", "MLAv5",
                        "MLS", "SMMLA", "SMMLAR", "SMMLS", "SMMLSR")>;
// Long multiplies list a second write, A9WriteMHi, after A9WriteM.
def : InstRW<[A9WriteM, A9WriteMHi],
             (instregex "SMULL", "SMULLv5", "UMULL", "UMULLv5", "SMLAL$",
                        "UMLAL", "UMAAL", "SMLALv5", "UMLALv5", "UMAALv5",
                        "SMLALBB", "SMLALBT", "SMLALTB", "SMLALTT")>;
// FIXME: These instructions used to have NoItinerary. Just copied the one from above.
// Dual-multiply instructions, given the same write pair as the long
// multiplies. "SMLSLDX" was previously misspelled "SMLLDX", which is not an
// ARM instruction name.
def : InstRW< [A9WriteM, A9WriteMHi],
      (instregex "SMLAD", "SMLADX", "SMLALD", "SMLALDX", "SMLSD", "SMLSDX",
      "SMLSLD", "SMLSLDX", "SMUAD", "SMUADX", "SMUSD", "SMUSDX")>;

// 16-bit multiplies and multiply-accumulates.
def : InstRW<[A9WriteM16, A9WriteM16Hi],
      (instregex "SMULBB", "SMULBT", "SMULTB", "SMULTT", "SMULWB", "SMULWT")>;
def : InstRW<[A9WriteM16, A9WriteM16Hi],
      (instregex "SMLABB", "SMLABT", "SMLATB", "SMLATT", "SMLAWB", "SMLAWT")>;

// Integer loads.
def : InstRW<[A9WriteL], (instregex "LDRi12", "PICLDR$")>;
def : InstRW<[A9WriteLsi], (instregex "LDRrs")>;
def : InstRW<[A9WriteLb],
      (instregex "LDRBi12", "PICLDRH", "PICLDRB", "PICLDRSH", "PICLDRSB",
      "LDRH", "LDRSH", "LDRSB")>;
// Byte load with register-shifted index. This pattern used to repeat "LDRrs",
// which is already mapped to A9WriteLsi above; that gave one instruction two
// conflicting writes and left the byte form without an A9WriteLbsi mapping.
def : InstRW<[A9WriteLbsi], (instregex "LDRBrs")>;

// WriteDiv is modelled with no resources and zero latency.
def : WriteRes<WriteDiv, []> { let Latency = 0; }

// All branch write types occupy A9UnitB.
def : WriteRes<WriteBr, [A9UnitB]>;
def : WriteRes<WriteBrL, [A9UnitB]>;
def : WriteRes<WriteBrTbl, [A9UnitB]>;
// Preload consumes no modelled resources.
def : WriteRes<WritePreLd, []>;
def : SchedAlias<WriteCvtFP, A9WriteF>;
// No-ops: zero latency and zero micro-ops.
def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }
} // SchedModel = CortexA9Model