//=- ARMScheduleSwift.td - Swift Scheduling Definitions -*- tablegen -*----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the itinerary class data for the Swift processor.
//
//===----------------------------------------------------------------------===//

// ===---------------------------------------------------------------------===//
// This section contains legacy support for itineraries. This is
// required until SD and PostRA schedulers are replaced by MachineScheduler.

// Functional units referenced by the stages of SwiftItineraries below.

// SW_DIS0..SW_DIS2: reserved by the leading stage(s) of every itinerary below.
// Single-stage entries pick any one of the three; multi-stage entries such as
// IIC_iMUL64 reserve DIS0, DIS1 and DIS2 individually in the same cycle.
// NOTE(review): presumably these are the dispatch slots, one per micro-op --
// confirm against the Swift tuning guide.
def SW_DIS0 : FuncUnit;
def SW_DIS1 : FuncUnit;
def SW_DIS2 : FuncUnit;

// SW_ALU0 / SW_ALU1: simple integer itineraries (e.g. IIC_iALUr) may use
// either unit. Some classes are pinned to one of them: IIC_iMUL16 and
// IIC_iMUL32 reserve only SW_ALU0, while the FP convert and FP multiply
// itineraries (IIC_fpCVT*, IIC_fpMUL*) reserve only SW_ALU1.
def SW_ALU0 : FuncUnit;
def SW_ALU1 : FuncUnit;
// SW_LS: reserved by the load and store itineraries (IIC_iLoad_*,
// IIC_iStore_*, IIC_fpLoad*, IIC_fpStore*) and by the integer <-> FP register
// moves (IIC_fpMOV*).
def SW_LS   : FuncUnit;
// SW_IDIV: held for 14 cycles by IIC_iDIV.
def SW_IDIV : FuncUnit;
// SW_FDIV: held for 15 cycles (IIC_fpDIV32, IIC_fpSQRT32) or 30 cycles
// (IIC_fpDIV64, IIC_fpSQRT64).
def SW_FDIV : FuncUnit;

// FIXME: Need bypasses.
// FIXME: Model the multiple stages of IIC_iMOVix2, IIC_iMOVix2addpc, and
//        IIC_iMOVix2ld better.
// FIXME: Model the special immediate shifts that are not microcoded.
// FIXME: Do we need to model the fact that uses of r15 in a micro-op force it
//        to issue on pipe 1?
// FIXME: Model the pipelined behavior of CMP / TST instructions.
// FIXME: Better model the microcode stages of multiply instructions, especially
//        conditional variants.
// FIXME: Add preload instruction when it is documented.
// FIXME: Model non-pipelined nature of FP div / sqrt unit.
39 40 def SwiftItineraries : ProcessorItineraries< 41 [SW_DIS0, SW_DIS1, SW_DIS2, SW_ALU0, SW_ALU1, SW_LS, SW_IDIV, SW_FDIV], [], [ 42 // 43 // Move instructions, unconditional 44 InstrItinData<IIC_iMOVi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 45 InstrStage<1, [SW_ALU0, SW_ALU1]>], 46 [1]>, 47 InstrItinData<IIC_iMOVr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 48 InstrStage<1, [SW_ALU0, SW_ALU1]>], 49 [1]>, 50 InstrItinData<IIC_iMOVsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 51 InstrStage<1, [SW_ALU0, SW_ALU1]>], 52 [1]>, 53 InstrItinData<IIC_iMOVsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 54 InstrStage<1, [SW_ALU0, SW_ALU1]>], 55 [1]>, 56 InstrItinData<IIC_iMOVix2 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 57 InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 58 InstrStage<1, [SW_ALU0, SW_ALU1]>, 59 InstrStage<1, [SW_ALU0, SW_ALU1]>], 60 [2]>, 61 InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 62 InstrStage<1, [SW_ALU0, SW_ALU1]>, 63 InstrStage<1, [SW_ALU0, SW_ALU1]>, 64 InstrStage<1, [SW_ALU0, SW_ALU1]>], 65 [3]>, 66 InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 67 InstrStage<1, [SW_ALU0, SW_ALU1]>, 68 InstrStage<1, [SW_ALU0, SW_ALU1]>, 69 InstrStage<1, [SW_LS]>], 70 [5]>, 71 // 72 // MVN instructions 73 InstrItinData<IIC_iMVNi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 74 InstrStage<1, [SW_ALU0, SW_ALU1]>], 75 [1]>, 76 InstrItinData<IIC_iMVNr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 77 InstrStage<1, [SW_ALU0, SW_ALU1]>], 78 [1]>, 79 InstrItinData<IIC_iMVNsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 80 InstrStage<1, [SW_ALU0, SW_ALU1]>], 81 [1]>, 82 InstrItinData<IIC_iMVNsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 83 InstrStage<1, [SW_ALU0, SW_ALU1]>], 84 [1]>, 85 // 86 // No operand cycles 87 InstrItinData<IIC_iALUx , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 88 InstrStage<1, [SW_ALU0, SW_ALU1]>]>, 89 // 90 // Binary 
Instructions that produce a result 91 InstrItinData<IIC_iALUi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 92 InstrStage<1, [SW_ALU0, SW_ALU1]>], 93 [1, 1]>, 94 InstrItinData<IIC_iALUr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 95 InstrStage<1, [SW_ALU0, SW_ALU1]>], 96 [1, 1, 1]>, 97 InstrItinData<IIC_iALUsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 98 InstrStage<1, [SW_ALU0, SW_ALU1]>], 99 [2, 1, 1]>, 100 InstrItinData<IIC_iALUsir,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 101 InstrStage<1, [SW_ALU0, SW_ALU1]>], 102 [2, 1, 1]>, 103 InstrItinData<IIC_iALUsr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 104 InstrStage<1, [SW_ALU0, SW_ALU1]>], 105 [2, 1, 1, 1]>, 106 // 107 // Bitwise Instructions that produce a result 108 InstrItinData<IIC_iBITi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 109 InstrStage<1, [SW_ALU0, SW_ALU1]>], 110 [1, 1]>, 111 InstrItinData<IIC_iBITr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 112 InstrStage<1, [SW_ALU0, SW_ALU1]>], 113 [1, 1, 1]>, 114 InstrItinData<IIC_iBITsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 115 InstrStage<1, [SW_ALU0, SW_ALU1]>], 116 [2, 1, 1]>, 117 InstrItinData<IIC_iBITsr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 118 InstrStage<1, [SW_ALU0, SW_ALU1]>], 119 [2, 1, 1, 1]>, 120 // 121 // Unary Instructions that produce a result 122 123 // CLZ, RBIT, etc. 
124 InstrItinData<IIC_iUNAr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 125 InstrStage<1, [SW_ALU0, SW_ALU1]>], 126 [1, 1]>, 127 128 // BFC, BFI, UBFX, SBFX 129 InstrItinData<IIC_iUNAsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 130 InstrStage<1, [SW_ALU0, SW_ALU1]>], 131 [2, 1]>, 132 133 // 134 // Zero and sign extension instructions 135 InstrItinData<IIC_iEXTr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 136 InstrStage<1, [SW_ALU0, SW_ALU1]>], 137 [1, 1]>, 138 InstrItinData<IIC_iEXTAr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 139 InstrStage<1, [SW_ALU0, SW_ALU1]>], 140 [1, 1, 1]>, 141 InstrItinData<IIC_iEXTAsr,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 142 InstrStage<1, [SW_ALU0, SW_ALU1]>], 143 [1, 1, 1, 1]>, 144 // 145 // Compare instructions 146 InstrItinData<IIC_iCMPi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 147 InstrStage<1, [SW_ALU0, SW_ALU1]>], 148 [1]>, 149 InstrItinData<IIC_iCMPr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 150 InstrStage<1, [SW_ALU0, SW_ALU1]>], 151 [1, 1]>, 152 InstrItinData<IIC_iCMPsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 153 InstrStage<2, [SW_ALU0, SW_ALU1]>], 154 [1, 1]>, 155 InstrItinData<IIC_iCMPsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 156 InstrStage<2, [SW_ALU0, SW_ALU1]>], 157 [1, 1, 1]>, 158 // 159 // Test instructions 160 InstrItinData<IIC_iTSTi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 161 InstrStage<1, [SW_ALU0, SW_ALU1]>], 162 [1]>, 163 InstrItinData<IIC_iTSTr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 164 InstrStage<1, [SW_ALU0, SW_ALU1]>], 165 [1, 1]>, 166 InstrItinData<IIC_iTSTsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 167 InstrStage<2, [SW_ALU0, SW_ALU1]>], 168 [1, 1]>, 169 InstrItinData<IIC_iTSTsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 170 InstrStage<2, [SW_ALU0, SW_ALU1]>], 171 [1, 1, 1]>, 172 // 173 // Move instructions, conditional 174 // FIXME: Correctly model the extra input dep on the destination. 
175 InstrItinData<IIC_iCMOVi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 176 InstrStage<1, [SW_ALU0, SW_ALU1]>], 177 [1]>, 178 InstrItinData<IIC_iCMOVr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 179 InstrStage<1, [SW_ALU0, SW_ALU1]>], 180 [1, 1]>, 181 InstrItinData<IIC_iCMOVsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 182 InstrStage<1, [SW_ALU0, SW_ALU1]>], 183 [1, 1]>, 184 InstrItinData<IIC_iCMOVsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 185 InstrStage<1, [SW_ALU0, SW_ALU1]>], 186 [2, 1, 1]>, 187 InstrItinData<IIC_iCMOVix2, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 188 InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 189 InstrStage<1, [SW_ALU0, SW_ALU1]>, 190 InstrStage<1, [SW_ALU0, SW_ALU1]>], 191 [2]>, 192 193 // Integer multiply pipeline 194 // 195 InstrItinData<IIC_iMUL16 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 196 InstrStage<1, [SW_ALU0]>], 197 [3, 1, 1]>, 198 InstrItinData<IIC_iMAC16 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 199 InstrStage<1, [SW_ALU0]>], 200 [3, 1, 1, 1]>, 201 InstrItinData<IIC_iMUL32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 202 InstrStage<1, [SW_ALU0]>], 203 [4, 1, 1]>, 204 InstrItinData<IIC_iMAC32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 205 InstrStage<1, [SW_ALU0]>], 206 [4, 1, 1, 1]>, 207 InstrItinData<IIC_iMUL64 , [InstrStage<1, [SW_DIS0], 0>, 208 InstrStage<1, [SW_DIS1], 0>, 209 InstrStage<1, [SW_DIS2], 0>, 210 InstrStage<1, [SW_ALU0], 1>, 211 InstrStage<1, [SW_ALU0], 3>, 212 InstrStage<1, [SW_ALU0]>], 213 [5, 5, 1, 1]>, 214 InstrItinData<IIC_iMAC64 , [InstrStage<1, [SW_DIS0], 0>, 215 InstrStage<1, [SW_DIS1], 0>, 216 InstrStage<1, [SW_DIS2], 0>, 217 InstrStage<1, [SW_ALU0], 1>, 218 InstrStage<1, [SW_ALU0], 1>, 219 InstrStage<1, [SW_ALU0, SW_ALU1], 3>, 220 InstrStage<1, [SW_ALU0, SW_ALU1]>], 221 [5, 6, 1, 1]>, 222 // 223 // Integer divide 224 InstrItinData<IIC_iDIV , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 225 InstrStage<1, [SW_ALU0], 0>, 226 InstrStage<14, 
[SW_IDIV]>], 227 [14, 1, 1]>, 228 229 // Integer load pipeline 230 // FIXME: The timings are some rough approximations 231 // 232 // Immediate offset 233 InstrItinData<IIC_iLoad_i , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 234 InstrStage<1, [SW_LS]>], 235 [3, 1]>, 236 InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 237 InstrStage<1, [SW_LS]>], 238 [3, 1]>, 239 InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [SW_DIS0], 0>, 240 InstrStage<1, [SW_DIS1], 0>, 241 InstrStage<1, [SW_LS], 1>, 242 InstrStage<1, [SW_LS]>], 243 [3, 4, 1]>, 244 // 245 // Register offset 246 InstrItinData<IIC_iLoad_r , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 247 InstrStage<1, [SW_LS]>], 248 [3, 1, 1]>, 249 InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 250 InstrStage<1, [SW_LS]>], 251 [3, 1, 1]>, 252 InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [SW_DIS0], 0>, 253 InstrStage<1, [SW_DIS1], 0>, 254 InstrStage<1, [SW_DIS2], 0>, 255 InstrStage<1, [SW_LS], 1>, 256 InstrStage<1, [SW_LS], 3>, 257 InstrStage<1, [SW_ALU0, SW_ALU1]>], 258 [3, 4, 1, 1]>, 259 // 260 // Scaled register offset 261 InstrItinData<IIC_iLoad_si , [InstrStage<1, [SW_DIS0], 0>, 262 InstrStage<1, [SW_DIS1], 0>, 263 InstrStage<1, [SW_ALU0, SW_ALU1], 2>, 264 InstrStage<1, [SW_LS]>], 265 [5, 1, 1]>, 266 InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [SW_DIS0], 0>, 267 InstrStage<1, [SW_DIS1], 0>, 268 InstrStage<1, [SW_ALU0, SW_ALU1], 2>, 269 InstrStage<1, [SW_LS]>], 270 [5, 1, 1]>, 271 // 272 // Immediate offset with update 273 InstrItinData<IIC_iLoad_iu , [InstrStage<1, [SW_DIS0], 0>, 274 InstrStage<1, [SW_DIS1], 0>, 275 InstrStage<1, [SW_ALU0, SW_ALU1], 1>, 276 InstrStage<1, [SW_LS]>], 277 [3, 1, 1]>, 278 InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [SW_DIS0], 0>, 279 InstrStage<1, [SW_DIS1], 0>, 280 InstrStage<1, [SW_ALU0, SW_ALU1], 1>, 281 InstrStage<1, [SW_LS]>], 282 [3, 1, 1]>, 283 // 284 // Register offset with update 285 InstrItinData<IIC_iLoad_ru , 
[InstrStage<1, [SW_DIS0], 0>, 286 InstrStage<1, [SW_DIS1], 0>, 287 InstrStage<1, [SW_ALU0], 1>, 288 InstrStage<1, [SW_LS]>], 289 [3, 1, 1, 1]>, 290 InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [SW_DIS0], 0>, 291 InstrStage<1, [SW_DIS1], 0>, 292 InstrStage<1, [SW_ALU0], 1>, 293 InstrStage<1, [SW_LS]>], 294 [3, 1, 1, 1]>, 295 InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [SW_DIS0], 0>, 296 InstrStage<1, [SW_DIS1], 0>, 297 InstrStage<1, [SW_DIS2], 0>, 298 InstrStage<1, [SW_ALU0, SW_ALU1], 0>, 299 InstrStage<1, [SW_LS], 3>, 300 InstrStage<1, [SW_LS], 0>, 301 InstrStage<1, [SW_ALU0, SW_ALU1]>], 302 [3, 4, 1, 1]>, 303 // 304 // Scaled register offset with update 305 InstrItinData<IIC_iLoad_siu , [InstrStage<1, [SW_DIS0], 0>, 306 InstrStage<1, [SW_DIS1], 0>, 307 InstrStage<1, [SW_DIS2], 0>, 308 InstrStage<1, [SW_ALU0, SW_ALU1], 2>, 309 InstrStage<1, [SW_LS], 3>, 310 InstrStage<1, [SW_ALU0, SW_ALU1]>], 311 [5, 3, 1, 1]>, 312 InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [SW_DIS0], 0>, 313 InstrStage<1, [SW_DIS1], 0>, 314 InstrStage<1, [SW_DIS2], 0>, 315 InstrStage<1, [SW_ALU0, SW_ALU1], 2>, 316 InstrStage<1, [SW_LS], 0>, 317 InstrStage<1, [SW_ALU0, SW_ALU1]>], 318 [5, 3, 1, 1]>, 319 // 320 // Load multiple, def is the 5th operand. 321 // FIXME: This assumes 3 to 4 registers. 322 InstrItinData<IIC_iLoad_m , [InstrStage<1, [SW_DIS0], 0>, 323 InstrStage<1, [SW_DIS1], 0>, 324 InstrStage<1, [SW_DIS2], 0>, 325 InstrStage<1, [SW_ALU0, SW_ALU1], 1>, 326 InstrStage<1, [SW_LS]>], 327 [1, 1, 1, 1, 3], [], -1>, // dynamic uops 328 329 // 330 // Load multiple + update, defs are the 1st and 5th operands. 
331 InstrItinData<IIC_iLoad_mu , [InstrStage<1, [SW_DIS0], 0>, 332 InstrStage<1, [SW_DIS1], 0>, 333 InstrStage<1, [SW_DIS2], 0>, 334 InstrStage<1, [SW_ALU0, SW_ALU1], 0>, 335 InstrStage<1, [SW_LS], 3>, 336 InstrStage<1, [SW_ALU0, SW_ALU1]>], 337 [2, 1, 1, 1, 3], [], -1>, // dynamic uops 338 // 339 // Load multiple plus branch 340 InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [SW_DIS0], 0>, 341 InstrStage<1, [SW_DIS1], 0>, 342 InstrStage<1, [SW_DIS2], 0>, 343 InstrStage<1, [SW_ALU0, SW_ALU1], 1>, 344 InstrStage<1, [SW_LS]>], 345 [1, 1, 1, 1, 3], [], -1>, // dynamic uops 346 // 347 // Pop, def is the 3rd operand. 348 InstrItinData<IIC_iPop , [InstrStage<1, [SW_DIS0], 0>, 349 InstrStage<1, [SW_DIS1], 0>, 350 InstrStage<1, [SW_ALU0, SW_ALU1], 1>, 351 InstrStage<1, [SW_LS]>], 352 [1, 1, 3], [], -1>, // dynamic uops 353 // 354 // Pop + branch, def is the 3rd operand. 355 InstrItinData<IIC_iPop_Br, [InstrStage<1, [SW_DIS0], 0>, 356 InstrStage<1, [SW_DIS1], 0>, 357 InstrStage<1, [SW_ALU0, SW_ALU1], 1>, 358 InstrStage<1, [SW_LS]>], 359 [1, 1, 3], [], -1>, // dynamic uops 360 361 // 362 // iLoadi + iALUr for t2LDRpci_pic. 
363 InstrItinData<IIC_iLoadiALU, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 364 InstrStage<1, [SW_LS], 3>, 365 InstrStage<1, [SW_ALU0, SW_ALU1]>], 366 [4, 1]>, 367 368 // Integer store pipeline 369 /// 370 // Immediate offset 371 InstrItinData<IIC_iStore_i , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 372 InstrStage<1, [SW_LS]>], 373 [1, 1]>, 374 InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 375 InstrStage<1, [SW_LS]>], 376 [1, 1]>, 377 InstrItinData<IIC_iStore_d_i, [InstrStage<1, [SW_DIS0], 0>, 378 InstrStage<1, [SW_DIS1], 0>, 379 InstrStage<1, [SW_DIS2], 0>, 380 InstrStage<1, [SW_LS], 0>, 381 InstrStage<1, [SW_ALU0, SW_ALU1], 1>, 382 InstrStage<1, [SW_LS]>], 383 [1, 1]>, 384 // 385 // Register offset 386 InstrItinData<IIC_iStore_r , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 387 InstrStage<1, [SW_LS]>], 388 [1, 1, 1]>, 389 InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 390 InstrStage<1, [SW_LS]>], 391 [1, 1, 1]>, 392 InstrItinData<IIC_iStore_d_r, [InstrStage<1, [SW_DIS0], 0>, 393 InstrStage<1, [SW_DIS1], 0>, 394 InstrStage<1, [SW_DIS2], 0>, 395 InstrStage<1, [SW_LS], 0>, 396 InstrStage<1, [SW_ALU0, SW_ALU1], 1>, 397 InstrStage<1, [SW_LS]>], 398 [1, 1, 1]>, 399 // 400 // Scaled register offset 401 InstrItinData<IIC_iStore_si , [InstrStage<1, [SW_DIS0], 0>, 402 InstrStage<1, [SW_DIS1], 0>, 403 InstrStage<1, [SW_ALU0, SW_ALU1], 2>, 404 InstrStage<1, [SW_LS]>], 405 [1, 1, 1]>, 406 InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [SW_DIS0], 0>, 407 InstrStage<1, [SW_DIS1], 0>, 408 InstrStage<1, [SW_ALU0, SW_ALU1], 2>, 409 InstrStage<1, [SW_LS]>], 410 [1, 1, 1]>, 411 // 412 // Immediate offset with update 413 InstrItinData<IIC_iStore_iu , [InstrStage<1, [SW_DIS0], 0>, 414 InstrStage<1, [SW_DIS1], 0>, 415 InstrStage<1, [SW_ALU0, SW_ALU1], 1>, 416 InstrStage<1, [SW_LS]>], 417 [1, 1, 1]>, 418 InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [SW_DIS0], 0>, 419 InstrStage<1, [SW_DIS1], 0>, 420 
InstrStage<1, [SW_ALU0, SW_ALU1], 1>, 421 InstrStage<1, [SW_LS]>], 422 [1, 1, 1]>, 423 // 424 // Register offset with update 425 InstrItinData<IIC_iStore_ru , [InstrStage<1, [SW_DIS0], 0>, 426 InstrStage<1, [SW_DIS1], 0>, 427 InstrStage<1, [SW_ALU0, SW_ALU1], 1>, 428 InstrStage<1, [SW_LS]>], 429 [1, 1, 1, 1]>, 430 InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [SW_DIS0], 0>, 431 InstrStage<1, [SW_DIS1], 0>, 432 InstrStage<1, [SW_ALU0, SW_ALU1], 1>, 433 InstrStage<1, [SW_LS]>], 434 [1, 1, 1, 1]>, 435 InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [SW_DIS0], 0>, 436 InstrStage<1, [SW_DIS1], 0>, 437 InstrStage<1, [SW_ALU0, SW_ALU1], 1>, 438 InstrStage<1, [SW_LS]>], 439 [1, 1, 1, 1]>, 440 // 441 // Scaled register offset with update 442 InstrItinData<IIC_iStore_siu, [InstrStage<1, [SW_DIS0], 0>, 443 InstrStage<1, [SW_DIS1], 0>, 444 InstrStage<1, [SW_ALU0, SW_ALU1], 2>, 445 InstrStage<1, [SW_LS], 0>, 446 InstrStage<1, [SW_ALU0, SW_ALU1], 1>], 447 [3, 1, 1, 1]>, 448 InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [SW_DIS0], 0>, 449 InstrStage<1, [SW_DIS1], 0>, 450 InstrStage<1, [SW_ALU0, SW_ALU1], 2>, 451 InstrStage<1, [SW_LS], 0>, 452 InstrStage<1, [SW_ALU0, SW_ALU1], 1>], 453 [3, 1, 1, 1]>, 454 // 455 // Store multiple 456 InstrItinData<IIC_iStore_m , [InstrStage<1, [SW_DIS0], 0>, 457 InstrStage<1, [SW_DIS1], 0>, 458 InstrStage<1, [SW_DIS2], 0>, 459 InstrStage<1, [SW_ALU0, SW_ALU1], 1>, 460 InstrStage<1, [SW_LS], 1>, 461 InstrStage<1, [SW_ALU0, SW_ALU1], 1>, 462 InstrStage<1, [SW_LS], 1>, 463 InstrStage<1, [SW_ALU0, SW_ALU1], 1>, 464 InstrStage<1, [SW_LS]>], 465 [], [], -1>, // dynamic uops 466 // 467 // Store multiple + update 468 InstrItinData<IIC_iStore_mu, [InstrStage<1, [SW_DIS0], 0>, 469 InstrStage<1, [SW_DIS1], 0>, 470 InstrStage<1, [SW_DIS2], 0>, 471 InstrStage<1, [SW_ALU0, SW_ALU1], 1>, 472 InstrStage<1, [SW_LS], 1>, 473 InstrStage<1, [SW_ALU0, SW_ALU1], 1>, 474 InstrStage<1, [SW_LS], 1>, 475 InstrStage<1, [SW_ALU0, SW_ALU1], 1>, 476 InstrStage<1, [SW_LS]>], 477 
[2], [], -1>, // dynamic uops 478 479 // 480 // Preload 481 InstrItinData<IIC_Preload, [InstrStage<1, [SW_DIS0], 0>], [1, 1]>, 482 483 // Branch 484 // 485 // no delay slots, so the latency of a branch is unimportant 486 InstrItinData<IIC_Br , [InstrStage<1, [SW_DIS0], 0>]>, 487 488 // FP Special Register to Integer Register File Move 489 InstrItinData<IIC_fpSTAT , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 490 InstrStage<1, [SW_ALU0, SW_ALU1]>], 491 [1]>, 492 // 493 // Single-precision FP Unary 494 // 495 // Most floating-point moves get issued on ALU0. 496 InstrItinData<IIC_fpUNA32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 497 InstrStage<1, [SW_ALU0]>], 498 [2, 1]>, 499 // 500 // Double-precision FP Unary 501 InstrItinData<IIC_fpUNA64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 502 InstrStage<1, [SW_ALU0]>], 503 [2, 1]>, 504 505 // 506 // Single-precision FP Compare 507 InstrItinData<IIC_fpCMP32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 508 InstrStage<1, [SW_ALU0]>], 509 [1, 1]>, 510 // 511 // Double-precision FP Compare 512 InstrItinData<IIC_fpCMP64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 513 InstrStage<1, [SW_ALU0]>], 514 [1, 1]>, 515 // 516 // Single to Double FP Convert 517 InstrItinData<IIC_fpCVTSD , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 518 InstrStage<1, [SW_ALU1]>], 519 [4, 1]>, 520 // 521 // Double to Single FP Convert 522 InstrItinData<IIC_fpCVTDS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 523 InstrStage<1, [SW_ALU1]>], 524 [4, 1]>, 525 526 // 527 // Single to Half FP Convert 528 InstrItinData<IIC_fpCVTSH , [InstrStage<1, [SW_DIS0], 0>, 529 InstrStage<1, [SW_DIS1], 0>, 530 InstrStage<1, [SW_ALU1], 4>, 531 InstrStage<1, [SW_ALU1]>], 532 [6, 1]>, 533 // 534 // Half to Single FP Convert 535 InstrItinData<IIC_fpCVTHS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 536 InstrStage<1, [SW_ALU1]>], 537 [4, 1]>, 538 539 // 540 // Single-Precision FP to Integer Convert 541 InstrItinData<IIC_fpCVTSI , 
[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 542 InstrStage<1, [SW_ALU1]>], 543 [4, 1]>, 544 // 545 // Double-Precision FP to Integer Convert 546 InstrItinData<IIC_fpCVTDI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 547 InstrStage<1, [SW_ALU1]>], 548 [4, 1]>, 549 // 550 // Integer to Single-Precision FP Convert 551 InstrItinData<IIC_fpCVTIS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 552 InstrStage<1, [SW_ALU1]>], 553 [4, 1]>, 554 // 555 // Integer to Double-Precision FP Convert 556 InstrItinData<IIC_fpCVTID , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 557 InstrStage<1, [SW_ALU1]>], 558 [4, 1]>, 559 // 560 // Single-precision FP ALU 561 InstrItinData<IIC_fpALU32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 562 InstrStage<1, [SW_ALU0]>], 563 [2, 1, 1]>, 564 // 565 // Double-precision FP ALU 566 InstrItinData<IIC_fpALU64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 567 InstrStage<1, [SW_ALU0]>], 568 [2, 1, 1]>, 569 // 570 // Single-precision FP Multiply 571 InstrItinData<IIC_fpMUL32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 572 InstrStage<1, [SW_ALU1]>], 573 [4, 1, 1]>, 574 // 575 // Double-precision FP Multiply 576 InstrItinData<IIC_fpMUL64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 577 InstrStage<1, [SW_ALU1]>], 578 [6, 1, 1]>, 579 // 580 // Single-precision FP MAC 581 InstrItinData<IIC_fpMAC32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 582 InstrStage<1, [SW_ALU1]>], 583 [8, 1, 1]>, 584 // 585 // Double-precision FP MAC 586 InstrItinData<IIC_fpMAC64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 587 InstrStage<1, [SW_ALU1]>], 588 [12, 1, 1]>, 589 // 590 // Single-precision Fused FP MAC 591 InstrItinData<IIC_fpFMAC32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 592 InstrStage<1, [SW_ALU1]>], 593 [8, 1, 1]>, 594 // 595 // Double-precision Fused FP MAC 596 InstrItinData<IIC_fpFMAC64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 597 InstrStage<1, [SW_ALU1]>], 598 [12, 1, 1]>, 599 // 600 // Single-precision 
FP DIV 601 InstrItinData<IIC_fpDIV32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 602 InstrStage<1, [SW_ALU1], 0>, 603 InstrStage<15, [SW_FDIV]>], 604 [17, 1, 1]>, 605 // 606 // Double-precision FP DIV 607 InstrItinData<IIC_fpDIV64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 608 InstrStage<1, [SW_ALU1], 0>, 609 InstrStage<30, [SW_FDIV]>], 610 [32, 1, 1]>, 611 // 612 // Single-precision FP SQRT 613 InstrItinData<IIC_fpSQRT32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 614 InstrStage<1, [SW_ALU1], 0>, 615 InstrStage<15, [SW_FDIV]>], 616 [17, 1]>, 617 // 618 // Double-precision FP SQRT 619 InstrItinData<IIC_fpSQRT64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 620 InstrStage<1, [SW_ALU1], 0>, 621 InstrStage<30, [SW_FDIV]>], 622 [32, 1, 1]>, 623 624 // 625 // Integer to Single-precision Move 626 InstrItinData<IIC_fpMOVIS, [InstrStage<1, [SW_DIS0], 0>, 627 InstrStage<1, [SW_DIS1], 0>, 628 InstrStage<1, [SW_LS], 4>, 629 InstrStage<1, [SW_ALU0]>], 630 [6, 1]>, 631 // 632 // Integer to Double-precision Move 633 InstrItinData<IIC_fpMOVID, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 634 InstrStage<1, [SW_LS]>], 635 [4, 1]>, 636 // 637 // Single-precision to Integer Move 638 InstrItinData<IIC_fpMOVSI, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 639 InstrStage<1, [SW_LS]>], 640 [3, 1]>, 641 // 642 // Double-precision to Integer Move 643 InstrItinData<IIC_fpMOVDI, [InstrStage<1, [SW_DIS0], 0>, 644 InstrStage<1, [SW_DIS1], 0>, 645 InstrStage<1, [SW_LS], 3>, 646 InstrStage<1, [SW_LS]>], 647 [3, 4, 1]>, 648 // 649 // Single-precision FP Load 650 InstrItinData<IIC_fpLoad32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 651 InstrStage<1, [SW_LS]>], 652 [4, 1]>, 653 // 654 // Double-precision FP Load 655 InstrItinData<IIC_fpLoad64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 656 InstrStage<1, [SW_LS]>], 657 [4, 1]>, 658 // 659 // FP Load Multiple 660 // FIXME: Assumes a single Q register. 
661 InstrItinData<IIC_fpLoad_m, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 662 InstrStage<1, [SW_LS]>], 663 [1, 1, 1, 4], [], -1>, // dynamic uops 664 // 665 // FP Load Multiple + update 666 // FIXME: Assumes a single Q register. 667 InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [SW_DIS0], 0>, 668 InstrStage<1, [SW_DIS1], 0>, 669 InstrStage<1, [SW_LS], 4>, 670 InstrStage<1, [SW_ALU0, SW_ALU1]>], 671 [2, 1, 1, 1, 4], [], -1>, // dynamic uops 672 // 673 // Single-precision FP Store 674 InstrItinData<IIC_fpStore32,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 675 InstrStage<1, [SW_LS]>], 676 [1, 1]>, 677 // 678 // Double-precision FP Store 679 InstrItinData<IIC_fpStore64,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 680 InstrStage<1, [SW_LS]>], 681 [1, 1]>, 682 // 683 // FP Store Multiple 684 // FIXME: Assumes a single Q register. 685 InstrItinData<IIC_fpStore_m,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 686 InstrStage<1, [SW_LS]>], 687 [1, 1, 1], [], -1>, // dynamic uops 688 // 689 // FP Store Multiple + update 690 // FIXME: Assumes a single Q register. 
691 InstrItinData<IIC_fpStore_mu,[InstrStage<1, [SW_DIS0], 0>, 692 InstrStage<1, [SW_DIS1], 0>, 693 InstrStage<1, [SW_LS], 4>, 694 InstrStage<1, [SW_ALU0, SW_ALU1]>], 695 [2, 1, 1, 1], [], -1>, // dynamic uops 696 // NEON 697 // 698 // Double-register Integer Unary 699 InstrItinData<IIC_VUNAiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 700 InstrStage<1, [SW_ALU0]>], 701 [4, 1]>, 702 // 703 // Quad-register Integer Unary 704 InstrItinData<IIC_VUNAiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 705 InstrStage<1, [SW_ALU0]>], 706 [4, 1]>, 707 // 708 // Double-register Integer Q-Unary 709 InstrItinData<IIC_VQUNAiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 710 InstrStage<1, [SW_ALU0]>], 711 [4, 1]>, 712 // 713 // Quad-register Integer CountQ-Unary 714 InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 715 InstrStage<1, [SW_ALU0]>], 716 [4, 1]>, 717 // 718 // Double-register Integer Binary 719 InstrItinData<IIC_VBINiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 720 InstrStage<1, [SW_ALU0]>], 721 [2, 1, 1]>, 722 // 723 // Quad-register Integer Binary 724 InstrItinData<IIC_VBINiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 725 InstrStage<1, [SW_ALU0]>], 726 [2, 1, 1]>, 727 // 728 // Double-register Integer Subtract 729 InstrItinData<IIC_VSUBiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 730 InstrStage<1, [SW_ALU0]>], 731 [2, 1, 1]>, 732 // 733 // Quad-register Integer Subtract 734 InstrItinData<IIC_VSUBiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 735 InstrStage<1, [SW_ALU0]>], 736 [2, 1, 1]>, 737 // 738 // Double-register Integer Shift 739 InstrItinData<IIC_VSHLiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 740 InstrStage<1, [SW_ALU0]>], 741 [2, 1, 1]>, 742 // 743 // Quad-register Integer Shift 744 InstrItinData<IIC_VSHLiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 745 InstrStage<1, [SW_ALU0]>], 746 [2, 1, 1]>, 747 // 748 // Double-register Integer Shift (4 cycle) 749 InstrItinData<IIC_VSHLi4D, 
[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 750 InstrStage<1, [SW_ALU0]>], 751 [4, 1, 1]>, 752 // 753 // Quad-register Integer Shift (4 cycle) 754 InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 755 InstrStage<1, [SW_ALU0]>], 756 [4, 1, 1]>, 757 // 758 // Double-register Integer Binary (4 cycle) 759 InstrItinData<IIC_VBINi4D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 760 InstrStage<1, [SW_ALU0]>], 761 [4, 1, 1]>, 762 // 763 // Quad-register Integer Binary (4 cycle) 764 InstrItinData<IIC_VBINi4Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 765 InstrStage<1, [SW_ALU0]>], 766 [4, 1, 1]>, 767 // 768 // Double-register Integer Subtract (4 cycle) 769 InstrItinData<IIC_VSUBi4D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 770 InstrStage<1, [SW_ALU0]>], 771 [4, 1, 1]>, 772 // 773 // Quad-register Integer Subtract (4 cycle) 774 InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 775 InstrStage<1, [SW_ALU0]>], 776 [4, 1, 1]>, 777 778 // 779 // Double-register Integer Count 780 InstrItinData<IIC_VCNTiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 781 InstrStage<1, [SW_ALU0]>], 782 [2, 1, 1]>, 783 // 784 // Quad-register Integer Count 785 InstrItinData<IIC_VCNTiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 786 InstrStage<1, [SW_ALU0]>], 787 [2, 1, 1]>, 788 // 789 // Double-register Absolute Difference and Accumulate 790 InstrItinData<IIC_VABAD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 791 InstrStage<1, [SW_ALU0]>], 792 [4, 1, 1, 1]>, 793 // 794 // Quad-register Absolute Difference and Accumulate 795 InstrItinData<IIC_VABAQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 796 InstrStage<1, [SW_ALU0]>], 797 [4, 1, 1, 1]>, 798 // 799 // Double-register Integer Pair Add Long 800 InstrItinData<IIC_VPALiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 801 InstrStage<1, [SW_ALU0]>], 802 [4, 1, 1]>, 803 // 804 // Quad-register Integer Pair Add Long 805 InstrItinData<IIC_VPALiQ, [InstrStage<1, 
[SW_DIS0, SW_DIS1, SW_DIS2], 0>, 806 InstrStage<1, [SW_ALU0]>], 807 [4, 1, 1]>, 808 809 // 810 // Double-register Integer Multiply (.8, .16) 811 InstrItinData<IIC_VMULi16D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 812 InstrStage<1, [SW_ALU1]>], 813 [4, 1, 1]>, 814 // 815 // Quad-register Integer Multiply (.8, .16) 816 InstrItinData<IIC_VMULi16Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 817 InstrStage<1, [SW_ALU1]>], 818 [4, 1, 1]>, 819 820 // 821 // Double-register Integer Multiply (.32) 822 InstrItinData<IIC_VMULi32D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 823 InstrStage<1, [SW_ALU1]>], 824 [4, 1, 1]>, 825 // 826 // Quad-register Integer Multiply (.32) 827 InstrItinData<IIC_VMULi32Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 828 InstrStage<1, [SW_ALU1]>], 829 [4, 1, 1]>, 830 // 831 // Double-register Integer Multiply-Accumulate (.8, .16) 832 InstrItinData<IIC_VMACi16D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 833 InstrStage<1, [SW_ALU1]>], 834 [4, 1, 1, 1]>, 835 // 836 // Double-register Integer Multiply-Accumulate (.32) 837 InstrItinData<IIC_VMACi32D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 838 InstrStage<1, [SW_ALU1]>], 839 [4, 1, 1, 1]>, 840 // 841 // Quad-register Integer Multiply-Accumulate (.8, .16) 842 InstrItinData<IIC_VMACi16Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 843 InstrStage<1, [SW_ALU1]>], 844 [4, 1, 1, 1]>, 845 // 846 // Quad-register Integer Multiply-Accumulate (.32) 847 InstrItinData<IIC_VMACi32Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 848 InstrStage<1, [SW_ALU1]>], 849 [4, 1, 1, 1]>, 850 851 // 852 // Move 853 InstrItinData<IIC_VMOV, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 854 InstrStage<1, [SW_ALU0]>], 855 [2, 1]>, 856 // 857 // Move Immediate 858 InstrItinData<IIC_VMOVImm, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 859 InstrStage<1, [SW_ALU0]>], 860 [2]>, 861 // 862 // Double-register Permute Move 863 InstrItinData<IIC_VMOVD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 
0>, 864 InstrStage<1, [SW_ALU1]>], 865 [2, 1]>, 866 // 867 // Quad-register Permute Move 868 InstrItinData<IIC_VMOVQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 869 InstrStage<1, [SW_ALU1]>], 870 [2, 1]>, 871 // 872 // Integer to Single-precision Move 873 InstrItinData<IIC_VMOVIS , [InstrStage<1, [SW_DIS0], 0>, 874 InstrStage<1, [SW_DIS1], 0>, 875 InstrStage<1, [SW_LS], 4>, 876 InstrStage<1, [SW_ALU0]>], 877 [6, 1]>, 878 // 879 // Integer to Double-precision Move 880 InstrItinData<IIC_VMOVID , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 881 InstrStage<1, [SW_LS]>], 882 [4, 1, 1]>, 883 // 884 // Single-precision to Integer Move 885 InstrItinData<IIC_VMOVSI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 886 InstrStage<1, [SW_LS]>], 887 [3, 1]>, 888 // 889 // Double-precision to Integer Move 890 InstrItinData<IIC_VMOVDI , [InstrStage<1, [SW_DIS0], 0>, 891 InstrStage<1, [SW_DIS1], 0>, 892 InstrStage<1, [SW_LS], 3>, 893 InstrStage<1, [SW_LS]>], 894 [3, 4, 1]>, 895 // 896 // Integer to Lane Move 897 // FIXME: I think this is correct, but it is not clear from the tuning guide. 898 InstrItinData<IIC_VMOVISL , [InstrStage<1, [SW_DIS0], 0>, 899 InstrStage<1, [SW_DIS1], 0>, 900 InstrStage<1, [SW_LS], 4>, 901 InstrStage<1, [SW_ALU0]>], 902 [6, 1]>, 903 904 // 905 // Vector narrow move 906 InstrItinData<IIC_VMOVN, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 907 InstrStage<1, [SW_ALU1]>], 908 [2, 1]>, 909 // 910 // Double-register FP Unary 911 // FIXME: VRECPE / VRSQRTE has a longer latency than VABS, which is used here, 912 // and they issue on a different pipeline. 913 InstrItinData<IIC_VUNAD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 914 InstrStage<1, [SW_ALU0]>], 915 [2, 1]>, 916 // 917 // Quad-register FP Unary 918 // FIXME: VRECPE / VRSQRTE has a longer latency than VABS, which is used here, 919 // and they issue on a different pipeline. 
920 InstrItinData<IIC_VUNAQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 921 InstrStage<1, [SW_ALU0]>], 922 [2, 1]>, 923 // 924 // Double-register FP Binary 925 // FIXME: We're using this itin for many instructions. 926 InstrItinData<IIC_VBIND, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 927 InstrStage<1, [SW_ALU0]>], 928 [4, 1, 1]>, 929 930 // 931 // VPADD, etc. 932 InstrItinData<IIC_VPBIND, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 933 InstrStage<1, [SW_ALU0]>], 934 [4, 1, 1]>, 935 // 936 // Double-register FP VMUL 937 InstrItinData<IIC_VFMULD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 938 InstrStage<1, [SW_ALU1]>], 939 [4, 1, 1]>, 940 // 941 // Quad-register FP Binary 942 InstrItinData<IIC_VBINQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 943 InstrStage<1, [SW_ALU0]>], 944 [4, 1, 1]>, 945 // 946 // Quad-register FP VMUL 947 InstrItinData<IIC_VFMULQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 948 InstrStage<1, [SW_ALU1]>], 949 [4, 1, 1]>, 950 // 951 // Double-register FP Multiply-Accumulate 952 InstrItinData<IIC_VMACD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 953 InstrStage<1, [SW_ALU1]>], 954 [8, 1, 1]>, 955 // 956 // Quad-register FP Multiply-Accumulate 957 InstrItinData<IIC_VMACQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 958 InstrStage<1, [SW_ALU1]>], 959 [8, 1, 1]>, 960 // 961 // Double-register Fused FP Multiply-Accumulate 962 InstrItinData<IIC_VFMACD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 963 InstrStage<1, [SW_ALU1]>], 964 [8, 1, 1]>, 965 // 966 // Quad-register Fused FP Multiply-Accumulate 967 InstrItinData<IIC_VFMACQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 968 InstrStage<1, [SW_ALU1]>], 969 [8, 1, 1]>, 970 // 971 // Double-register Reciprocal Step 972 InstrItinData<IIC_VRECSD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 973 InstrStage<1, [SW_ALU1]>], 974 [8, 1, 1]>, 975 // 976 // Quad-register Reciprocal Step 977 InstrItinData<IIC_VRECSQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 978 
InstrStage<1, [SW_ALU1]>], 979 [8, 1, 1]>, 980 // 981 // Double-register Permute 982 // FIXME: The latencies are unclear from the documentation. 983 InstrItinData<IIC_VPERMD, [InstrStage<1, [SW_DIS0], 0>, 984 InstrStage<1, [SW_DIS1], 0>, 985 InstrStage<1, [SW_DIS2], 0>, 986 InstrStage<1, [SW_ALU1], 2>, 987 InstrStage<1, [SW_ALU1], 2>, 988 InstrStage<1, [SW_ALU1]>], 989 [3, 4, 3, 4]>, 990 // 991 // Quad-register Permute 992 // FIXME: The latencies are unclear from the documentation. 993 InstrItinData<IIC_VPERMQ, [InstrStage<1, [SW_DIS0], 0>, 994 InstrStage<1, [SW_DIS1], 0>, 995 InstrStage<1, [SW_DIS2], 0>, 996 InstrStage<1, [SW_ALU1], 2>, 997 InstrStage<1, [SW_ALU1], 2>, 998 InstrStage<1, [SW_ALU1]>], 999 [3, 4, 3, 4]>, 1000 // 1001 // Quad-register Permute (3 cycle issue on A9) 1002 InstrItinData<IIC_VPERMQ3, [InstrStage<1, [SW_DIS0], 0>, 1003 InstrStage<1, [SW_DIS1], 0>, 1004 InstrStage<1, [SW_DIS2], 0>, 1005 InstrStage<1, [SW_ALU1], 2>, 1006 InstrStage<1, [SW_ALU1], 2>, 1007 InstrStage<1, [SW_ALU1]>], 1008 [3, 4, 3, 4]>, 1009 1010 // 1011 // Double-register VEXT 1012 InstrItinData<IIC_VEXTD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 1013 InstrStage<1, [SW_ALU1]>], 1014 [2, 1, 1]>, 1015 // 1016 // Quad-register VEXT 1017 InstrItinData<IIC_VEXTQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 1018 InstrStage<1, [SW_ALU1]>], 1019 [2, 1, 1]>, 1020 // 1021 // VTB 1022 InstrItinData<IIC_VTB1, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 1023 InstrStage<1, [SW_ALU1]>], 1024 [2, 1, 1]>, 1025 InstrItinData<IIC_VTB2, [InstrStage<1, [SW_DIS0], 0>, 1026 InstrStage<1, [SW_DIS1], 0>, 1027 InstrStage<1, [SW_ALU1], 2>, 1028 InstrStage<1, [SW_ALU1]>], 1029 [4, 1, 3, 3]>, 1030 InstrItinData<IIC_VTB3, [InstrStage<1, [SW_DIS0], 0>, 1031 InstrStage<1, [SW_DIS1], 0>, 1032 InstrStage<1, [SW_DIS2], 0>, 1033 InstrStage<1, [SW_ALU1], 2>, 1034 InstrStage<1, [SW_ALU1], 2>, 1035 InstrStage<1, [SW_ALU1]>], 1036 [6, 1, 3, 5, 5]>, 1037 InstrItinData<IIC_VTB4, [InstrStage<1, [SW_DIS0], 
0>,
                             InstrStage<1, [SW_DIS1], 0>,
                             InstrStage<1, [SW_DIS2], 0>,
                             InstrStage<1, [SW_ALU1], 2>,
                             InstrStage<1, [SW_ALU1], 2>,
                             InstrStage<1, [SW_ALU1], 2>,
                             InstrStage<1, [SW_ALU1]>],
                            [8, 1, 3, 5, 7, 7]>,
  //
  // VTBX
  InstrItinData<IIC_VTBX1,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
                             InstrStage<1, [SW_ALU1]>],
                            [2, 1, 1]>,
  InstrItinData<IIC_VTBX2,  [InstrStage<1, [SW_DIS0], 0>,
                             InstrStage<1, [SW_DIS1], 0>,
                             InstrStage<1, [SW_ALU1], 2>,
                             InstrStage<1, [SW_ALU1]>],
                            [4, 1, 3, 3]>,
  InstrItinData<IIC_VTBX3,  [InstrStage<1, [SW_DIS0], 0>,
                             InstrStage<1, [SW_DIS1], 0>,
                             InstrStage<1, [SW_DIS2], 0>,
                             InstrStage<1, [SW_ALU1], 2>,
                             InstrStage<1, [SW_ALU1], 2>,
                             InstrStage<1, [SW_ALU1]>],
                            [6, 1, 3, 5, 5]>,
  InstrItinData<IIC_VTBX4,  [InstrStage<1, [SW_DIS0], 0>,
                             InstrStage<1, [SW_DIS1], 0>,
                             InstrStage<1, [SW_DIS2], 0>,
                             InstrStage<1, [SW_ALU1], 2>,
                             InstrStage<1, [SW_ALU1], 2>,
                             InstrStage<1, [SW_ALU1], 2>,
                             InstrStage<1, [SW_ALU1]>],
                            [8, 1, 3, 5, 7, 7]>
]>;

// ===---------------------------------------------------------------------===//
// The following definitions describe the simple machine model which
// will replace itineraries.

// Machine model for the Swift processor, used for scheduling and for other
// instruction cost heuristics.
def SwiftModel : SchedMachineModel {
  // Attach the legacy itinerary tables defined above to this model.
  let Itineraries = SwiftItineraries;

  // 3 micro-ops are dispatched per cycle.
  let IssueWidth = 3;

  // Data dependencies are allowed within dispatch groups.
  let MinLatency = 0;

  // NOTE(review): presumably the load latency in cycles; confirm against the
  // SchedMachineModel class definition.
  let LoadLatency = 3;
}

// TODO: Add Swift processor and scheduler resources.