//=- ARMScheduleSwift.td - Swift Scheduling Definitions -*- tablegen -*----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the itinerary class data for the Swift processor.
//
//===----------------------------------------------------------------------===//

// ===---------------------------------------------------------------------===//
// This section contains legacy support for itineraries. This is
// required until SD and PostRA schedulers are replaced by MachineScheduler.

// Functional units named by the InstrStage entries of SwiftItineraries.

// Units named by the leading, zero-advance stage(s) of the itineraries below.
// NOTE(review): "DIS" presumably stands for dispatch -- confirm against the
// tuning guide.
def SW_DIS0  : FuncUnit;
def SW_DIS1  : FuncUnit;
def SW_DIS2  : FuncUnit;

// Execution units. Simple integer itineraries accept either ALU, while the
// integer multiply itineraries name SW_ALU0 only and the FP conversion and FP
// multiply itineraries name SW_ALU1 only.
def SW_ALU0  : FuncUnit;
def SW_ALU1  : FuncUnit;

// Unit named by the load and store itineraries.
def SW_LS    : FuncUnit;

// Long-occupancy divide units: IIC_iDIV holds SW_IDIV for 14 cycles; the FP
// divide and square-root itineraries hold SW_FDIV for 15 (single-precision)
// or 30 (double-precision) cycles.
def SW_IDIV  : FuncUnit;
def SW_FDIV  : FuncUnit;

// FIXME: Need bypasses.
// FIXME: Model the multiple stages of IIC_iMOVix2, IIC_iMOVix2addpc, and
//        IIC_iMOVix2ld better.
// FIXME: Model the special immediate shifts that are not microcoded.
// FIXME: Do we need to model the fact that uses of r15 in a micro-op force it
//        to issue on pipe 1?
// FIXME: Model the pipelined behavior of CMP / TST instructions.
// FIXME: Better model the microcode stages of multiply instructions, especially
//        conditional variants.
// FIXME: Add preload instruction when it is documented.
// FIXME: Model non-pipelined nature of FP div / sqrt unit.
39 40 def SwiftItineraries : ProcessorItineraries< 41 [SW_DIS0, SW_DIS1, SW_DIS2, SW_ALU0, SW_ALU1, SW_LS, SW_IDIV, SW_FDIV], [], [ 42 // 43 // Move instructions, unconditional 44 InstrItinData<IIC_iMOVi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 45 InstrStage<1, [SW_ALU0, SW_ALU1]>], 46 [1]>, 47 InstrItinData<IIC_iMOVr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 48 InstrStage<1, [SW_ALU0, SW_ALU1]>], 49 [1]>, 50 InstrItinData<IIC_iMOVsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 51 InstrStage<1, [SW_ALU0, SW_ALU1]>], 52 [1]>, 53 InstrItinData<IIC_iMOVsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 54 InstrStage<1, [SW_ALU0, SW_ALU1]>], 55 [1]>, 56 InstrItinData<IIC_iMOVix2 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 57 InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 58 InstrStage<1, [SW_ALU0, SW_ALU1]>, 59 InstrStage<1, [SW_ALU0, SW_ALU1]>], 60 [2]>, 61 InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 62 InstrStage<1, [SW_ALU0, SW_ALU1]>, 63 InstrStage<1, [SW_ALU0, SW_ALU1]>, 64 InstrStage<1, [SW_ALU0, SW_ALU1]>], 65 [3]>, 66 InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 67 InstrStage<1, [SW_ALU0, SW_ALU1]>, 68 InstrStage<1, [SW_ALU0, SW_ALU1]>, 69 InstrStage<1, [SW_LS]>], 70 [5]>, 71 // 72 // MVN instructions 73 InstrItinData<IIC_iMVNi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 74 InstrStage<1, [SW_ALU0, SW_ALU1]>], 75 [1]>, 76 InstrItinData<IIC_iMVNr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 77 InstrStage<1, [SW_ALU0, SW_ALU1]>], 78 [1]>, 79 InstrItinData<IIC_iMVNsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 80 InstrStage<1, [SW_ALU0, SW_ALU1]>], 81 [1]>, 82 InstrItinData<IIC_iMVNsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 83 InstrStage<1, [SW_ALU0, SW_ALU1]>], 84 [1]>, 85 // 86 // No operand cycles 87 InstrItinData<IIC_iALUx , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 88 InstrStage<1, [SW_ALU0, SW_ALU1]>]>, 89 // 90 // Binary 
Instructions that produce a result 91 InstrItinData<IIC_iALUi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 92 InstrStage<1, [SW_ALU0, SW_ALU1]>], 93 [1, 1]>, 94 InstrItinData<IIC_iALUr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 95 InstrStage<1, [SW_ALU0, SW_ALU1]>], 96 [1, 1, 1]>, 97 InstrItinData<IIC_iALUsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 98 InstrStage<1, [SW_ALU0, SW_ALU1]>], 99 [2, 1, 1]>, 100 InstrItinData<IIC_iALUsir,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 101 InstrStage<1, [SW_ALU0, SW_ALU1]>], 102 [2, 1, 1]>, 103 InstrItinData<IIC_iALUsr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 104 InstrStage<1, [SW_ALU0, SW_ALU1]>], 105 [2, 1, 1, 1]>, 106 // 107 // Bitwise Instructions that produce a result 108 InstrItinData<IIC_iBITi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 109 InstrStage<1, [SW_ALU0, SW_ALU1]>], 110 [1, 1]>, 111 InstrItinData<IIC_iBITr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 112 InstrStage<1, [SW_ALU0, SW_ALU1]>], 113 [1, 1, 1]>, 114 InstrItinData<IIC_iBITsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 115 InstrStage<1, [SW_ALU0, SW_ALU1]>], 116 [2, 1, 1]>, 117 InstrItinData<IIC_iBITsr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 118 InstrStage<1, [SW_ALU0, SW_ALU1]>], 119 [2, 1, 1, 1]>, 120 // 121 // Unary Instructions that produce a result 122 123 // CLZ, RBIT, etc. 
124 InstrItinData<IIC_iUNAr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 125 InstrStage<1, [SW_ALU0, SW_ALU1]>], 126 [1, 1]>, 127 128 // BFC, BFI, UBFX, SBFX 129 InstrItinData<IIC_iUNAsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 130 InstrStage<1, [SW_ALU0, SW_ALU1]>], 131 [2, 1]>, 132 133 // 134 // Zero and sign extension instructions 135 InstrItinData<IIC_iEXTr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 136 InstrStage<1, [SW_ALU0, SW_ALU1]>], 137 [1, 1]>, 138 InstrItinData<IIC_iEXTAr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 139 InstrStage<1, [SW_ALU0, SW_ALU1]>], 140 [1, 1, 1]>, 141 InstrItinData<IIC_iEXTAsr,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 142 InstrStage<1, [SW_ALU0, SW_ALU1]>], 143 [1, 1, 1, 1]>, 144 // 145 // Compare instructions 146 InstrItinData<IIC_iCMPi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 147 InstrStage<1, [SW_ALU0, SW_ALU1]>], 148 [1]>, 149 InstrItinData<IIC_iCMPr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 150 InstrStage<1, [SW_ALU0, SW_ALU1]>], 151 [1, 1]>, 152 InstrItinData<IIC_iCMPsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 153 InstrStage<2, [SW_ALU0, SW_ALU1]>], 154 [1, 1]>, 155 InstrItinData<IIC_iCMPsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 156 InstrStage<2, [SW_ALU0, SW_ALU1]>], 157 [1, 1, 1]>, 158 // 159 // Test instructions 160 InstrItinData<IIC_iTSTi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 161 InstrStage<1, [SW_ALU0, SW_ALU1]>], 162 [1]>, 163 InstrItinData<IIC_iTSTr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 164 InstrStage<1, [SW_ALU0, SW_ALU1]>], 165 [1, 1]>, 166 InstrItinData<IIC_iTSTsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 167 InstrStage<2, [SW_ALU0, SW_ALU1]>], 168 [1, 1]>, 169 InstrItinData<IIC_iTSTsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 170 InstrStage<2, [SW_ALU0, SW_ALU1]>], 171 [1, 1, 1]>, 172 // 173 // Move instructions, conditional 174 // FIXME: Correctly model the extra input dep on the destination. 
175 InstrItinData<IIC_iCMOVi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 176 InstrStage<1, [SW_ALU0, SW_ALU1]>], 177 [1]>, 178 InstrItinData<IIC_iCMOVr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 179 InstrStage<1, [SW_ALU0, SW_ALU1]>], 180 [1, 1]>, 181 InstrItinData<IIC_iCMOVsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 182 InstrStage<1, [SW_ALU0, SW_ALU1]>], 183 [1, 1]>, 184 InstrItinData<IIC_iCMOVsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 185 InstrStage<1, [SW_ALU0, SW_ALU1]>], 186 [2, 1, 1]>, 187 InstrItinData<IIC_iCMOVix2, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 188 InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 189 InstrStage<1, [SW_ALU0, SW_ALU1]>, 190 InstrStage<1, [SW_ALU0, SW_ALU1]>], 191 [2]>, 192 193 // Integer multiply pipeline 194 // 195 InstrItinData<IIC_iMUL16 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 196 InstrStage<1, [SW_ALU0]>], 197 [3, 1, 1]>, 198 InstrItinData<IIC_iMAC16 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 199 InstrStage<1, [SW_ALU0]>], 200 [3, 1, 1, 1]>, 201 InstrItinData<IIC_iMUL32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 202 InstrStage<1, [SW_ALU0]>], 203 [4, 1, 1]>, 204 InstrItinData<IIC_iMAC32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 205 InstrStage<1, [SW_ALU0]>], 206 [4, 1, 1, 1]>, 207 InstrItinData<IIC_iMUL64 , [InstrStage<1, [SW_DIS0], 0>, 208 InstrStage<1, [SW_DIS1], 0>, 209 InstrStage<1, [SW_DIS2], 0>, 210 InstrStage<1, [SW_ALU0], 1>, 211 InstrStage<1, [SW_ALU0], 3>, 212 InstrStage<1, [SW_ALU0]>], 213 [5, 5, 1, 1]>, 214 InstrItinData<IIC_iMAC64 , [InstrStage<1, [SW_DIS0], 0>, 215 InstrStage<1, [SW_DIS1], 0>, 216 InstrStage<1, [SW_DIS2], 0>, 217 InstrStage<1, [SW_ALU0], 1>, 218 InstrStage<1, [SW_ALU0], 1>, 219 InstrStage<1, [SW_ALU0, SW_ALU1], 3>, 220 InstrStage<1, [SW_ALU0, SW_ALU1]>], 221 [5, 6, 1, 1]>, 222 // 223 // Integer divide 224 InstrItinData<IIC_iDIV , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 225 InstrStage<1, [SW_ALU0], 0>, 226 InstrStage<14, 
[SW_IDIV]>], 227 [14, 1, 1]>, 228 229 // Integer load pipeline 230 // FIXME: The timings are some rough approximations 231 // 232 // Immediate offset 233 InstrItinData<IIC_iLoad_i , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 234 InstrStage<1, [SW_LS]>], 235 [3, 1]>, 236 InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 237 InstrStage<1, [SW_LS]>], 238 [3, 1]>, 239 InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [SW_DIS0], 0>, 240 InstrStage<1, [SW_DIS1], 0>, 241 InstrStage<1, [SW_LS], 1>, 242 InstrStage<1, [SW_LS]>], 243 [3, 4, 1]>, 244 // 245 // Register offset 246 InstrItinData<IIC_iLoad_r , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 247 InstrStage<1, [SW_LS]>], 248 [3, 1, 1]>, 249 InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 250 InstrStage<1, [SW_LS]>], 251 [3, 1, 1]>, 252 InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [SW_DIS0], 0>, 253 InstrStage<1, [SW_DIS1], 0>, 254 InstrStage<1, [SW_DIS2], 0>, 255 InstrStage<1, [SW_LS], 1>, 256 InstrStage<1, [SW_LS], 3>, 257 InstrStage<1, [SW_ALU0, SW_ALU1]>], 258 [3, 4, 1, 1]>, 259 // 260 // Scaled register offset 261 InstrItinData<IIC_iLoad_si , [InstrStage<1, [SW_DIS0], 0>, 262 InstrStage<1, [SW_DIS1], 0>, 263 InstrStage<1, [SW_ALU0, SW_ALU1], 2>, 264 InstrStage<1, [SW_LS]>], 265 [5, 1, 1]>, 266 InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [SW_DIS0], 0>, 267 InstrStage<1, [SW_DIS1], 0>, 268 InstrStage<1, [SW_ALU0, SW_ALU1], 2>, 269 InstrStage<1, [SW_LS]>], 270 [5, 1, 1]>, 271 // 272 // Immediate offset with update 273 InstrItinData<IIC_iLoad_iu , [InstrStage<1, [SW_DIS0], 0>, 274 InstrStage<1, [SW_DIS1], 0>, 275 InstrStage<1, [SW_ALU0, SW_ALU1], 1>, 276 InstrStage<1, [SW_LS]>], 277 [3, 1, 1]>, 278 InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [SW_DIS0], 0>, 279 InstrStage<1, [SW_DIS1], 0>, 280 InstrStage<1, [SW_ALU0, SW_ALU1], 1>, 281 InstrStage<1, [SW_LS]>], 282 [3, 1, 1]>, 283 // 284 // Register offset with update 285 InstrItinData<IIC_iLoad_ru , 
[InstrStage<1, [SW_DIS0], 0>, 286 InstrStage<1, [SW_DIS1], 0>, 287 InstrStage<1, [SW_ALU0], 1>, 288 InstrStage<1, [SW_LS]>], 289 [3, 1, 1, 1]>, 290 InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [SW_DIS0], 0>, 291 InstrStage<1, [SW_DIS1], 0>, 292 InstrStage<1, [SW_ALU0], 1>, 293 InstrStage<1, [SW_LS]>], 294 [3, 1, 1, 1]>, 295 InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [SW_DIS0], 0>, 296 InstrStage<1, [SW_DIS1], 0>, 297 InstrStage<1, [SW_DIS2], 0>, 298 InstrStage<1, [SW_ALU0, SW_ALU1], 0>, 299 InstrStage<1, [SW_LS], 3>, 300 InstrStage<1, [SW_LS], 0>, 301 InstrStage<1, [SW_ALU0, SW_ALU1]>], 302 [3, 4, 1, 1]>, 303 // 304 // Scaled register offset with update 305 InstrItinData<IIC_iLoad_siu , [InstrStage<1, [SW_DIS0], 0>, 306 InstrStage<1, [SW_DIS1], 0>, 307 InstrStage<1, [SW_DIS2], 0>, 308 InstrStage<1, [SW_ALU0, SW_ALU1], 2>, 309 InstrStage<1, [SW_LS], 3>, 310 InstrStage<1, [SW_ALU0, SW_ALU1]>], 311 [5, 3, 1, 1]>, 312 InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [SW_DIS0], 0>, 313 InstrStage<1, [SW_DIS1], 0>, 314 InstrStage<1, [SW_DIS2], 0>, 315 InstrStage<1, [SW_ALU0, SW_ALU1], 2>, 316 InstrStage<1, [SW_LS], 0>, 317 InstrStage<1, [SW_ALU0, SW_ALU1]>], 318 [5, 3, 1, 1]>, 319 // 320 // Load multiple, def is the 5th operand. 321 // FIXME: This assumes 3 to 4 registers. 322 InstrItinData<IIC_iLoad_m , [InstrStage<1, [SW_DIS0], 0>, 323 InstrStage<1, [SW_DIS1], 0>, 324 InstrStage<1, [SW_DIS2], 0>, 325 InstrStage<1, [SW_ALU0, SW_ALU1], 1>, 326 InstrStage<1, [SW_LS]>], 327 [1, 1, 1, 1, 3], [], -1>, // dynamic uops 328 329 // 330 // Load multiple + update, defs are the 1st and 5th operands. 
331 InstrItinData<IIC_iLoad_mu , [InstrStage<1, [SW_DIS0], 0>, 332 InstrStage<1, [SW_DIS1], 0>, 333 InstrStage<1, [SW_DIS2], 0>, 334 InstrStage<1, [SW_ALU0, SW_ALU1], 0>, 335 InstrStage<1, [SW_LS], 3>, 336 InstrStage<1, [SW_ALU0, SW_ALU1]>], 337 [2, 1, 1, 1, 3], [], -1>, // dynamic uops 338 // 339 // Load multiple plus branch 340 InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [SW_DIS0], 0>, 341 InstrStage<1, [SW_DIS1], 0>, 342 InstrStage<1, [SW_DIS2], 0>, 343 InstrStage<1, [SW_ALU0, SW_ALU1], 1>, 344 InstrStage<1, [SW_LS]>], 345 [1, 1, 1, 1, 3], [], -1>, // dynamic uops 346 // 347 // Pop, def is the 3rd operand. 348 InstrItinData<IIC_iPop , [InstrStage<1, [SW_DIS0], 0>, 349 InstrStage<1, [SW_DIS1], 0>, 350 InstrStage<1, [SW_ALU0, SW_ALU1], 1>, 351 InstrStage<1, [SW_LS]>], 352 [1, 1, 3], [], -1>, // dynamic uops 353 // 354 // Pop + branch, def is the 3rd operand. 355 InstrItinData<IIC_iPop_Br, [InstrStage<1, [SW_DIS0], 0>, 356 InstrStage<1, [SW_DIS1], 0>, 357 InstrStage<1, [SW_ALU0, SW_ALU1], 1>, 358 InstrStage<1, [SW_LS]>], 359 [1, 1, 3], [], -1>, // dynamic uops 360 361 // 362 // iLoadi + iALUr for t2LDRpci_pic. 
363 InstrItinData<IIC_iLoadiALU, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 364 InstrStage<1, [SW_LS], 3>, 365 InstrStage<1, [SW_ALU0, SW_ALU1]>], 366 [4, 1]>, 367 368 // Integer store pipeline 369 /// 370 // Immediate offset 371 InstrItinData<IIC_iStore_i , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 372 InstrStage<1, [SW_LS]>], 373 [1, 1]>, 374 InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 375 InstrStage<1, [SW_LS]>], 376 [1, 1]>, 377 InstrItinData<IIC_iStore_d_i, [InstrStage<1, [SW_DIS0], 0>, 378 InstrStage<1, [SW_DIS1], 0>, 379 InstrStage<1, [SW_DIS2], 0>, 380 InstrStage<1, [SW_LS], 0>, 381 InstrStage<1, [SW_ALU0, SW_ALU1], 1>, 382 InstrStage<1, [SW_LS]>], 383 [1, 1]>, 384 // 385 // Register offset 386 InstrItinData<IIC_iStore_r , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 387 InstrStage<1, [SW_LS]>], 388 [1, 1, 1]>, 389 InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 390 InstrStage<1, [SW_LS]>], 391 [1, 1, 1]>, 392 InstrItinData<IIC_iStore_d_r, [InstrStage<1, [SW_DIS0], 0>, 393 InstrStage<1, [SW_DIS1], 0>, 394 InstrStage<1, [SW_DIS2], 0>, 395 InstrStage<1, [SW_LS], 0>, 396 InstrStage<1, [SW_ALU0, SW_ALU1], 1>, 397 InstrStage<1, [SW_LS]>], 398 [1, 1, 1]>, 399 // 400 // Scaled register offset 401 InstrItinData<IIC_iStore_si , [InstrStage<1, [SW_DIS0], 0>, 402 InstrStage<1, [SW_DIS1], 0>, 403 InstrStage<1, [SW_ALU0, SW_ALU1], 2>, 404 InstrStage<1, [SW_LS]>], 405 [1, 1, 1]>, 406 InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [SW_DIS0], 0>, 407 InstrStage<1, [SW_DIS1], 0>, 408 InstrStage<1, [SW_ALU0, SW_ALU1], 2>, 409 InstrStage<1, [SW_LS]>], 410 [1, 1, 1]>, 411 // 412 // Immediate offset with update 413 InstrItinData<IIC_iStore_iu , [InstrStage<1, [SW_DIS0], 0>, 414 InstrStage<1, [SW_DIS1], 0>, 415 InstrStage<1, [SW_ALU0, SW_ALU1], 1>, 416 InstrStage<1, [SW_LS]>], 417 [1, 1, 1]>, 418 InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [SW_DIS0], 0>, 419 InstrStage<1, [SW_DIS1], 0>, 420 
InstrStage<1, [SW_ALU0, SW_ALU1], 1>, 421 InstrStage<1, [SW_LS]>], 422 [1, 1, 1]>, 423 // 424 // Register offset with update 425 InstrItinData<IIC_iStore_ru , [InstrStage<1, [SW_DIS0], 0>, 426 InstrStage<1, [SW_DIS1], 0>, 427 InstrStage<1, [SW_ALU0, SW_ALU1], 1>, 428 InstrStage<1, [SW_LS]>], 429 [1, 1, 1, 1]>, 430 InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [SW_DIS0], 0>, 431 InstrStage<1, [SW_DIS1], 0>, 432 InstrStage<1, [SW_ALU0, SW_ALU1], 1>, 433 InstrStage<1, [SW_LS]>], 434 [1, 1, 1, 1]>, 435 InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [SW_DIS0], 0>, 436 InstrStage<1, [SW_DIS1], 0>, 437 InstrStage<1, [SW_ALU0, SW_ALU1], 1>, 438 InstrStage<1, [SW_LS]>], 439 [1, 1, 1, 1]>, 440 // 441 // Scaled register offset with update 442 InstrItinData<IIC_iStore_siu, [InstrStage<1, [SW_DIS0], 0>, 443 InstrStage<1, [SW_DIS1], 0>, 444 InstrStage<1, [SW_ALU0, SW_ALU1], 2>, 445 InstrStage<1, [SW_LS], 0>, 446 InstrStage<1, [SW_ALU0, SW_ALU1], 1>], 447 [3, 1, 1, 1]>, 448 InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [SW_DIS0], 0>, 449 InstrStage<1, [SW_DIS1], 0>, 450 InstrStage<1, [SW_ALU0, SW_ALU1], 2>, 451 InstrStage<1, [SW_LS], 0>, 452 InstrStage<1, [SW_ALU0, SW_ALU1], 1>], 453 [3, 1, 1, 1]>, 454 // 455 // Store multiple 456 InstrItinData<IIC_iStore_m , [InstrStage<1, [SW_DIS0], 0>, 457 InstrStage<1, [SW_DIS1], 0>, 458 InstrStage<1, [SW_DIS2], 0>, 459 InstrStage<1, [SW_ALU0, SW_ALU1], 1>, 460 InstrStage<1, [SW_LS], 1>, 461 InstrStage<1, [SW_ALU0, SW_ALU1], 1>, 462 InstrStage<1, [SW_LS], 1>, 463 InstrStage<1, [SW_ALU0, SW_ALU1], 1>, 464 InstrStage<1, [SW_LS]>], 465 [], [], -1>, // dynamic uops 466 // 467 // Store multiple + update 468 InstrItinData<IIC_iStore_mu, [InstrStage<1, [SW_DIS0], 0>, 469 InstrStage<1, [SW_DIS1], 0>, 470 InstrStage<1, [SW_DIS2], 0>, 471 InstrStage<1, [SW_ALU0, SW_ALU1], 1>, 472 InstrStage<1, [SW_LS], 1>, 473 InstrStage<1, [SW_ALU0, SW_ALU1], 1>, 474 InstrStage<1, [SW_LS], 1>, 475 InstrStage<1, [SW_ALU0, SW_ALU1], 1>, 476 InstrStage<1, [SW_LS]>], 477 
[2], [], -1>, // dynamic uops 478 479 // 480 // Preload 481 InstrItinData<IIC_Preload, [InstrStage<1, [SW_DIS0], 0>], [1, 1]>, 482 483 // Branch 484 // 485 // no delay slots, so the latency of a branch is unimportant 486 InstrItinData<IIC_Br , [InstrStage<1, [SW_DIS0], 0>]>, 487 488 // FP Special Register to Integer Register File Move 489 InstrItinData<IIC_fpSTAT , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 490 InstrStage<1, [SW_ALU0, SW_ALU1]>], 491 [1]>, 492 // 493 // Single-precision FP Unary 494 // 495 // Most floating-point moves get issued on ALU0. 496 InstrItinData<IIC_fpUNA32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 497 InstrStage<1, [SW_ALU0]>], 498 [2, 1]>, 499 // 500 // Double-precision FP Unary 501 InstrItinData<IIC_fpUNA64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 502 InstrStage<1, [SW_ALU0]>], 503 [2, 1]>, 504 505 // 506 // Single-precision FP Compare 507 InstrItinData<IIC_fpCMP32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 508 InstrStage<1, [SW_ALU0]>], 509 [1, 1]>, 510 // 511 // Double-precision FP Compare 512 InstrItinData<IIC_fpCMP64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 513 InstrStage<1, [SW_ALU0]>], 514 [1, 1]>, 515 // 516 // Single to Double FP Convert 517 InstrItinData<IIC_fpCVTSD , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 518 InstrStage<1, [SW_ALU1]>], 519 [4, 1]>, 520 // 521 // Double to Single FP Convert 522 InstrItinData<IIC_fpCVTDS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 523 InstrStage<1, [SW_ALU1]>], 524 [4, 1]>, 525 526 // 527 // Single to Half FP Convert 528 InstrItinData<IIC_fpCVTSH , [InstrStage<1, [SW_DIS0], 0>, 529 InstrStage<1, [SW_DIS1], 0>, 530 InstrStage<1, [SW_ALU1], 4>, 531 InstrStage<1, [SW_ALU1]>], 532 [6, 1]>, 533 // 534 // Half to Single FP Convert 535 InstrItinData<IIC_fpCVTHS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 536 InstrStage<1, [SW_ALU1]>], 537 [4, 1]>, 538 539 // 540 // Single-Precision FP to Integer Convert 541 InstrItinData<IIC_fpCVTSI , 
[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 542 InstrStage<1, [SW_ALU1]>], 543 [4, 1]>, 544 // 545 // Double-Precision FP to Integer Convert 546 InstrItinData<IIC_fpCVTDI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 547 InstrStage<1, [SW_ALU1]>], 548 [4, 1]>, 549 // 550 // Integer to Single-Precision FP Convert 551 InstrItinData<IIC_fpCVTIS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 552 InstrStage<1, [SW_ALU1]>], 553 [4, 1]>, 554 // 555 // Integer to Double-Precision FP Convert 556 InstrItinData<IIC_fpCVTID , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 557 InstrStage<1, [SW_ALU1]>], 558 [4, 1]>, 559 // 560 // Single-precision FP ALU 561 InstrItinData<IIC_fpALU32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 562 InstrStage<1, [SW_ALU0]>], 563 [2, 1, 1]>, 564 // 565 // Double-precision FP ALU 566 InstrItinData<IIC_fpALU64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 567 InstrStage<1, [SW_ALU0]>], 568 [2, 1, 1]>, 569 // 570 // Single-precision FP Multiply 571 InstrItinData<IIC_fpMUL32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 572 InstrStage<1, [SW_ALU1]>], 573 [4, 1, 1]>, 574 // 575 // Double-precision FP Multiply 576 InstrItinData<IIC_fpMUL64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 577 InstrStage<1, [SW_ALU1]>], 578 [6, 1, 1]>, 579 // 580 // Single-precision FP MAC 581 InstrItinData<IIC_fpMAC32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 582 InstrStage<1, [SW_ALU1]>], 583 [8, 1, 1]>, 584 // 585 // Double-precision FP MAC 586 InstrItinData<IIC_fpMAC64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 587 InstrStage<1, [SW_ALU1]>], 588 [12, 1, 1]>, 589 // 590 // Single-precision Fused FP MAC 591 InstrItinData<IIC_fpFMAC32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 592 InstrStage<1, [SW_ALU1]>], 593 [8, 1, 1]>, 594 // 595 // Double-precision Fused FP MAC 596 InstrItinData<IIC_fpFMAC64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 597 InstrStage<1, [SW_ALU1]>], 598 [12, 1, 1]>, 599 // 600 // Single-precision 
FP DIV 601 InstrItinData<IIC_fpDIV32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 602 InstrStage<1, [SW_ALU1], 0>, 603 InstrStage<15, [SW_FDIV]>], 604 [17, 1, 1]>, 605 // 606 // Double-precision FP DIV 607 InstrItinData<IIC_fpDIV64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 608 InstrStage<1, [SW_ALU1], 0>, 609 InstrStage<30, [SW_FDIV]>], 610 [32, 1, 1]>, 611 // 612 // Single-precision FP SQRT 613 InstrItinData<IIC_fpSQRT32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 614 InstrStage<1, [SW_ALU1], 0>, 615 InstrStage<15, [SW_FDIV]>], 616 [17, 1]>, 617 // 618 // Double-precision FP SQRT 619 InstrItinData<IIC_fpSQRT64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 620 InstrStage<1, [SW_ALU1], 0>, 621 InstrStage<30, [SW_FDIV]>], 622 [32, 1, 1]>, 623 624 // 625 // Integer to Single-precision Move 626 InstrItinData<IIC_fpMOVIS, [InstrStage<1, [SW_DIS0], 0>, 627 InstrStage<1, [SW_DIS1], 0>, 628 InstrStage<1, [SW_LS], 4>, 629 InstrStage<1, [SW_ALU0]>], 630 [6, 1]>, 631 // 632 // Integer to Double-precision Move 633 InstrItinData<IIC_fpMOVID, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 634 InstrStage<1, [SW_LS]>], 635 [4, 1]>, 636 // 637 // Single-precision to Integer Move 638 InstrItinData<IIC_fpMOVSI, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 639 InstrStage<1, [SW_LS]>], 640 [3, 1]>, 641 // 642 // Double-precision to Integer Move 643 InstrItinData<IIC_fpMOVDI, [InstrStage<1, [SW_DIS0], 0>, 644 InstrStage<1, [SW_DIS1], 0>, 645 InstrStage<1, [SW_LS], 3>, 646 InstrStage<1, [SW_LS]>], 647 [3, 4, 1]>, 648 // 649 // Single-precision FP Load 650 InstrItinData<IIC_fpLoad32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 651 InstrStage<1, [SW_LS]>], 652 [4, 1]>, 653 // 654 // Double-precision FP Load 655 InstrItinData<IIC_fpLoad64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 656 InstrStage<1, [SW_LS]>], 657 [4, 1]>, 658 // 659 // FP Load Multiple 660 // FIXME: Assumes a single Q register. 
661 InstrItinData<IIC_fpLoad_m, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 662 InstrStage<1, [SW_LS]>], 663 [1, 1, 1, 4], [], -1>, // dynamic uops 664 // 665 // FP Load Multiple + update 666 // FIXME: Assumes a single Q register. 667 InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [SW_DIS0], 0>, 668 InstrStage<1, [SW_DIS1], 0>, 669 InstrStage<1, [SW_LS], 4>, 670 InstrStage<1, [SW_ALU0, SW_ALU1]>], 671 [2, 1, 1, 1, 4], [], -1>, // dynamic uops 672 // 673 // Single-precision FP Store 674 InstrItinData<IIC_fpStore32,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 675 InstrStage<1, [SW_LS]>], 676 [1, 1]>, 677 // 678 // Double-precision FP Store 679 InstrItinData<IIC_fpStore64,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 680 InstrStage<1, [SW_LS]>], 681 [1, 1]>, 682 // 683 // FP Store Multiple 684 // FIXME: Assumes a single Q register. 685 InstrItinData<IIC_fpStore_m,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 686 InstrStage<1, [SW_LS]>], 687 [1, 1, 1], [], -1>, // dynamic uops 688 // 689 // FP Store Multiple + update 690 // FIXME: Assumes a single Q register. 
691 InstrItinData<IIC_fpStore_mu,[InstrStage<1, [SW_DIS0], 0>, 692 InstrStage<1, [SW_DIS1], 0>, 693 InstrStage<1, [SW_LS], 4>, 694 InstrStage<1, [SW_ALU0, SW_ALU1]>], 695 [2, 1, 1, 1], [], -1>, // dynamic uops 696 // NEON 697 // 698 // Double-register Integer Unary 699 InstrItinData<IIC_VUNAiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 700 InstrStage<1, [SW_ALU0]>], 701 [4, 1]>, 702 // 703 // Quad-register Integer Unary 704 InstrItinData<IIC_VUNAiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 705 InstrStage<1, [SW_ALU0]>], 706 [4, 1]>, 707 // 708 // Double-register Integer Q-Unary 709 InstrItinData<IIC_VQUNAiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 710 InstrStage<1, [SW_ALU0]>], 711 [4, 1]>, 712 // 713 // Quad-register Integer CountQ-Unary 714 InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 715 InstrStage<1, [SW_ALU0]>], 716 [4, 1]>, 717 // 718 // Double-register Integer Binary 719 InstrItinData<IIC_VBINiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 720 InstrStage<1, [SW_ALU0]>], 721 [2, 1, 1]>, 722 // 723 // Quad-register Integer Binary 724 InstrItinData<IIC_VBINiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 725 InstrStage<1, [SW_ALU0]>], 726 [2, 1, 1]>, 727 // 728 // Double-register Integer Subtract 729 InstrItinData<IIC_VSUBiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 730 InstrStage<1, [SW_ALU0]>], 731 [2, 1, 1]>, 732 // 733 // Quad-register Integer Subtract 734 InstrItinData<IIC_VSUBiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 735 InstrStage<1, [SW_ALU0]>], 736 [2, 1, 1]>, 737 // 738 // Double-register Integer Shift 739 InstrItinData<IIC_VSHLiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 740 InstrStage<1, [SW_ALU0]>], 741 [2, 1, 1]>, 742 // 743 // Quad-register Integer Shift 744 InstrItinData<IIC_VSHLiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 745 InstrStage<1, [SW_ALU0]>], 746 [2, 1, 1]>, 747 // 748 // Double-register Integer Shift (4 cycle) 749 InstrItinData<IIC_VSHLi4D, 
[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 750 InstrStage<1, [SW_ALU0]>], 751 [4, 1, 1]>, 752 // 753 // Quad-register Integer Shift (4 cycle) 754 InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 755 InstrStage<1, [SW_ALU0]>], 756 [4, 1, 1]>, 757 // 758 // Double-register Integer Binary (4 cycle) 759 InstrItinData<IIC_VBINi4D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 760 InstrStage<1, [SW_ALU0]>], 761 [4, 1, 1]>, 762 // 763 // Quad-register Integer Binary (4 cycle) 764 InstrItinData<IIC_VBINi4Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 765 InstrStage<1, [SW_ALU0]>], 766 [4, 1, 1]>, 767 // 768 // Double-register Integer Subtract (4 cycle) 769 InstrItinData<IIC_VSUBi4D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 770 InstrStage<1, [SW_ALU0]>], 771 [4, 1, 1]>, 772 // 773 // Quad-register Integer Subtract (4 cycle) 774 InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 775 InstrStage<1, [SW_ALU0]>], 776 [4, 1, 1]>, 777 778 // 779 // Double-register Integer Count 780 InstrItinData<IIC_VCNTiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 781 InstrStage<1, [SW_ALU0]>], 782 [2, 1, 1]>, 783 // 784 // Quad-register Integer Count 785 InstrItinData<IIC_VCNTiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 786 InstrStage<1, [SW_ALU0]>], 787 [2, 1, 1]>, 788 // 789 // Double-register Absolute Difference and Accumulate 790 InstrItinData<IIC_VABAD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 791 InstrStage<1, [SW_ALU0]>], 792 [4, 1, 1, 1]>, 793 // 794 // Quad-register Absolute Difference and Accumulate 795 InstrItinData<IIC_VABAQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 796 InstrStage<1, [SW_ALU0]>], 797 [4, 1, 1, 1]>, 798 // 799 // Double-register Integer Pair Add Long 800 InstrItinData<IIC_VPALiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 801 InstrStage<1, [SW_ALU0]>], 802 [4, 1, 1]>, 803 // 804 // Quad-register Integer Pair Add Long 805 InstrItinData<IIC_VPALiQ, [InstrStage<1, 
[SW_DIS0, SW_DIS1, SW_DIS2], 0>, 806 InstrStage<1, [SW_ALU0]>], 807 [4, 1, 1]>, 808 809 // 810 // Double-register Integer Multiply (.8, .16) 811 InstrItinData<IIC_VMULi16D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 812 InstrStage<1, [SW_ALU1]>], 813 [4, 1, 1]>, 814 // 815 // Quad-register Integer Multiply (.8, .16) 816 InstrItinData<IIC_VMULi16Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 817 InstrStage<1, [SW_ALU1]>], 818 [4, 1, 1]>, 819 820 // 821 // Double-register Integer Multiply (.32) 822 InstrItinData<IIC_VMULi32D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 823 InstrStage<1, [SW_ALU1]>], 824 [4, 1, 1]>, 825 // 826 // Quad-register Integer Multiply (.32) 827 InstrItinData<IIC_VMULi32Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 828 InstrStage<1, [SW_ALU1]>], 829 [4, 1, 1]>, 830 // 831 // Double-register Integer Multiply-Accumulate (.8, .16) 832 InstrItinData<IIC_VMACi16D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 833 InstrStage<1, [SW_ALU1]>], 834 [4, 1, 1, 1]>, 835 // 836 // Double-register Integer Multiply-Accumulate (.32) 837 InstrItinData<IIC_VMACi32D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 838 InstrStage<1, [SW_ALU1]>], 839 [4, 1, 1, 1]>, 840 // 841 // Quad-register Integer Multiply-Accumulate (.8, .16) 842 InstrItinData<IIC_VMACi16Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 843 InstrStage<1, [SW_ALU1]>], 844 [4, 1, 1, 1]>, 845 // 846 // Quad-register Integer Multiply-Accumulate (.32) 847 InstrItinData<IIC_VMACi32Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 848 InstrStage<1, [SW_ALU1]>], 849 [4, 1, 1, 1]>, 850 851 // 852 // Move 853 InstrItinData<IIC_VMOV, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 854 InstrStage<1, [SW_ALU0]>], 855 [2, 1]>, 856 // 857 // Move Immediate 858 InstrItinData<IIC_VMOVImm, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 859 InstrStage<1, [SW_ALU0]>], 860 [2]>, 861 // 862 // Double-register Permute Move 863 InstrItinData<IIC_VMOVD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 
0>, 864 InstrStage<1, [SW_ALU1]>], 865 [2, 1]>, 866 // 867 // Quad-register Permute Move 868 InstrItinData<IIC_VMOVQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 869 InstrStage<1, [SW_ALU1]>], 870 [2, 1]>, 871 // 872 // Integer to Single-precision Move 873 InstrItinData<IIC_VMOVIS , [InstrStage<1, [SW_DIS0], 0>, 874 InstrStage<1, [SW_DIS1], 0>, 875 InstrStage<1, [SW_LS], 4>, 876 InstrStage<1, [SW_ALU0]>], 877 [6, 1]>, 878 // 879 // Integer to Double-precision Move 880 InstrItinData<IIC_VMOVID , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 881 InstrStage<1, [SW_LS]>], 882 [4, 1, 1]>, 883 // 884 // Single-precision to Integer Move 885 InstrItinData<IIC_VMOVSI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 886 InstrStage<1, [SW_LS]>], 887 [3, 1]>, 888 // 889 // Double-precision to Integer Move 890 InstrItinData<IIC_VMOVDI , [InstrStage<1, [SW_DIS0], 0>, 891 InstrStage<1, [SW_DIS1], 0>, 892 InstrStage<1, [SW_LS], 3>, 893 InstrStage<1, [SW_LS]>], 894 [3, 4, 1]>, 895 // 896 // Integer to Lane Move 897 // FIXME: I think this is correct, but it is not clear from the tuning guide. 898 InstrItinData<IIC_VMOVISL , [InstrStage<1, [SW_DIS0], 0>, 899 InstrStage<1, [SW_DIS1], 0>, 900 InstrStage<1, [SW_LS], 4>, 901 InstrStage<1, [SW_ALU0]>], 902 [6, 1]>, 903 904 // 905 // Vector narrow move 906 InstrItinData<IIC_VMOVN, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 907 InstrStage<1, [SW_ALU1]>], 908 [2, 1]>, 909 // 910 // Double-register FP Unary 911 // FIXME: VRECPE / VRSQRTE has a longer latency than VABS, which is used here, 912 // and they issue on a different pipeline. 913 InstrItinData<IIC_VUNAD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 914 InstrStage<1, [SW_ALU0]>], 915 [2, 1]>, 916 // 917 // Quad-register FP Unary 918 // FIXME: VRECPE / VRSQRTE has a longer latency than VABS, which is used here, 919 // and they issue on a different pipeline. 
920 InstrItinData<IIC_VUNAQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 921 InstrStage<1, [SW_ALU0]>], 922 [2, 1]>, 923 // 924 // Double-register FP Binary 925 // FIXME: We're using this itin for many instructions. 926 InstrItinData<IIC_VBIND, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 927 InstrStage<1, [SW_ALU0]>], 928 [4, 1, 1]>, 929 930 // 931 // VPADD, etc. 932 InstrItinData<IIC_VPBIND, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 933 InstrStage<1, [SW_ALU0]>], 934 [4, 1, 1]>, 935 // 936 // Double-register FP VMUL 937 InstrItinData<IIC_VFMULD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 938 InstrStage<1, [SW_ALU1]>], 939 [4, 1, 1]>, 940 // 941 // Quad-register FP Binary 942 InstrItinData<IIC_VBINQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 943 InstrStage<1, [SW_ALU0]>], 944 [4, 1, 1]>, 945 // 946 // Quad-register FP VMUL 947 InstrItinData<IIC_VFMULQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 948 InstrStage<1, [SW_ALU1]>], 949 [4, 1, 1]>, 950 // 951 // Double-register FP Multiple-Accumulate 952 InstrItinData<IIC_VMACD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 953 InstrStage<1, [SW_ALU1]>], 954 [8, 1, 1]>, 955 // 956 // Quad-register FP Multiple-Accumulate 957 InstrItinData<IIC_VMACQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 958 InstrStage<1, [SW_ALU1]>], 959 [8, 1, 1]>, 960 // 961 // Double-register Fused FP Multiple-Accumulate 962 InstrItinData<IIC_VFMACD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 963 InstrStage<1, [SW_ALU1]>], 964 [8, 1, 1]>, 965 // 966 // Quad-register FusedF P Multiple-Accumulate 967 InstrItinData<IIC_VFMACQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 968 InstrStage<1, [SW_ALU1]>], 969 [8, 1, 1]>, 970 // 971 // Double-register Reciprical Step 972 InstrItinData<IIC_VRECSD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 973 InstrStage<1, [SW_ALU1]>], 974 [8, 1, 1]>, 975 // 976 // Quad-register Reciprical Step 977 InstrItinData<IIC_VRECSQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 978 
InstrStage<1, [SW_ALU1]>], 979 [8, 1, 1]>, 980 // 981 // Double-register Permute 982 // FIXME: The latencies are unclear from the documentation. 983 InstrItinData<IIC_VPERMD, [InstrStage<1, [SW_DIS0], 0>, 984 InstrStage<1, [SW_DIS1], 0>, 985 InstrStage<1, [SW_DIS2], 0>, 986 InstrStage<1, [SW_ALU1], 2>, 987 InstrStage<1, [SW_ALU1], 2>, 988 InstrStage<1, [SW_ALU1]>], 989 [3, 4, 3, 4]>, 990 // 991 // Quad-register Permute 992 // FIXME: The latencies are unclear from the documentation. 993 InstrItinData<IIC_VPERMQ, [InstrStage<1, [SW_DIS0], 0>, 994 InstrStage<1, [SW_DIS1], 0>, 995 InstrStage<1, [SW_DIS2], 0>, 996 InstrStage<1, [SW_ALU1], 2>, 997 InstrStage<1, [SW_ALU1], 2>, 998 InstrStage<1, [SW_ALU1]>], 999 [3, 4, 3, 4]>, 1000 // 1001 // Quad-register Permute (3 cycle issue on A9) 1002 InstrItinData<IIC_VPERMQ3, [InstrStage<1, [SW_DIS0], 0>, 1003 InstrStage<1, [SW_DIS1], 0>, 1004 InstrStage<1, [SW_DIS2], 0>, 1005 InstrStage<1, [SW_ALU1], 2>, 1006 InstrStage<1, [SW_ALU1], 2>, 1007 InstrStage<1, [SW_ALU1]>], 1008 [3, 4, 3, 4]>, 1009 1010 // 1011 // Double-register VEXT 1012 InstrItinData<IIC_VEXTD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 1013 InstrStage<1, [SW_ALU1]>], 1014 [2, 1, 1]>, 1015 // 1016 // Quad-register VEXT 1017 InstrItinData<IIC_VEXTQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 1018 InstrStage<1, [SW_ALU1]>], 1019 [2, 1, 1]>, 1020 // 1021 // VTB 1022 InstrItinData<IIC_VTB1, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 1023 InstrStage<1, [SW_ALU1]>], 1024 [2, 1, 1]>, 1025 InstrItinData<IIC_VTB2, [InstrStage<1, [SW_DIS0], 0>, 1026 InstrStage<1, [SW_DIS1], 0>, 1027 InstrStage<1, [SW_ALU1], 2>, 1028 InstrStage<1, [SW_ALU1]>], 1029 [4, 1, 3, 3]>, 1030 InstrItinData<IIC_VTB3, [InstrStage<1, [SW_DIS0], 0>, 1031 InstrStage<1, [SW_DIS1], 0>, 1032 InstrStage<1, [SW_DIS2], 0>, 1033 InstrStage<1, [SW_ALU1], 2>, 1034 InstrStage<1, [SW_ALU1], 2>, 1035 InstrStage<1, [SW_ALU1]>], 1036 [6, 1, 3, 5, 5]>, 1037 InstrItinData<IIC_VTB4, [InstrStage<1, [SW_DIS0], 
0>, 1038 InstrStage<1, [SW_DIS1], 0>, 1039 InstrStage<1, [SW_DIS2], 0>, 1040 InstrStage<1, [SW_ALU1], 2>, 1041 InstrStage<1, [SW_ALU1], 2>, 1042 InstrStage<1, [SW_ALU1], 2>, 1043 InstrStage<1, [SW_ALU1]>], 1044 [8, 1, 3, 5, 7, 7]>, 1045 // 1046 // VTBX 1047 InstrItinData<IIC_VTBX1, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 1048 InstrStage<1, [SW_ALU1]>], 1049 [2, 1, 1]>, 1050 InstrItinData<IIC_VTBX2, [InstrStage<1, [SW_DIS0], 0>, 1051 InstrStage<1, [SW_DIS1], 0>, 1052 InstrStage<1, [SW_ALU1], 2>, 1053 InstrStage<1, [SW_ALU1]>], 1054 [4, 1, 3, 3]>, 1055 InstrItinData<IIC_VTBX3, [InstrStage<1, [SW_DIS0], 0>, 1056 InstrStage<1, [SW_DIS1], 0>, 1057 InstrStage<1, [SW_DIS2], 0>, 1058 InstrStage<1, [SW_ALU1], 2>, 1059 InstrStage<1, [SW_ALU1], 2>, 1060 InstrStage<1, [SW_ALU1]>], 1061 [6, 1, 3, 5, 5]>, 1062 InstrItinData<IIC_VTBX4, [InstrStage<1, [SW_DIS0], 0>, 1063 InstrStage<1, [SW_DIS1], 0>, 1064 InstrStage<1, [SW_DIS2], 0>, 1065 InstrStage<1, [SW_ALU1], 2>, 1066 InstrStage<1, [SW_ALU1], 2>, 1067 InstrStage<1, [SW_ALU1], 2>, 1068 InstrStage<1, [SW_ALU1]>], 1069 [8, 1, 3, 5, 7, 7]> 1070 ]>; 1071 1072 // ===---------------------------------------------------------------------===// 1073 // This following definitions describe the simple machine model which 1074 // will replace itineraries. 1075 1076 // Swift machine model for scheduling and other instruction cost heuristics. 1077 def SwiftModel : SchedMachineModel { 1078 let IssueWidth = 3; // 3 micro-ops are dispatched per cycle. 1079 let MicroOpBufferSize = 45; // Based on NEON renamed registers. 1080 let LoadLatency = 3; 1081 let MispredictPenalty = 14; // A branch direction mispredict. 1082 1083 let Itineraries = SwiftItineraries; 1084 } 1085 1086 // Swift predicates. 1087 def IsFastImmShiftSwiftPred : SchedPredicate<[{TII->isSwiftFastImmShift(MI)}]>; 1088 1089 // Swift resource mapping. 1090 let SchedModel = SwiftModel in { 1091 // Processor resources. 1092 def SwiftUnitP01 : ProcResource<2>; // ALU unit. 
// Issue-port sub-units. P0 and P1 both name SwiftUnitP01 as their Super
// resource; P2 and the divide unit stand alone.
let Super = SwiftUnitP01 in {
  def SwiftUnitP0 : ProcResource<1>; // Mul unit.
  def SwiftUnitP1 : ProcResource<1>; // Br unit.
}
def SwiftUnitP2 : ProcResource<1>; // LS unit.
def SwiftUnitDiv : ProcResource<1>;

// Generic resource requirements.
// Each record lists the unit(s) it uses. Where a Latency is given below it is
// the cycle count spelled out in the record name; records without one keep
// whatever default their base class provides.
def SwiftWriteP0OneCycle : SchedWriteRes<[SwiftUnitP0]>;
let Latency = 2 in
def SwiftWriteP0TwoCycle : SchedWriteRes<[SwiftUnitP0]>;
let Latency = 4 in
def SwiftWriteP0FourCycle : SchedWriteRes<[SwiftUnitP0]>;
let Latency = 6 in
def SwiftWriteP0SixCycle : SchedWriteRes<[SwiftUnitP0]>;
let Latency = 4 in
def SwiftWriteP0P1FourCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP1]>;
let Latency = 6 in
def SwiftWriteP0P1SixCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP1]>;
def SwiftWriteP01OneCycle : SchedWriteRes<[SwiftUnitP01]>;
let Latency = 2 in
def SwiftWriteP1TwoCycle : SchedWriteRes<[SwiftUnitP1]>;
let Latency = 4 in
def SwiftWriteP1FourCycle : SchedWriteRes<[SwiftUnitP1]>;
let Latency = 6 in
def SwiftWriteP1SixCycle : SchedWriteRes<[SwiftUnitP1]>;
let Latency = 8 in
def SwiftWriteP1EightCycle : SchedWriteRes<[SwiftUnitP1]>;
let Latency = 12 in
def SwiftWriteP1TwelveCyc : SchedWriteRes<[SwiftUnitP1]>;
// The one-cycle P01 write repeated two and three times.
def SwiftWriteP01OneCycle2x : WriteSequence<[SwiftWriteP01OneCycle], 2>;
def SwiftWriteP01OneCycle3x : WriteSequence<[SwiftWriteP01OneCycle], 3>;
let Latency = 2 in
def SwiftWriteP01TwoCycle : SchedWriteRes<[SwiftUnitP01]>;
// Two micro-ops; SwiftUnitP01 is listed once per micro-op.
let Latency = 3, NumMicroOps = 2 in
def SwiftWriteP01ThreeCycleTwoUops
    : SchedWriteRes<[SwiftUnitP01, SwiftUnitP01]>;
// Three micro-ops on SwiftUnitP0 with ResourceCycles = [3].
let Latency = 3, NumMicroOps = 3, ResourceCycles = [3] in
def SwiftWriteP0ThreeCycleThreeUops : SchedWriteRes<[SwiftUnitP0]>;
// Plain load without writeback.
def SwiftWriteP2ThreeCycle : SchedWriteRes<[SwiftUnitP2]> {
  let Latency = 3;
}
def SwiftWriteP2FourCycle : SchedWriteRes<[SwiftUnitP2]> {
  let Latency = 4;
}
// A store does not write to a register.
def SwiftWriteP2 : SchedWriteRes<[SwiftUnitP2]> {
  let Latency = 0;
}
// SwiftWrite1xP2 .. SwiftWrite4xP2: SwiftWriteP2 repeated Num times.
foreach Num = 1-4 in {
  def SwiftWrite#Num#xP2 : WriteSequence<[SwiftWriteP2], Num>;
}
// Two one-cycle P01 writes followed by SwiftWriteP2ThreeCycle.
def SwiftWriteP01OneCycle2x_load : WriteSequence<[SwiftWriteP01OneCycle,
                                                  SwiftWriteP01OneCycle,
                                                  SwiftWriteP2ThreeCycle]>;
// 4.2.4 Arithmetic and Logical.
// ALU operation register shifted by immediate variant.
// NOTE(review): here the fast-shift predicate selects the two-cycle write and
// the default is WriteALU, while SwiftChooseShiftKindP01OneOrTwoCycle below
// maps the same predicate to the one-cycle write -- confirm both directions
// are intended.
def SwiftWriteALUsi : SchedWriteVariant<[
  // lsl #2, lsl #1, or lsr #1.
  SchedVar<IsFastImmShiftSwiftPred, [SwiftWriteP01TwoCycle]>,
  SchedVar<NoSchedPred,             [WriteALU]>
]>;
// Register-shifted-by-register variants: a different write is chosen when
// IsPredicatedPred holds.
def SwiftWriteALUsr : SchedWriteVariant<[
  SchedVar<IsPredicatedPred, [SwiftWriteP01ThreeCycleTwoUops]>,
  SchedVar<NoSchedPred,      [SwiftWriteP01TwoCycle]>
]>;
def SwiftWriteALUSsr : SchedWriteVariant<[
  SchedVar<IsPredicatedPred, [SwiftWriteP0ThreeCycleThreeUops]>,
  SchedVar<NoSchedPred,      [SwiftWriteP01TwoCycle]>
]>;
// Read-side counterpart: a read advance of 2 when IsPredicatedPred holds,
// NoReadAdvance otherwise.
def SwiftReadAdvanceALUsr : SchedReadVariant<[
  SchedVar<IsPredicatedPred, [SchedReadAdvance<2>]>,
  SchedVar<NoSchedPred,      [NoReadAdvance]>
]>;
// ADC,ADD,NEG,RSB,RSC,SBC,SUB,ADR
// AND,BIC,EOR,ORN,ORR
// CLZ,RBIT,REV,REV16,REVSH,PKH
def : WriteRes<WriteALU, [SwiftUnitP01]>;
def : SchedAlias<WriteALUsi, SwiftWriteALUsi>;
def : SchedAlias<WriteALUsr, SwiftWriteALUsr>;
def : SchedAlias<WriteALUSsr, SwiftWriteALUSsr>;
def : ReadAdvance<ReadALU, 0>;
def : SchedAlias<ReadALUsr, SwiftReadAdvanceALUsr>;


// One-cycle P01 write for the fast immediate shifts, two-cycle otherwise.
def SwiftChooseShiftKindP01OneOrTwoCycle : SchedWriteVariant<[
  SchedVar<IsFastImmShiftSwiftPred, [SwiftWriteP01OneCycle]>,
  SchedVar<NoSchedPred,             [SwiftWriteP01TwoCycle]>
]>;

// 4.2.5 Integer comparison
def : WriteRes<WriteCMP, [SwiftUnitP01]>;
def : SchedAlias<WriteCMPsi, SwiftChooseShiftKindP01OneOrTwoCycle>;
def : SchedAlias<WriteCMPsr, SwiftWriteP01TwoCycle>;

// 4.2.6 Shift, Move
// Shift
//  ASR,LSL,ROR,RRX
//  MOV(register-shiftedregister)  MVN(register-shiftedregister)
// Move
//  MOV,MVN
//  MOVT
// Sign/Zero extension
def : InstRW<[SwiftWriteP01OneCycle],
      (instregex "SXTB", "SXTH", "SXTB16", "UXTB", "UXTH", "UXTB16",
       "t2SXTB", "t2SXTH", "t2SXTB16", "t2UXTB", "t2UXTH",
       "t2UXTB16")>;
// Pseudo instructions.
def : InstRW<[SwiftWriteP01OneCycle2x],
      (instregex "MOVCCi32imm", "MOVi32imm", "MOV_ga_dyn", "t2MOVCCi32imm",
       "t2MOVi32imm", "t2MOV_ga_dyn")>;
def : InstRW<[SwiftWriteP01OneCycle3x],
      (instregex "MOV_ga_pcrel", "t2MOV_ga_pcrel", "t2MOVi16_ga_pcrel")>;
def : InstRW<[SwiftWriteP01OneCycle2x_load],
      (instregex "MOV_ga_pcrel_ldr", "t2MOV_ga_pcrel_ldr")>;

// SwiftWriteP0OneCycle issued twice (the "Cyle" spelling is part of the
// record name and is referenced below).
def SwiftWriteP0TwoCyleTwoUops : WriteSequence<[SwiftWriteP0OneCycle], 2>;

// The doubled P0 write when IsPredicatedPred holds, a single one otherwise.
def SwiftPredP0OneOrTwoCycle : SchedWriteVariant<[
  SchedVar<IsPredicatedPred, [ SwiftWriteP0TwoCyleTwoUops ]>,
  SchedVar<NoSchedPred,      [ SwiftWriteP0OneCycle ]>
]>;

// 4.2.7 Select
// SEL
def : InstRW<[SwiftPredP0OneOrTwoCycle], (instregex "SEL", "t2SEL")>;

// 4.2.8 Bitfield
// BFI,BFC, SBFX,UBFX
def : InstRW< [SwiftWriteP01TwoCycle],
      (instregex "BFC", "BFI", "UBFX", "SBFX", "(t|t2)BFC", "(t|t2)BFI",
      "(t|t2)UBFX", "(t|t2)SBFX")>;

// 4.2.9 Saturating arithmetic
def : InstRW< [SwiftWriteP01TwoCycle],
      (instregex "QADD", "QSUB", "QDADD", "QDSUB", "SSAT", "SSAT16", "USAT",
      "USAT16", "QADD8", "QADD16", "QSUB8", "QSUB16", "QASX", "QSAX",
      "UQADD8", "UQADD16","UQSUB8","UQSUB16","UQASX","UQSAX", "t2QADD",
      "t2QSUB", "t2QDADD", "t2QDSUB", "t2SSAT", "t2SSAT16", "t2USAT",
      "t2QADD8", "t2QADD16", "t2QSUB8", "t2QSUB16", "t2QASX", "t2QSAX",
      "t2UQADD8", "t2UQADD16","t2UQSUB8","t2UQSUB16","t2UQASX","t2UQSAX")>;

// 4.2.10 Parallel Arithmetic
// NOTE(review): the "Not flag setting" / "Flag setting" labels below look
// swapped relative to the ARM ARM (the SADD8-style operations update the GE
// flags, the halving SHADD8-style operations do not) -- confirm against the
// tuning guide before relying on them.
// Not flag setting.
def : InstRW< [SwiftWriteALUsr],
      (instregex "SADD8", "SADD16", "SSUB8", "SSUB16", "SASX", "SSAX",
      "UADD8", "UADD16", "USUB8", "USUB16", "UASX", "USAX", "t2SADD8",
      "t2SADD16", "t2SSUB8", "t2SSUB16", "t2SASX", "t2SSAX", "t2UADD8",
      "t2UADD16", "t2USUB8", "t2USUB16", "t2UASX", "t2USAX")>;
// Flag setting.
def : InstRW< [SwiftWriteP01TwoCycle],
      (instregex "SHADD8", "SHADD16", "SHSUB8", "SHSUB16", "SHASX", "SHSAX",
      "SXTAB", "SXTAB16", "SXTAH", "UHADD8", "UHADD16", "UHSUB8", "UHSUB16",
      "UHASX", "UHSAX", "UXTAB", "UXTAB16", "UXTAH", "t2SHADD8", "t2SHADD16",
      "t2SHSUB8", "t2SHSUB16", "t2SHASX", "t2SHSAX", "t2SXTAB", "t2SXTAB16",
      "t2SXTAH", "t2UHADD8", "t2UHADD16", "t2UHSUB8", "t2UHSUB16", "t2UHASX",
      "t2UHSAX", "t2UXTAB", "t2UXTAB16", "t2UXTAH")>;

// 4.2.11 Sum of Absolute Difference
def : InstRW< [SwiftWriteP0P1FourCycle], (instregex "USAD8") >;
// The accumulating form additionally lists two ReadALU reads and a read
// advance of 2 for its remaining operand.
def : InstRW<[SwiftWriteP0P1FourCycle, ReadALU, ReadALU, SchedReadAdvance<2>],
      (instregex "USADA8")>;

// 4.2.12 Integer Multiply (32-bit result)
// Two sources.
// "SMUSD" already covers the exchange form when patterns are matched as
// prefixes; "SMUSDX" is listed explicitly so the entry names a real opcode.
def : InstRW< [SwiftWriteP0FourCycle],
      (instregex "MULS", "MUL", "SMMUL", "SMMULR", "SMULBB", "SMULBT",
      "SMULTB", "SMULTT", "SMULWB", "SMULWT", "SMUSD", "SMUSDX", "t2MUL",
      "t2SMMUL", "t2SMMULR", "t2SMULBB", "t2SMULBT", "t2SMULTB", "t2SMULTT",
      "t2SMULWB", "t2SMULWT", "t2SMUSD")>;

// Five-cycle write that uses both P0 and P01.
// NOTE(review): the name says "TwoUops" but NumMicroOps is not set here,
// unlike SwiftWriteP01ThreeCycleTwoUops -- confirm whether that is intended.
def SwiftWriteP0P01FiveCycleTwoUops :
    SchedWriteRes<[SwiftUnitP0, SwiftUnitP01]> {
  let Latency = 5;
}

// The five-cycle P0+P01 write when IsPredicatedPred holds, the plain
// four-cycle P0 write otherwise.
def SwiftPredP0P01FourFiveCycle : SchedWriteVariant<[
  SchedVar<IsPredicatedPred, [ SwiftWriteP0P01FiveCycleTwoUops ]>,
  SchedVar<NoSchedPred,      [ SwiftWriteP0FourCycle ]>
]>;

// Read of the accumulator operand: advanced by 4 cycles when
// IsPredicatedPred holds, an ordinary ReadALU otherwise.
def SwiftReadAdvanceFourCyclesPred : SchedReadVariant<[
  SchedVar<IsPredicatedPred, [SchedReadAdvance<4>]>,
  SchedVar<NoSchedPred,      [ReadALU]>
]>;

// Multiply accumulate, three sources
def : InstRW< [SwiftPredP0P01FourFiveCycle, ReadALU, ReadALU,
               SwiftReadAdvanceFourCyclesPred],
      (instregex "MLAS", "MLA", "MLS", "SMMLA", "SMMLAR", "SMMLS", "SMMLSR",
      "t2MLA", "t2MLS", "t2MLAS", "t2SMMLA", "t2SMMLAR", "t2SMMLS",
      "t2SMMLSR")>;

// 4.2.13 Integer Multiply (32-bit result, Q flag)
def : InstRW< [SwiftWriteP0FourCycle],
      (instregex "SMUAD", "SMUADX", "t2SMUAD", "t2SMUADX")>;
def : InstRW< [SwiftPredP0P01FourFiveCycle, ReadALU, ReadALU,
               SwiftReadAdvanceFourCyclesPred],
      (instregex "SMLABB", "SMLABT", "SMLATB", "SMLATT", "SMLSD", "SMLSDX",
      "SMLAWB", "SMLAWT", "t2SMLABB", "t2SMLABT", "t2SMLATB", "t2SMLATT",
      "t2SMLSD", "t2SMLSDX", "t2SMLAWB", "t2SMLAWT")>;
def : InstRW< [SwiftPredP0P01FourFiveCycle],
      (instregex "SMLAD", "SMLADX", "t2SMLAD", "t2SMLADX")>;

// Three micro-ops: ResourceCycles = [2, 1] over P0 and P01 respectively.
def SwiftP0P0P01FiveCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP01]> {
  let Latency = 5;
  let NumMicroOps = 3;
  let ResourceCycles = [2, 1];
}
// Latency-only writes: no resources and zero micro-ops.
def SwiftWrite1Cycle : SchedWriteRes<[]> {
  let Latency = 1;
  let NumMicroOps = 0;
}
def SwiftWrite5Cycle : SchedWriteRes<[]> {
  let Latency = 5;
  let NumMicroOps = 0;
}
def SwiftWrite6Cycle : SchedWriteRes<[]> {
  let Latency = 6;
  let NumMicroOps = 0;
}

// 4.2.14 Integer Multiply, Long
def : InstRW< [SwiftP0P0P01FiveCycle, SwiftWrite5Cycle],
      (instregex "SMULL$", "UMULL$", "t2SMULL$", "t2UMULL$")>;

// Five micro-ops: ResourceCycles = [2, 3] over P0 and P01 respectively.
// NOTE(review): the name says "FiveCycle" but Latency is 7 -- presumably part
// of the inaccuracy acknowledged below; confirm.
def Swift2P03P01FiveCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP01]> {
  let Latency = 7;
  let NumMicroOps = 5;
  let ResourceCycles = [2, 3];
}

// 4.2.15 Integer Multiply Accumulate, Long
// 4.2.16 Integer Multiply Accumulate, Dual
// 4.2.17 Integer Multiply Accumulate Accumulate, Long
// We are being a bit inaccurate here.
// The BB halfword forms are spelled "SMLALBB" / "t2SMLALBB", in line with the
// BT/TB/TT entries that follow them.
def : InstRW< [SwiftWrite5Cycle, Swift2P03P01FiveCycle, ReadALU, ReadALU,
               SchedReadAdvance<4>, SchedReadAdvance<3>],
      (instregex "SMLALS", "UMLALS", "SMLAL", "UMLAL", "SMLALBB", "SMLALBT",
        "SMLALTB", "SMLALTT", "SMLALD", "SMLALDX", "SMLSLD", "SMLSLDX",
        "UMAAL", "t2SMLALS", "t2UMLALS", "t2SMLAL", "t2UMLAL", "t2SMLALBB",
        "t2SMLALBT", "t2SMLALTB", "t2SMLALTT", "t2SMLALD", "t2SMLALDX",
        "t2SMLSLD", "t2SMLSLDX", "t2UMAAL")>;

// One micro-op with ResourceCycles = [1, 14] over P0 and the divide unit.
def SwiftDiv : SchedWriteRes<[SwiftUnitP0, SwiftUnitDiv]> {
  let NumMicroOps = 1;
  let Latency = 14;
  let ResourceCycles = [1, 14];
}
// 4.2.18 Integer Divide
def : WriteRes<WriteDiv, [SwiftUnitDiv]>; // Workaround.
// Divide instructions use the SwiftDiv write.
def : InstRW<[SwiftDiv],
      (instregex "SDIV", "UDIV", "t2SDIV", "t2UDIV")>;

// 4.2.19 Integer Load Single Element
// 4.2.20 Integer Load Signextended
// Load writes: each lists the units it uses and carries the latency given in
// its name. ("FourCyle" is the record's actual spelling and is referenced
// below.)
let Latency = 3 in
def SwiftWriteP2P01ThreeCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]>;
let Latency = 4 in
def SwiftWriteP2P01FourCyle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]>;
let Latency = 4 in
def SwiftWriteP2P01P01FourCycle
    : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01, SwiftUnitP01]>;
let Latency = 3 in
def SwiftWriteP2P2ThreeCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP2]>;
let Latency = 3 in
def SwiftWriteP2P2P01ThreeCycle
    : SchedWriteRes<[SwiftUnitP2, SwiftUnitP2, SwiftUnitP01]>;
// Latency-only writes: no resources and zero micro-ops.
let Latency = 1, NumMicroOps = 0 in
def SwiftWrBackOne : SchedWriteRes<[]>;
let Latency = 4, NumMicroOps = 0 in
def SwiftWriteLdFour : SchedWriteRes<[]>;
// Not accurate.
def : InstRW<[SwiftWriteP2ThreeCycle],
      (instregex
        "LDR(i12|rs)$", "LDRB(i12|rs)$",
        "t2LDR(i8|i12|s|pci)", "t2LDR(H|B)(i8|i12|s|pci)",
        "LDREX",
        "tLDR[BH](r|i|spi|pci|pciASM)", "tLDR(r|i|spi|pci|pciASM)")>;
def : InstRW<[SwiftWriteP2ThreeCycle],
      (instregex "LDRH$", "PICLDR$", "PICLDR(H|B)$", "LDRcp$")>;
// Sign-extending loads also list SwiftUnitP01 and take four cycles.
def : InstRW<[SwiftWriteP2P01FourCyle],
      (instregex
        "PICLDRS(H|B)$", "t2LDRS(H|B)(i|r|p|s)", "LDRS(H|B)$",
        "t2LDRpci_pic", "tLDRS(B|H)")>;
// Pre/post-indexed forms carry a second, one-cycle SwiftWrBackOne write.
def : InstRW<[SwiftWriteP2P01ThreeCycle, SwiftWrBackOne],
      (instregex
        "LD(RB|R)(_|T_)(POST|PRE)_(IMM|REG)", "LDRH(_PRE|_POST)",
        "LDR(T|BT)_POST_(REG|IMM)", "LDRHT(i|r)",
        "t2LD(R|RB|RH)_(PRE|POST)", "t2LD(R|RB|RH)T")>;
def : InstRW<[SwiftWriteP2P01P01FourCycle, SwiftWrBackOne],
      (instregex
        "LDR(SH|SB)(_POST|_PRE)", "t2LDR(SH|SB)(_POST|_PRE)",
        "LDRS(B|H)T(i|r)", "t2LDRS(B|H)T(i|r)", "t2LDRS(B|H)T")>;

// 4.2.21 Integer Dual Load
// Not accurate.
// Dual loads: two P2 uses for the first result, a latency-only four-cycle
// write for the second; the pre/post-indexed forms add SwiftUnitP01 and a
// one-cycle SwiftWrBackOne write.
def : InstRW<[SwiftWriteP2P2ThreeCycle, SwiftWriteLdFour],
      (instregex "t2LDRDi8", "LDRD$")>;
def : InstRW<[SwiftWriteP2P2P01ThreeCycle, SwiftWriteLdFour, SwiftWrBackOne],
      (instregex "LDRD_(POST|PRE)", "t2LDRD_(POST|PRE)")>;

// 4.2.22 Integer Load, Multiple
// NumReg = 1 .. 16
// For every latency 3..25 define two writes: SwiftWriteLM<Lat>Cy, which uses
// SwiftUnitP2, and SwiftWriteLM<Lat>CyNo, which has the same latency but
// lists no resources.
foreach Lat = 3-25 in {
  def SwiftWriteLM#Lat#Cy : SchedWriteRes<[SwiftUnitP2]> {
    let Latency = Lat;
  }
  def SwiftWriteLM#Lat#CyNo : SchedWriteRes<[]> { let Latency = Lat; }
}
// Predicate.
// SwiftLMAddr<N>Pred holds when TII->getNumLDMAddresses(MI) equals N.
foreach NumAddr = 1-16 in {
  def SwiftLMAddr#NumAddr#Pred : SchedPredicate<"TII->getNumLDMAddresses(MI) == "#NumAddr>;
}
// Address-update writes for LDM without and with base writeback.
def SwiftWriteLDMAddrNoWB : SchedWriteRes<[SwiftUnitP01]> { let Latency = 0; }
def SwiftWriteLDMAddrWB : SchedWriteRes<[SwiftUnitP01, SwiftUnitP01]>;
// Load-multiple write, selected by address count. For N addresses (2..16) the
// variant supplies N writes on SwiftUnitP2 with latencies 3, 4, ..., N+2.
// No entry is listed for SwiftLMAddr1Pred.
// Variadic is set on the record -- presumably so the write list can cover a
// variable number of register operands; confirm against TargetSchedule.td.
def SwiftWriteLM : SchedWriteVariant<[
  SchedVar<SwiftLMAddr2Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy]>,
  SchedVar<SwiftLMAddr3Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
                              SwiftWriteLM5Cy]>,
  SchedVar<SwiftLMAddr4Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
                              SwiftWriteLM5Cy, SwiftWriteLM6Cy]>,
  SchedVar<SwiftLMAddr5Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
                              SwiftWriteLM5Cy, SwiftWriteLM6Cy,
                              SwiftWriteLM7Cy]>,
  SchedVar<SwiftLMAddr6Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
                              SwiftWriteLM5Cy, SwiftWriteLM6Cy,
                              SwiftWriteLM7Cy, SwiftWriteLM8Cy]>,
  SchedVar<SwiftLMAddr7Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
                              SwiftWriteLM5Cy, SwiftWriteLM6Cy,
                              SwiftWriteLM7Cy, SwiftWriteLM8Cy,
                              SwiftWriteLM9Cy]>,
  SchedVar<SwiftLMAddr8Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
                              SwiftWriteLM5Cy, SwiftWriteLM6Cy,
                              SwiftWriteLM7Cy, SwiftWriteLM8Cy,
                              SwiftWriteLM9Cy, SwiftWriteLM10Cy]>,
  SchedVar<SwiftLMAddr9Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
                              SwiftWriteLM5Cy, SwiftWriteLM6Cy,
                              SwiftWriteLM7Cy, SwiftWriteLM8Cy,
                              SwiftWriteLM9Cy, SwiftWriteLM10Cy,
                              SwiftWriteLM11Cy]>,
  SchedVar<SwiftLMAddr10Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
                              SwiftWriteLM5Cy, SwiftWriteLM6Cy,
                              SwiftWriteLM7Cy, SwiftWriteLM8Cy,
                              SwiftWriteLM9Cy, SwiftWriteLM10Cy,
                              SwiftWriteLM11Cy, SwiftWriteLM12Cy]>,
  SchedVar<SwiftLMAddr11Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
                              SwiftWriteLM5Cy, SwiftWriteLM6Cy,
                              SwiftWriteLM7Cy, SwiftWriteLM8Cy,
                              SwiftWriteLM9Cy, SwiftWriteLM10Cy,
                              SwiftWriteLM11Cy, SwiftWriteLM12Cy,
                              SwiftWriteLM13Cy]>,
  SchedVar<SwiftLMAddr12Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
                              SwiftWriteLM5Cy, SwiftWriteLM6Cy,
                              SwiftWriteLM7Cy, SwiftWriteLM8Cy,
                              SwiftWriteLM9Cy, SwiftWriteLM10Cy,
                              SwiftWriteLM11Cy, SwiftWriteLM12Cy,
                              SwiftWriteLM13Cy, SwiftWriteLM14Cy]>,
  SchedVar<SwiftLMAddr13Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
                              SwiftWriteLM5Cy, SwiftWriteLM6Cy,
                              SwiftWriteLM7Cy, SwiftWriteLM8Cy,
                              SwiftWriteLM9Cy, SwiftWriteLM10Cy,
                              SwiftWriteLM11Cy, SwiftWriteLM12Cy,
                              SwiftWriteLM13Cy, SwiftWriteLM14Cy,
                              SwiftWriteLM15Cy]>,
  SchedVar<SwiftLMAddr14Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
                              SwiftWriteLM5Cy, SwiftWriteLM6Cy,
                              SwiftWriteLM7Cy, SwiftWriteLM8Cy,
                              SwiftWriteLM9Cy, SwiftWriteLM10Cy,
                              SwiftWriteLM11Cy, SwiftWriteLM12Cy,
                              SwiftWriteLM13Cy, SwiftWriteLM14Cy,
                              SwiftWriteLM15Cy, SwiftWriteLM16Cy]>,
  SchedVar<SwiftLMAddr15Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
                              SwiftWriteLM5Cy, SwiftWriteLM6Cy,
                              SwiftWriteLM7Cy, SwiftWriteLM8Cy,
                              SwiftWriteLM9Cy, SwiftWriteLM10Cy,
                              SwiftWriteLM11Cy, SwiftWriteLM12Cy,
                              SwiftWriteLM13Cy, SwiftWriteLM14Cy,
                              SwiftWriteLM15Cy, SwiftWriteLM16Cy,
                              SwiftWriteLM17Cy]>,
  SchedVar<SwiftLMAddr16Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
                              SwiftWriteLM5Cy, SwiftWriteLM6Cy,
                              SwiftWriteLM7Cy, SwiftWriteLM8Cy,
                              SwiftWriteLM9Cy, SwiftWriteLM10Cy,
                              SwiftWriteLM11Cy, SwiftWriteLM12Cy,
                              SwiftWriteLM13Cy, SwiftWriteLM14Cy,
                              SwiftWriteLM15Cy, SwiftWriteLM16Cy,
                              SwiftWriteLM17Cy, SwiftWriteLM18Cy]>,
  // Unknown number of registers, just use resources for two registers.
  // Only the first two writes use SwiftUnitP2; the remaining fourteen are the
  // resource-free "CyNo" writes.
  SchedVar<NoSchedPred,      [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
                              SwiftWriteLM5CyNo, SwiftWriteLM6CyNo,
                              SwiftWriteLM7CyNo, SwiftWriteLM8CyNo,
                              SwiftWriteLM9CyNo, SwiftWriteLM10CyNo,
                              SwiftWriteLM11CyNo, SwiftWriteLM12CyNo,
                              SwiftWriteLM13CyNo, SwiftWriteLM14CyNo,
                              SwiftWriteLM15CyNo, SwiftWriteLM16CyNo,
                              SwiftWriteLM17CyNo, SwiftWriteLM18CyNo]>

]> { let Variadic=1; }

// LDM without writeback.
def : InstRW<[SwiftWriteLM, SwiftWriteLDMAddrNoWB],
      (instregex "LDM(IA|DA|DB|IB)$", "t2LDM(IA|DA|DB|IB)$",
      "(t|sys)LDM(IA|DA|DB|IB)$")>;
// LDM with base update; the *_RET forms are commented out here and handled
// by the next mapping instead.
def : InstRW<[SwiftWriteLDMAddrWB, SwiftWriteLM],
      (instregex /*"t2LDMIA_RET", "tLDMIA_RET", "LDMIA_RET",*/
      "LDM(IA|DA|DB|IB)_UPD", "(t2|sys|t)LDM(IA|DA|DB|IB)_UPD")>;
// Return / pop forms additionally list a two-cycle P1 write.
def : InstRW<[SwiftWriteLDMAddrWB, SwiftWriteLM, SwiftWriteP1TwoCycle],
      (instregex "LDMIA_RET", "(t|t2)LDMIA_RET", "POP", "tPOP")>;
// 4.2.23 Integer Store, Single Element
def : InstRW<[SwiftWriteP2],
      (instregex "PICSTR", "STR(i12|rs)", "STRB(i12|rs)", "STRH$", "STREX",
      "t2STR(i12|i8|s)$", "t2STR[BH](i12|i8|s)$", "tSTR[BH](i|r)", "tSTR(i|r)", "tSTRspi")>;

// Pre/post-indexed stores: a one-cycle P01 write plus the store write.
def : InstRW<[SwiftWriteP01OneCycle, SwiftWriteP2],
      (instregex "STR(B_|_|BT_|T_)(PRE_IMM|PRE_REG|POST_REG|POST_IMM)",
      "STR(i|r)_preidx", "STRB(i|r)_preidx", "STRH_preidx", "STR(H_|HT_)(PRE|POST)",
      "STR(BT|HT|T)", "t2STR_(PRE|POST)", "t2STR[BH]_(PRE|POST)",
      "t2STR_preidx", "t2STR[BH]_preidx", "t2ST(RB|RH|R)T")>;

// 4.2.24 Integer Store, Dual
def : InstRW<[SwiftWriteP2, SwiftWriteP2, SwiftWriteP01OneCycle],
      (instregex "STRD$", "t2STRDi8")>;
def : InstRW<[SwiftWriteP01OneCycle, SwiftWriteP2, SwiftWriteP2,
              SwiftWriteP01OneCycle],
      (instregex "(t2|t)STRD_(POST|PRE)", "STRD_(POST|PRE)")>;

// 4.2.25 Integer Store, Multiple
// One store step: uses SwiftUnitP2 and SwiftUnitP01, zero latency.
def SwiftWriteStIncAddr : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> {
  let Latency = 0;
}
// SwiftWriteSTM<N>: SwiftWriteStIncAddr repeated N times, for N = 1..16.
foreach NumAddr = 1-16 in {
  def SwiftWriteSTM#NumAddr : WriteSequence<[SwiftWriteStIncAddr], NumAddr>;
}
// Store-multiple write, selected by the same address-count predicates as
// SwiftWriteLM. No entry is listed for SwiftLMAddr1Pred.
def SwiftWriteSTM : SchedWriteVariant<[
  SchedVar<SwiftLMAddr2Pred, [SwiftWriteSTM2]>,
  SchedVar<SwiftLMAddr3Pred, [SwiftWriteSTM3]>,
  SchedVar<SwiftLMAddr4Pred, [SwiftWriteSTM4]>,
  SchedVar<SwiftLMAddr5Pred, [SwiftWriteSTM5]>,
  SchedVar<SwiftLMAddr6Pred, [SwiftWriteSTM6]>,
  SchedVar<SwiftLMAddr7Pred, [SwiftWriteSTM7]>,
  SchedVar<SwiftLMAddr8Pred, [SwiftWriteSTM8]>,
  SchedVar<SwiftLMAddr9Pred, [SwiftWriteSTM9]>,
  SchedVar<SwiftLMAddr10Pred,[SwiftWriteSTM10]>,
  SchedVar<SwiftLMAddr11Pred,[SwiftWriteSTM11]>,
  SchedVar<SwiftLMAddr12Pred,[SwiftWriteSTM12]>,
  SchedVar<SwiftLMAddr13Pred,[SwiftWriteSTM13]>,
  SchedVar<SwiftLMAddr14Pred,[SwiftWriteSTM14]>,
  SchedVar<SwiftLMAddr15Pred,[SwiftWriteSTM15]>,
  SchedVar<SwiftLMAddr16Pred,[SwiftWriteSTM16]>,
  // Unknown number of registers, just use resources for two registers.
  SchedVar<NoSchedPred,      [SwiftWriteSTM2]>
]>;
def : InstRW<[SwiftWriteSTM],
      (instregex "STM(IB|IA|DB|DA)$", "(t2|sys|t)STM(IB|IA|DB|DA)$")>;
// Base-updating store-multiple and push forms add a one-cycle P01 write.
def : InstRW<[SwiftWriteP01OneCycle, SwiftWriteSTM],
      (instregex "STM(IB|IA|DB|DA)_UPD", "(t2|sys|t)STM(IB|IA|DB|DA)_UPD",
      "PUSH", "tPUSH")>;

// 4.2.26 Branch
def : WriteRes<WriteBr, [SwiftUnitP1]> { let Latency = 0; }
def : WriteRes<WriteBrL, [SwiftUnitP1]> { let Latency = 2; }
def : WriteRes<WriteBrTbl, [SwiftUnitP1, SwiftUnitP2]> { let Latency = 0; }

// 4.2.27 Not issued
// No resources, zero latency and zero micro-ops.
def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }
def : InstRW<[WriteNoop], (instregex "t2IT", "IT", "NOP")>;

// 4.2.28 Advanced SIMD, Integer, 2 cycle
def : InstRW<[SwiftWriteP0TwoCycle],
      (instregex "VADDv", "VSUBv", "VNEG(s|f|v)", "VADDL", "VSUBL",
      "VADDW", "VSUBW", "VHADD", "VHSUB", "VRHADD", "VPADDi",
      "VPADDL", "VAND", "VBIC", "VEOR", "VORN", "VORR", "VTST",
      "VSHL", "VSHR(s|u)", "VSHLL", "VQSHL", "VQSHLU", "VBIF",
      "VBIT", "VBSL", "VSLI", "VSRI", "VCLS", "VCLZ", "VCNT")>;

def : InstRW<[SwiftWriteP1TwoCycle],
      (instregex "VEXT", "VREV16", "VREV32", "VREV64")>;

// 4.2.29 Advanced SIMD, Integer, 4 cycle
// 4.2.30 Advanced SIMD, Integer with Accumulate
def : InstRW<[SwiftWriteP0FourCycle],
      (instregex "VABA", "VABAL", "VPADAL", "VRSRA", "VSRA", "VACGE", "VACGT",
      "VACLE", "VACLT", "VCEQ", "VCGE", "VCGT", "VCLE", "VCLT", "VRSHL",
      "VQRSHL", "VRSHR(u|s)", "VABS(f|v)", "VQABS", "VQNEG", "VQADD",
      "VQSUB")>;
def : InstRW<[SwiftWriteP1FourCycle],
      (instregex "VRECPE", "VRSQRTE")>;

// 4.2.31 Advanced SIMD, Add and Shift with Narrow
def : InstRW<[SwiftWriteP0P1FourCycle],
      (instregex "VADDHN", "VSUBHN", "VSHRN")>;
def : InstRW<[SwiftWriteP0P1SixCycle],
      (instregex "VRADDHN", "VRSUBHN", "VRSHRN", "VQSHRN", "VQSHRUN",
      "VQRSHRN", "VQRSHRUN")>;

// 4.2.32 Advanced SIMD, Vector Table Lookup
// SwiftWrite<N>xP1TwoCycle: the two-cycle P1 write repeated N times; the
// table-lookup forms with N in their name use the N-times sequence.
foreach Num = 1-4 in {
  def SwiftWrite#Num#xP1TwoCycle : WriteSequence<[SwiftWriteP1TwoCycle], Num>;
}
def : InstRW<[SwiftWrite1xP1TwoCycle],
      (instregex "VTB(L|X)1")>;
def : InstRW<[SwiftWrite2xP1TwoCycle],
      (instregex "VTB(L|X)2")>;
def : InstRW<[SwiftWrite3xP1TwoCycle],
      (instregex "VTB(L|X)3")>;
def : InstRW<[SwiftWrite4xP1TwoCycle],
      (instregex "VTB(L|X)4")>;

// 4.2.33 Advanced SIMD, Transpose
// Two four-cycle P1 writes, a third P1 write marked "RsrcOnly" in the
// original, and a read advance of 2.
def : InstRW<[SwiftWriteP1FourCycle, SwiftWriteP1FourCycle,
              SwiftWriteP1TwoCycle/*RsrcOnly*/, SchedReadAdvance<2>],
      (instregex "VSWP", "VTRN", "VUZP", "VZIP")>;

// 4.2.34 Advanced SIMD and VFP, Floating Point
def : InstRW<[SwiftWriteP0TwoCycle], (instregex "VABS(S|D)$", "VNEG(S|D)$")>;
def : InstRW<[SwiftWriteP0FourCycle],
      (instregex "VCMP(D|S|ZD|ZS)$", "VCMPE(D|S|ZD|ZS)")>;
def : InstRW<[SwiftWriteP0FourCycle],
      (instregex "VADD(S|f)", "VSUB(S|f)", "VABD", "VPADDf", "VMAX", "VMIN", "VPMAX",
      "VPMIN")>;
def : InstRW<[SwiftWriteP0SixCycle], (instregex "VADDD$", "VSUBD$")>;
def : InstRW<[SwiftWriteP1EightCycle], (instregex "VRECPS", "VRSQRTS")>;

// 4.2.35 Advanced SIMD and VFP, Multiply
def : InstRW<[SwiftWriteP1FourCycle],
      (instregex "VMUL(S|v|p|f|s)", "VNMULS", "VQDMULH", "VQRDMULH",
      "VMULL", "VQDMULL")>;
def : InstRW<[SwiftWriteP1SixCycle],
      (instregex "VMULD", "VNMULD")>;
def : InstRW<[SwiftWriteP1FourCycle],
      (instregex "VMLA", "VMLS", "VNMLA", "VNMLS", "VFMA(S|D)", "VFMS(S|D)",
      "VFNMA", "VFNMS", "VMLAL", "VMLSL","VQDMLAL", "VQDMLSL")>;
def : InstRW<[SwiftWriteP1EightCycle], (instregex "VFMAfd", "VFMSfd")>;
def : InstRW<[SwiftWriteP1TwelveCyc], (instregex "VFMAfq", "VFMSfq")>;

// 4.2.36 Advanced SIMD and VFP, Convert
def : InstRW<[SwiftWriteP1FourCycle], (instregex "VCVT", "V(S|U)IT", "VTO(S|U)")>;
// Fixpoint conversions.
def : WriteRes<WriteCvtFP, [SwiftUnitP1]> { let Latency = 4; }

// 4.2.37 Advanced SIMD and VFP, Move
def : InstRW<[SwiftWriteP0TwoCycle],
      (instregex "VMOVv", "VMOV(S|D)$", "VMOV(S|D)cc",
                 "VMVNv", "VMVN(d|q)", "VMVN(S|D)cc",
                 "FCONST(D|S)")>;
def : InstRW<[SwiftWriteP1TwoCycle], (instregex "VMOVN", "VMOVL")>;
def : InstRW<[WriteSequence<[SwiftWriteP0FourCycle, SwiftWriteP1TwoCycle]>],
      (instregex "VQMOVN")>;
def : InstRW<[SwiftWriteP1TwoCycle], (instregex "VDUPLN", "VDUPf")>;
// The mappings below that involve a core register chain a P2 write with a
// P0 or P1 write through an anonymous WriteSequence.
def : InstRW<[WriteSequence<[SwiftWriteP2FourCycle, SwiftWriteP1TwoCycle]>],
      (instregex "VDUP(8|16|32)")>;
def : InstRW<[SwiftWriteP2ThreeCycle], (instregex "VMOVRS$")>;
def : InstRW<[WriteSequence<[SwiftWriteP2FourCycle, SwiftWriteP0TwoCycle]>],
      (instregex "VMOVSR$", "VSETLN")>;
def : InstRW<[SwiftWriteP2ThreeCycle, SwiftWriteP2FourCycle],
      (instregex "VMOVRR(D|S)$")>;
def : InstRW<[SwiftWriteP2FourCycle], (instregex "VMOVDRR$")>;
// Two results; the second sequence starts with an extra latency-only
// one-cycle write.
def : InstRW<[WriteSequence<[SwiftWriteP2FourCycle, SwiftWriteP1TwoCycle]>,
              WriteSequence<[SwiftWrite1Cycle, SwiftWriteP2FourCycle,
                             SwiftWriteP1TwoCycle]>],
              (instregex "VMOVSRR$")>;
def : InstRW<[WriteSequence<[SwiftWriteP1TwoCycle, SwiftWriteP2ThreeCycle]>],
      (instregex "VGETLN(u|i)")>;
// The "s" form appends a one-cycle P01 write to the sequence.
def : InstRW<[WriteSequence<[SwiftWriteP1TwoCycle, SwiftWriteP2ThreeCycle,
                             SwiftWriteP01OneCycle]>],
      (instregex "VGETLNs")>;

// 4.2.38 Advanced SIMD and VFP, Move FPSCR
// Serializing instructions.
// Each of the three writes below occupies a single unit (P0, P1 or P2) for
// 15 cycles and has a latency of 15. VMRS and VMSR are given all three.
def SwiftWaitP0For15Cy : SchedWriteRes<[SwiftUnitP0]> {
  let Latency = 15;
  let ResourceCycles = [15];
}
def SwiftWaitP1For15Cy : SchedWriteRes<[SwiftUnitP1]> {
  let Latency = 15;
  let ResourceCycles = [15];
}
def SwiftWaitP2For15Cy : SchedWriteRes<[SwiftUnitP2]> {
  let Latency = 15;
  let ResourceCycles = [15];
}
def : InstRW<[SwiftWaitP0For15Cy, SwiftWaitP1For15Cy, SwiftWaitP2For15Cy],
      (instregex "VMRS")>;
def : InstRW<[SwiftWaitP0For15Cy, SwiftWaitP1For15Cy, SwiftWaitP2For15Cy],
      (instregex "VMSR")>;
// Not serializing.
def : InstRW<[SwiftWriteP0TwoCycle], (instregex "FMSTAT")>;

// 4.2.39 Advanced SIMD and VFP, Load Single Element
def : InstRW<[SwiftWriteLM4Cy], (instregex "VLDRD$", "VLDRS$")>;

// 4.2.40 Advanced SIMD and VFP, Store Single Element
def : InstRW<[SwiftWriteLM4Cy], (instregex "VSTRD$", "VSTRS$")>;

// 4.2.41 Advanced SIMD and VFP, Load Multiple
// 4.2.42 Advanced SIMD and VFP, Store Multiple

// Resource requirement for permuting, just reserves the resources.
// Defines SwiftVLDMPerm1 .. SwiftVLDMPerm28: zero latency, Num micro-ops and
// Num cycles on SwiftUnitP1.
foreach Num = 1-28 in {
  def SwiftVLDMPerm#Num : SchedWriteRes<[SwiftUnitP1]> {
    let Latency = 0;
    let NumMicroOps = Num;
    let ResourceCycles = [Num];
  }
}

// Pre RA pseudos - load/store to a Q register as a D register pair.
def : InstRW<[SwiftWriteLM4Cy], (instregex "VLDMQIA$", "VSTMQIA$")>;

// Post RA not modelled accurately. We assume that register use of width 64
// bit maps to a D register, 128 maps to a Q register. Not all different kinds
// are accurately represented.
// The variant selects one write list per SwiftLMAddr<N>Pred predicate and
// falls back to the NoSchedPred entry when none of them applies. Every list
// from SwiftLMAddr3Pred onwards, except the SwiftLMAddr4Pred one, ends with
// SwiftWriteP01OneCycle and a SwiftVLDMPerm<N> reservation.
def SwiftWriteVLDM : SchedWriteVariant<[
  // Load of one S register.
  SchedVar<SwiftLMAddr1Pred, [SwiftWriteLM4Cy]>,
  // Load of one D register.
  SchedVar<SwiftLMAddr2Pred, [SwiftWriteLM4Cy, SwiftWriteLM4CyNo]>,
  // Load of 3 S registers.
  SchedVar<SwiftLMAddr3Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy,
                              SwiftWriteLM13CyNo, SwiftWriteP01OneCycle,
                              SwiftVLDMPerm3]>,
  // Load of a Q register (not necessarily true). We should not be mapping to
  // 4 S registers, either.
  SchedVar<SwiftLMAddr4Pred, [SwiftWriteLM4Cy, SwiftWriteLM4CyNo,
                              SwiftWriteLM4CyNo, SwiftWriteLM4CyNo]>,
  // Load of 5 S registers.
  SchedVar<SwiftLMAddr5Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy,
                              SwiftWriteLM13CyNo, SwiftWriteLM14CyNo,
                              SwiftWriteLM17CyNo, SwiftWriteP01OneCycle,
                              SwiftVLDMPerm5]>,
  // Load of 3 D registers. (Must also be able to handle an S register list -
  // though, not accurate)
  // NOTE(review): this entry reserves SwiftVLDMPerm5 although the comment says
  // three registers; the 5 D and 7 D entries use Perm5 and Perm7 - confirm
  // whether Perm5 is intended here.
  SchedVar<SwiftLMAddr6Pred, [SwiftWriteLM7Cy, SwiftWriteLM8Cy,
                              SwiftWriteLM10Cy, SwiftWriteLM14CyNo,
                              SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
                              SwiftWriteP01OneCycle, SwiftVLDMPerm5]>,
  // Load of 7 S registers.
  SchedVar<SwiftLMAddr7Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy,
                              SwiftWriteLM13Cy, SwiftWriteLM14CyNo,
                              SwiftWriteLM17CyNo, SwiftWriteLM18CyNo,
                              SwiftWriteLM21CyNo, SwiftWriteP01OneCycle,
                              SwiftVLDMPerm7]>,
  // Load of two Q registers.
  SchedVar<SwiftLMAddr8Pred, [SwiftWriteLM7Cy, SwiftWriteLM8Cy,
                              SwiftWriteLM13Cy, SwiftWriteLM13CyNo,
                              SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
                              SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
                              SwiftWriteP01OneCycle, SwiftVLDMPerm2]>,
  // Load of 9 S registers.
  SchedVar<SwiftLMAddr9Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy,
                              SwiftWriteLM13Cy, SwiftWriteLM14CyNo,
                              SwiftWriteLM17CyNo, SwiftWriteLM18CyNo,
                              SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
                              SwiftWriteLM25CyNo, SwiftWriteP01OneCycle,
                              SwiftVLDMPerm9]>,
  // Load of 5 D registers.
  SchedVar<SwiftLMAddr10Pred,[SwiftWriteLM7Cy, SwiftWriteLM8Cy,
                              SwiftWriteLM10Cy, SwiftWriteLM14Cy,
                              SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
                              SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
                              SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
                              SwiftWriteP01OneCycle, SwiftVLDMPerm5]>,
  // Inaccurate: reuse description from 9 S registers.
  SchedVar<SwiftLMAddr11Pred,[SwiftWriteLM9Cy, SwiftWriteLM10Cy,
                              SwiftWriteLM13Cy, SwiftWriteLM14CyNo,
                              SwiftWriteLM17CyNo, SwiftWriteLM18CyNo,
                              SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
                              SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
                              SwiftWriteLM25CyNo, SwiftWriteP01OneCycle,
                              SwiftVLDMPerm9]>,
  // Load of three Q registers.
  SchedVar<SwiftLMAddr12Pred,[SwiftWriteLM7Cy, SwiftWriteLM8Cy,
                              SwiftWriteLM11Cy, SwiftWriteLM11Cy,
                              SwiftWriteLM11CyNo, SwiftWriteLM11CyNo,
                              SwiftWriteLM11CyNo, SwiftWriteLM11CyNo,
                              SwiftWriteLM11CyNo, SwiftWriteLM11CyNo,
                              SwiftWriteLM11CyNo, SwiftWriteLM11CyNo,
                              SwiftWriteP01OneCycle, SwiftVLDMPerm3]>,
  // Inaccurate: reuse description from 9 S registers.
  SchedVar<SwiftLMAddr13Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy,
                               SwiftWriteLM13Cy, SwiftWriteLM14CyNo,
                               SwiftWriteLM17CyNo, SwiftWriteLM18CyNo,
                               SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
                               SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
                               SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
                               SwiftWriteLM25CyNo, SwiftWriteP01OneCycle,
                               SwiftVLDMPerm9]>,
  // Load of 7 D registers inaccurate.
  SchedVar<SwiftLMAddr14Pred,[SwiftWriteLM7Cy, SwiftWriteLM8Cy,
                              SwiftWriteLM10Cy, SwiftWriteLM14Cy,
                              SwiftWriteLM14Cy, SwiftWriteLM14CyNo,
                              SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
                              SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
                              SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
                              SwiftWriteP01OneCycle, SwiftVLDMPerm7]>,
  SchedVar<SwiftLMAddr15Pred,[SwiftWriteLM9Cy, SwiftWriteLM10Cy,
                              SwiftWriteLM13Cy, SwiftWriteLM14Cy,
                              SwiftWriteLM17Cy, SwiftWriteLM18CyNo,
                              SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
                              SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
                              SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
                              SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
                              SwiftWriteLM25CyNo, SwiftWriteP01OneCycle,
                              SwiftVLDMPerm9]>,
  // Load of 4 Q registers.
  SchedVar<SwiftLMAddr16Pred,[SwiftWriteLM7Cy, SwiftWriteLM10Cy,
                              SwiftWriteLM11Cy, SwiftWriteLM14Cy,
                              SwiftWriteLM15Cy, SwiftWriteLM18CyNo,
                              SwiftWriteLM19CyNo, SwiftWriteLM22CyNo,
                              SwiftWriteLM19CyNo, SwiftWriteLM22CyNo,
                              SwiftWriteLM19CyNo, SwiftWriteLM22CyNo,
                              SwiftWriteLM19CyNo, SwiftWriteLM22CyNo,
                              SwiftWriteLM19CyNo, SwiftWriteLM22CyNo,
                              SwiftWriteP01OneCycle, SwiftVLDMPerm4]>,
  // Unknown number of registers, just use resources for two registers.
  SchedVar<NoSchedPred,      [SwiftWriteLM7Cy, SwiftWriteLM8Cy,
                              SwiftWriteLM13Cy, SwiftWriteLM13CyNo,
                              SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
                              SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
                              SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
                              SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
                              SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
                              SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
                              SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
                              SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
                              SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
                              SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
                              SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
                              SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
                              SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
                              SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
                              SwiftWriteP01OneCycle, SwiftVLDMPerm2]>
]> { let Variadic = 1; }

def : InstRW<[SwiftWriteVLDM], (instregex "VLDM[SD](IA|DB)$")>;

// Updating forms list SwiftWriteP01OneCycle2x ahead of the load variant.
def : InstRW<[SwiftWriteP01OneCycle2x, SwiftWriteVLDM],
      (instregex "VLDM[SD](IA|DB)_UPD$")>;

// Store-multiple counterpart of SwiftWriteVLDM: one SwiftWriteSTM<N> write per
// SwiftLMAddr<N>Pred predicate, with a NoSchedPred fallback.
def SwiftWriteVSTM : SchedWriteVariant<[
  // One S register.
  SchedVar<SwiftLMAddr1Pred, [SwiftWriteSTM1]>,
  // One D register.
  SchedVar<SwiftLMAddr2Pred, [SwiftWriteSTM1]>,
  // Three S registers.
  SchedVar<SwiftLMAddr3Pred, [SwiftWriteSTM4]>,
  // Assume one Q register.
  SchedVar<SwiftLMAddr4Pred, [SwiftWriteSTM1]>,
  SchedVar<SwiftLMAddr5Pred, [SwiftWriteSTM6]>,
  // Assume three D registers.
  SchedVar<SwiftLMAddr6Pred, [SwiftWriteSTM4]>,
  SchedVar<SwiftLMAddr7Pred, [SwiftWriteSTM8]>,
  // Assume two Q registers.
  SchedVar<SwiftLMAddr8Pred, [SwiftWriteSTM3]>,
  SchedVar<SwiftLMAddr9Pred, [SwiftWriteSTM10]>,
  // Assume 5 D registers.
  SchedVar<SwiftLMAddr10Pred, [SwiftWriteSTM6]>,
  SchedVar<SwiftLMAddr11Pred, [SwiftWriteSTM12]>,
  // Assume three Q registers.
  SchedVar<SwiftLMAddr12Pred, [SwiftWriteSTM4]>,
  SchedVar<SwiftLMAddr13Pred, [SwiftWriteSTM14]>,
  // Assume 7 D registers.
  SchedVar<SwiftLMAddr14Pred, [SwiftWriteSTM8]>,
  SchedVar<SwiftLMAddr15Pred, [SwiftWriteSTM16]>,
  // Assume four Q registers.
  SchedVar<SwiftLMAddr16Pred, [SwiftWriteSTM5]>,
  // Assume two Q registers.
  SchedVar<NoSchedPred, [SwiftWriteSTM3]>
]> { let Variadic = 1; }

def : InstRW<[SwiftWriteVSTM], (instregex "VSTM[SD](IA|DB)$")>;

// Updating forms list SwiftWriteP01OneCycle2x ahead of the store variant.
def : InstRW<[SwiftWriteP01OneCycle2x, SwiftWriteVSTM],
      (instregex "VSTM[SD](IA|DB)_UPD")>;

// 4.2.43 Advanced SIMD, Element or Structure Load and Store
// Latency-4 writes that hold SwiftUnitP2 for 2 and 3 cycles respectively.
def SwiftWrite2xP2FourCy : SchedWriteRes<[SwiftUnitP2]> {
  let Latency = 4;
  let ResourceCycles = [2];
}
def SwiftWrite3xP2FourCy : SchedWriteRes<[SwiftUnitP2]> {
  let Latency = 4;
  let ResourceCycles = [3];
}
// Defines SwiftExt1xP0 and SwiftExt2xP0: zero latency, Num micro-ops and Num
// cycles on SwiftUnitP0.
foreach Num = 1-2 in {
  def SwiftExt#Num#xP0 : SchedWriteRes<[SwiftUnitP0]> {
    let Latency = 0;
    let NumMicroOps = Num;
    let ResourceCycles = [Num];
  }
}
// VLDx
// Multiple structures.
// Single element structure loads.
// We assume aligned.
// Single/two register.
def : InstRW<[SwiftWriteLM4Cy], (instregex "VLD1(d|q)(8|16|32|64)$")>;
def : InstRW<[SwiftWriteLM4Cy, SwiftWriteP01OneCycle],
      (instregex "VLD1(d|q)(8|16|32|64)wb")>;
// Three register.
def : InstRW<[SwiftWrite3xP2FourCy],
      (instregex "VLD1(d|q)(8|16|32|64)T$", "VLD1d64TPseudo")>;
def : InstRW<[SwiftWrite3xP2FourCy, SwiftWriteP01OneCycle],
      (instregex "VLD1(d|q)(8|16|32|64)Twb")>;
/// Four Register.
// NOTE(review): the four-register forms reserve SwiftUnitP2 for 2 cycles while
// the three-register forms above reserve it for 3 - confirm this is intended.
def : InstRW<[SwiftWrite2xP2FourCy],
      (instregex "VLD1(d|q)(8|16|32|64)Q$", "VLD1d64QPseudo")>;
def : InstRW<[SwiftWrite2xP2FourCy, SwiftWriteP01OneCycle],
      (instregex "VLD1(d|q)(8|16|32|64)Qwb")>;
// Two element structure loads.
// Two/four register.
// In each pair below the writeback entry differs from the plain entry only by
// an added SwiftWriteP01OneCycle, placed after the load writes.
def : InstRW<[SwiftWriteLM9Cy, SwiftExt2xP0, SwiftVLDMPerm2],
      (instregex "VLD2(d|q|b)(8|16|32)$", "VLD2q(8|16|32)Pseudo$")>;
def : InstRW<[SwiftWriteLM9Cy, SwiftWriteP01OneCycle, SwiftExt2xP0,
              SwiftVLDMPerm2],
      (instregex "VLD2(d|q|b)(8|16|32)wb", "VLD2q(8|16|32)PseudoWB")>;
// Three element structure.
// The non-pseudo forms list three load writes, the pseudo forms a single one.
def : InstRW<[SwiftWriteLM9Cy, SwiftWriteLM9CyNo, SwiftWriteLM9CyNo,
              SwiftVLDMPerm3, SwiftWrite3xP2FourCy],
      (instregex "VLD3(d|q)(8|16|32)$")>;
def : InstRW<[SwiftWriteLM9Cy, SwiftVLDMPerm3, SwiftWrite3xP2FourCy],
      (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo$")>;

def : InstRW<[SwiftWriteLM9Cy, SwiftWriteLM9CyNo, SwiftWriteLM9CyNo,
              SwiftWriteP01OneCycle, SwiftVLDMPerm3, SwiftWrite3xP2FourCy],
      (instregex "VLD3(d|q)(8|16|32)_UPD$")>;
def : InstRW<[SwiftWriteLM9Cy, SwiftWriteP01OneCycle, SwiftVLDMPerm3,
              SwiftWrite3xP2FourCy],
      (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo_UPD")>;
// Four element structure loads.
// The non-pseudo forms list four load writes, the pseudo forms a single one.
def : InstRW<[SwiftWriteLM11Cy, SwiftWriteLM11Cy, SwiftWriteLM11Cy,
              SwiftWriteLM11Cy, SwiftExt2xP0, SwiftVLDMPerm4,
              SwiftWrite3xP2FourCy],
      (instregex "VLD4(d|q)(8|16|32)$")>;
def : InstRW<[SwiftWriteLM11Cy, SwiftExt2xP0, SwiftVLDMPerm4,
              SwiftWrite3xP2FourCy],
      (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo$")>;
def : InstRW<[SwiftWriteLM11Cy, SwiftWriteLM11Cy, SwiftWriteLM11Cy,
              SwiftWriteLM11Cy, SwiftWriteP01OneCycle, SwiftExt2xP0,
              SwiftVLDMPerm4, SwiftWrite3xP2FourCy],
      (instregex "VLD4(d|q)(8|16|32)_UPD")>;
def : InstRW<[SwiftWriteLM11Cy, SwiftWriteP01OneCycle, SwiftExt2xP0,
              SwiftVLDMPerm4, SwiftWrite3xP2FourCy],
      (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo_UPD")>;

// Single all/lane loads.
// One element structure.
// Writeback/updating entries add a SwiftWriteP01OneCycle to the write list of
// the corresponding plain entry.
def : InstRW<[SwiftWriteLM6Cy, SwiftVLDMPerm2],
      (instregex "VLD1(LN|DUP)(d|q)(8|16|32)$", "VLD1(LN|DUP)(d|q)(8|16|32)Pseudo$")>;
def : InstRW<[SwiftWriteLM6Cy, SwiftWriteP01OneCycle, SwiftVLDMPerm2],
      (instregex "VLD1(LN|DUP)(d|q)(8|16|32)(wb|_UPD)",
                 "VLD1LNq(8|16|32)Pseudo_UPD")>;
// Two element structure.
def : InstRW<[SwiftWriteLM6Cy, SwiftWriteLM6Cy, SwiftExt1xP0, SwiftVLDMPerm2],
      (instregex "VLD2(DUP|LN)(d|q)(8|16|32|8x2|16x2|32x2)$",
                 "VLD2LN(d|q)(8|16|32)Pseudo$")>;
def : InstRW<[SwiftWriteLM6Cy, SwiftWriteLM6Cy, SwiftWriteP01OneCycle,
              SwiftExt1xP0, SwiftVLDMPerm2],
      (instregex "VLD2LN(d|q)(8|16|32)_UPD$")>;
// In the next two entries SwiftWriteP01OneCycle is the second write rather
// than the third.
def : InstRW<[SwiftWriteLM6Cy, SwiftWriteP01OneCycle, SwiftWriteLM6Cy,
              SwiftExt1xP0, SwiftVLDMPerm2],
      (instregex "VLD2DUPd(8|16|32|8x2|16x2|32x2)wb")>;
def : InstRW<[SwiftWriteLM6Cy, SwiftWriteP01OneCycle, SwiftWriteLM6Cy,
              SwiftExt1xP0, SwiftVLDMPerm2],
      (instregex "VLD2LN(d|q)(8|16|32)Pseudo_UPD")>;
// Three element structure.
def : InstRW<[SwiftWriteLM7Cy, SwiftWriteLM8Cy, SwiftWriteLM8Cy, SwiftExt1xP0,
              SwiftVLDMPerm3],
      (instregex "VLD3(DUP|LN)(d|q)(8|16|32)$",
                 "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo$")>;
def : InstRW<[SwiftWriteLM7Cy, SwiftWriteLM8Cy, SwiftWriteLM8Cy,
              SwiftWriteP01OneCycle, SwiftExt1xP0, SwiftVLDMPerm3],
      (instregex "VLD3(LN|DUP)(d|q)(8|16|32)_UPD")>;
// Pseudo updating form: SwiftWriteP01OneCycle is the second write.
def : InstRW<[SwiftWriteLM7Cy, SwiftWriteP01OneCycle, SwiftWriteLM8Cy,
              SwiftWriteLM8Cy, SwiftExt1xP0, SwiftVLDMPerm3],
      (instregex "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo_UPD")>;
// Four element structure.
def : InstRW<[SwiftWriteLM8Cy, SwiftWriteLM9Cy, SwiftWriteLM10CyNo,
              SwiftWriteLM10CyNo, SwiftExt1xP0, SwiftVLDMPerm5],
      (instregex "VLD4(LN|DUP)(d|q)(8|16|32)$",
                 "VLD4(LN|DUP)(d|q)(8|16|32)Pseudo$")>;
// Updating form: SwiftWriteP01OneCycle follows the four load writes.
def : InstRW<[SwiftWriteLM8Cy, SwiftWriteLM9Cy, SwiftWriteLM10CyNo,
              SwiftWriteLM10CyNo, SwiftWriteP01OneCycle, SwiftExt1xP0,
              SwiftVLDMPerm5],
      (instregex "VLD4(DUP|LN)(d|q)(8|16|32)_UPD")>;
// Pseudo updating form: SwiftWriteP01OneCycle is the second write.
def : InstRW<[SwiftWriteLM8Cy, SwiftWriteP01OneCycle, SwiftWriteLM9Cy,
              SwiftWriteLM10CyNo, SwiftWriteLM10CyNo, SwiftExt1xP0,
              SwiftVLDMPerm5],
      (instregex "VLD4(DUP|LN)(d|q)(8|16|32)Pseudo_UPD")>;
// VSTx
// Multiple structures.
// Single element structure store.
// Writeback store entries list SwiftWriteP01OneCycle first, followed by the
// same SwiftWrite<N>xP2 write as the corresponding plain entry.
def : InstRW<[SwiftWrite1xP2], (instregex "VST1d(8|16|32|64)$")>;
def : InstRW<[SwiftWrite2xP2], (instregex "VST1q(8|16|32|64)$")>;
def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite1xP2],
      (instregex "VST1d(8|16|32|64)wb")>;
def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite2xP2],
      (instregex "VST1q(8|16|32|64)wb")>;
def : InstRW<[SwiftWrite3xP2],
      (instregex "VST1d(8|16|32|64)T$", "VST1d64TPseudo$")>;
def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite3xP2],
      (instregex "VST1d(8|16|32|64)Twb", "VST1d64TPseudoWB")>;
def : InstRW<[SwiftWrite4xP2],
      (instregex "VST1d(8|16|32|64)(Q|QPseudo)$")>;
def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite4xP2],
      (instregex "VST1d(8|16|32|64)(Qwb|QPseudoWB)")>;
// Two element structure store.
// Writeback/updating store entries list SwiftWriteP01OneCycle first, followed
// by the same writes as the corresponding plain entry.
def : InstRW<[SwiftWrite1xP2, SwiftVLDMPerm1],
      (instregex "VST2(d|b)(8|16|32)$")>;
def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite1xP2, SwiftVLDMPerm1],
      (instregex "VST2(b|d)(8|16|32)wb")>;
def : InstRW<[SwiftWrite2xP2, SwiftVLDMPerm2],
      (instregex "VST2q(8|16|32)$", "VST2q(8|16|32)Pseudo$")>;
// The writeback form previously repeated the non-writeback write list, so the
// store write landed on the writeback result. Lead with SwiftWriteP01OneCycle
// like every other writeback store entry in this section.
def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite2xP2, SwiftVLDMPerm2],
      (instregex "VST2q(8|16|32)wb", "VST2q(8|16|32)PseudoWB")>;
// Three element structure store.
def : InstRW<[SwiftWrite4xP2, SwiftVLDMPerm2],
      (instregex "VST3(d|q)(8|16|32)$", "VST3(d|q)(8|16|32)(oddP|P)seudo$")>;
def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite4xP2, SwiftVLDMPerm2],
      (instregex "VST3(d|q)(8|16|32)_UPD",
                 "VST3(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;
// Four element structure store.
// NOTE(review): the plain form reserves SwiftVLDMPerm2 while the updating form
// reserves SwiftVLDMPerm4 - confirm which permute count is intended.
def : InstRW<[SwiftWrite4xP2, SwiftVLDMPerm2],
      (instregex "VST4(d|q)(8|16|32)$", "VST4(d|q)(8|16|32)(oddP|P)seudo$")>;
def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite4xP2, SwiftVLDMPerm4],
      (instregex "VST4(d|q)(8|16|32)_UPD",
                 "VST4(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;
// Single/all lane store.
// One element structure.
def : InstRW<[SwiftWrite1xP2, SwiftVLDMPerm1],
      (instregex "VST1LNd(8|16|32)$", "VST1LNq(8|16|32)Pseudo$")>;
def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite1xP2, SwiftVLDMPerm1],
      (instregex "VST1LNd(8|16|32)_UPD", "VST1LNq(8|16|32)Pseudo_UPD")>;
// Two element structure.
def : InstRW<[SwiftWrite1xP2, SwiftVLDMPerm2],
      (instregex "VST2LN(d|q)(8|16|32)$", "VST2LN(d|q)(8|16|32)Pseudo$")>;
def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite1xP2, SwiftVLDMPerm2],
      (instregex "VST2LN(d|q)(8|16|32)_UPD",
                 "VST2LN(d|q)(8|16|32)Pseudo_UPD")>;
// Three element structure.
// Updating lane-store entries list SwiftWriteP01OneCycle first, followed by
// the same writes as the corresponding plain entry.
def : InstRW<[SwiftWrite4xP2, SwiftVLDMPerm2],
      (instregex "VST3LN(d|q)(8|16|32)$", "VST3LN(d|q)(8|16|32)Pseudo$")>;
def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite4xP2, SwiftVLDMPerm2],
      (instregex "VST3LN(d|q)(8|16|32)_UPD",
                 "VST3LN(d|q)(8|16|32)Pseudo_UPD")>;
// Four element structure.
def : InstRW<[SwiftWrite2xP2, SwiftVLDMPerm2],
      (instregex "VST4LN(d|q)(8|16|32)$", "VST4LN(d|q)(8|16|32)Pseudo$")>;
def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite2xP2, SwiftVLDMPerm2],
      (instregex "VST4LN(d|q)(8|16|32)_UPD",
                 "VST4LN(d|q)(8|16|32)Pseudo_UPD")>;

// 4.2.44 VFP, Divide and Square Root
// Single micro-op writes using SwiftUnitP0 for 1 cycle and SwiftUnitDiv for
// 15 (latency 17) or 30 (latency 32) cycles.
def SwiftDiv17 : SchedWriteRes<[SwiftUnitP0, SwiftUnitDiv]> {
  let NumMicroOps = 1;
  let Latency = 17;
  let ResourceCycles = [1, 15];
}
def SwiftDiv32 : SchedWriteRes<[SwiftUnitP0, SwiftUnitDiv]> {
  let NumMicroOps = 1;
  let Latency = 32;
  let ResourceCycles = [1, 30];
}
// The S-suffixed divide/sqrt patterns use SwiftDiv17, the D-suffixed ones
// SwiftDiv32.
def : InstRW<[SwiftDiv17], (instregex "VDIVS", "VSQRTS")>;
def : InstRW<[SwiftDiv32], (instregex "VDIVD", "VSQRTD")>;

// Not specified.
def : InstRW<[SwiftWriteP01OneCycle2x], (instregex "ABS")>;
// Preload.
// WritePreLd is bound to SwiftUnitP2 with zero latency and zero resource
// cycles.
def : WriteRes<WritePreLd, [SwiftUnitP2]> { let Latency = 0;
  let ResourceCycles = [0];
}

}