//===-- ARMSchedule.td - ARM Scheduling Definitions --------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Instruction scheduling annotations for out-of-order CPUs.
// These annotations are independent of the itinerary class defined below.
// Here we define the subtarget independent read/write per-operand resources.
// The subtarget schedule definitions will then map these to the subtarget's
// resource usages.
// For example:
// The instruction cycle timings table might contain an entry for an operation
// like the following:
// Rd <- ADD Rn, Rm, <shift> Rs
//  Uops | Latency from register | Uops - resource requirements - latency
//  2    | Rn: 1 Rm: 4 Rs: 4     | uop T0, Rm, Rs - P01 - 3
//       |                       | uopc Rd, Rn, T0 - P01 - 1
// This is telling us that the result will be available in destination register
// Rd after a minimum of three cycles after the result in Rm and Rs is available
// and one cycle after the result in Rn is available. The micro-ops can execute
// on resource P01.
// To model this, we need to express that we need to dispatch two micro-ops,
// that the resource P01 is needed and that the latency to Rn is different than
// the latency to Rm and Rs. The scheduler can decrease Rn's producer latency by
// two.
// We will do this by assigning (abstract) resources to register defs/uses.
// ARMSchedule.td:
//   def WriteALUsr : SchedWrite;
//   def ReadAdvanceALUsr : SchedRead;
//
// ARMInstrInfo.td:
//   def ADDrs : I<>, Sched<[WriteALUsr, ReadAdvanceALUsr, ReadDefault,
//                           ReadDefault]> { ...}
// ReadAdvance read resources allow us to define "pipeline by-passes" or
// shorter latencies to certain registers as needed in the example above.
// The "ReadDefault" can be omitted.
// Next, the subtarget td file assigns resources to the abstract resources
// defined here.
// ARMScheduleSubtarget.td:
//   // Resources.
//   def P01 : ProcResource<3>; // ALU unit (3 of them).
//   ...
//   // Resource usages.
//   def : WriteRes<WriteALUsr, [P01, P01]> {
//     Latency = 4; // Latency of 4.
//     NumMicroOps = 2; // Dispatch 2 micro-ops.
//     // The two instances of resource P01 are occupied for one cycle. It is one
//     // cycle because these resources happen to be pipelined.
//     ResourceCycles = [1, 1];
//   }
//   def : ReadAdvance<ReadAdvanceALUsr, 3>;

// The records below are the subtarget-independent SchedWrite/SchedRead
// resources described above. They carry no fields of their own in this file.

// Basic ALU operation.
def WriteALU    : SchedWrite;
def ReadALU     : SchedRead;

// Basic ALU with shifts.
def WriteALUsi  : SchedWrite; // Shift by immediate.
def WriteALUsr  : SchedWrite; // Shift by register.
def WriteALUSsr : SchedWrite; // Shift by register (flag setting).
def ReadALUsr   : SchedRead;  // Some operands are read later.

// Compares.
def WriteCMP    : SchedWrite;
def WriteCMPsi  : SchedWrite;
def WriteCMPsr  : SchedWrite;

// Division.
def WriteDiv    : SchedWrite;

// Loads.
def WriteLd     : SchedWrite;
def WritePreLd  : SchedWrite;

// Branches.
def WriteBr     : SchedWrite;
def WriteBrL    : SchedWrite;
def WriteBrTbl  : SchedWrite;

// Fixpoint conversions.
def WriteCvtFP  : SchedWrite;

// Noop.
def WriteNoop   : SchedWrite;

// Define TII for use in SchedVariant Predicates.
// Prolog code for scheduling predicates: declares TII, which the predicate
// code below references. The (void) cast marks TII as used, presumably to
// avoid unused-variable warnings where a predicate does not touch it.
def : PredicateProlog<[{
  const ARMBaseInstrInfo *TII =
    static_cast<const ARMBaseInstrInfo*>(SchedModel->getInstrInfo());
  (void)TII;
}]>;

// Predicate whose code asks TII whether the instruction *MI is predicated.
def IsPredicatedPred : SchedPredicate<[{TII->isPredicated(*MI)}]>;

//===----------------------------------------------------------------------===//
// Instruction Itinerary classes used for ARM
//
// Every record below is a bare InstrItinClass with no fields set here.
// Definition order is kept exactly as in the original file.
//

// IIC_i* classes.
def IIC_iALUx         : InstrItinClass;
def IIC_iALUi         : InstrItinClass;
def IIC_iALUr         : InstrItinClass;
def IIC_iALUsi        : InstrItinClass;
def IIC_iALUsir       : InstrItinClass;
def IIC_iALUsr        : InstrItinClass;
def IIC_iBITi         : InstrItinClass;
def IIC_iBITr         : InstrItinClass;
def IIC_iBITsi        : InstrItinClass;
def IIC_iBITsr        : InstrItinClass;
def IIC_iUNAr         : InstrItinClass;
def IIC_iUNAsi        : InstrItinClass;
def IIC_iEXTr         : InstrItinClass;
def IIC_iEXTAr        : InstrItinClass;
def IIC_iEXTAsr       : InstrItinClass;
def IIC_iCMPi         : InstrItinClass;
def IIC_iCMPr         : InstrItinClass;
def IIC_iCMPsi        : InstrItinClass;
def IIC_iCMPsr        : InstrItinClass;
def IIC_iTSTi         : InstrItinClass;
def IIC_iTSTr         : InstrItinClass;
def IIC_iTSTsi        : InstrItinClass;
def IIC_iTSTsr        : InstrItinClass;
def IIC_iMOVi         : InstrItinClass;
def IIC_iMOVr         : InstrItinClass;
def IIC_iMOVsi        : InstrItinClass;
def IIC_iMOVsr        : InstrItinClass;
def IIC_iMOVix2       : InstrItinClass;
def IIC_iMOVix2addpc  : InstrItinClass;
def IIC_iMOVix2ld     : InstrItinClass;
def IIC_iMVNi         : InstrItinClass;
def IIC_iMVNr         : InstrItinClass;
def IIC_iMVNsi        : InstrItinClass;
def IIC_iMVNsr        : InstrItinClass;
def IIC_iCMOVi        : InstrItinClass;
def IIC_iCMOVr        : InstrItinClass;
def IIC_iCMOVsi       : InstrItinClass;
def IIC_iCMOVsr       : InstrItinClass;
def IIC_iCMOVix2      : InstrItinClass;
def IIC_iMUL16        : InstrItinClass;
def IIC_iMAC16        : InstrItinClass;
def IIC_iMUL32        : InstrItinClass;
def IIC_iMAC32        : InstrItinClass;
def IIC_iMUL64        : InstrItinClass;
def IIC_iMAC64        : InstrItinClass;
def IIC_iDIV          : InstrItinClass;
def IIC_iLoad_i       : InstrItinClass;
def IIC_iLoad_r       : InstrItinClass;
def IIC_iLoad_si      : InstrItinClass;
def IIC_iLoad_iu      : InstrItinClass;
def IIC_iLoad_ru      : InstrItinClass;
def IIC_iLoad_siu     : InstrItinClass;
def IIC_iLoad_bh_i    : InstrItinClass;
def IIC_iLoad_bh_r    : InstrItinClass;
def IIC_iLoad_bh_si   : InstrItinClass;
def IIC_iLoad_bh_iu   : InstrItinClass;
def IIC_iLoad_bh_ru   : InstrItinClass;
def IIC_iLoad_bh_siu  : InstrItinClass;
def IIC_iLoad_d_i     : InstrItinClass;
def IIC_iLoad_d_r     : InstrItinClass;
def IIC_iLoad_d_ru    : InstrItinClass;
def IIC_iLoad_m       : InstrItinClass;
def IIC_iLoad_mu      : InstrItinClass;
def IIC_iLoad_mBr     : InstrItinClass;
def IIC_iPop          : InstrItinClass;
def IIC_iPop_Br       : InstrItinClass;
def IIC_iLoadiALU     : InstrItinClass;
def IIC_iStore_i      : InstrItinClass;
def IIC_iStore_r      : InstrItinClass;
def IIC_iStore_si     : InstrItinClass;
def IIC_iStore_iu     : InstrItinClass;
def IIC_iStore_ru     : InstrItinClass;
def IIC_iStore_siu    : InstrItinClass;
def IIC_iStore_bh_i   : InstrItinClass;
def IIC_iStore_bh_r   : InstrItinClass;
def IIC_iStore_bh_si  : InstrItinClass;
def IIC_iStore_bh_iu  : InstrItinClass;
def IIC_iStore_bh_ru  : InstrItinClass;
def IIC_iStore_bh_siu : InstrItinClass;
def IIC_iStore_d_i    : InstrItinClass;
def IIC_iStore_d_r    : InstrItinClass;
def IIC_iStore_d_ru   : InstrItinClass;
def IIC_iStore_m      : InstrItinClass;
def IIC_iStore_mu     : InstrItinClass;

// IIC_Preload and IIC_Br.
def IIC_Preload       : InstrItinClass;
def IIC_Br            : InstrItinClass;

// IIC_fp* classes.
def IIC_fpSTAT        : InstrItinClass;
def IIC_fpUNA16       : InstrItinClass;
def IIC_fpUNA32       : InstrItinClass;
def IIC_fpUNA64       : InstrItinClass;
def IIC_fpCMP16       : InstrItinClass;
def IIC_fpCMP32       : InstrItinClass;
def IIC_fpCMP64       : InstrItinClass;
def IIC_fpCVTSD       : InstrItinClass;
def IIC_fpCVTDS       : InstrItinClass;
def IIC_fpCVTSH       : InstrItinClass;
def IIC_fpCVTHS       : InstrItinClass;
def IIC_fpCVTIH       : InstrItinClass;
def IIC_fpCVTIS       : InstrItinClass;
def IIC_fpCVTID       : InstrItinClass;
def IIC_fpCVTHI       : InstrItinClass;
def IIC_fpCVTSI       : InstrItinClass;
def IIC_fpCVTDI       : InstrItinClass;
def IIC_fpMOVIS       : InstrItinClass;
def IIC_fpMOVID       : InstrItinClass;
def IIC_fpMOVSI       : InstrItinClass;
def IIC_fpMOVDI       : InstrItinClass;
def IIC_fpALU16       : InstrItinClass;
def IIC_fpALU32       : InstrItinClass;
def IIC_fpALU64       : InstrItinClass;
def IIC_fpMUL16       : InstrItinClass;
def IIC_fpMUL32       : InstrItinClass;
def IIC_fpMUL64       : InstrItinClass;
def IIC_fpMAC16       : InstrItinClass;
def IIC_fpMAC32       : InstrItinClass;
def IIC_fpMAC64       : InstrItinClass;
def IIC_fpFMAC16      : InstrItinClass;
def IIC_fpFMAC32      : InstrItinClass;
def IIC_fpFMAC64      : InstrItinClass;
def IIC_fpDIV16       : InstrItinClass;
def IIC_fpDIV32       : InstrItinClass;
def IIC_fpDIV64       : InstrItinClass;
def IIC_fpSQRT16      : InstrItinClass;
def IIC_fpSQRT32      : InstrItinClass;
def IIC_fpSQRT64      : InstrItinClass;
def IIC_fpLoad16      : InstrItinClass;
def IIC_fpLoad32      : InstrItinClass;
def IIC_fpLoad64      : InstrItinClass;
def IIC_fpLoad_m      : InstrItinClass;
def IIC_fpLoad_mu     : InstrItinClass;
def IIC_fpStore16     : InstrItinClass;
def IIC_fpStore32     : InstrItinClass;
def IIC_fpStore64     : InstrItinClass;
def IIC_fpStore_m     : InstrItinClass;
def IIC_fpStore_mu    : InstrItinClass;

// IIC_V* classes.
def IIC_VLD1          : InstrItinClass;
def IIC_VLD1x2        : InstrItinClass;
def IIC_VLD1x3        : InstrItinClass;
def IIC_VLD1x4        : InstrItinClass;
def IIC_VLD1u         : InstrItinClass;
def IIC_VLD1x2u       : InstrItinClass;
def IIC_VLD1x3u       : InstrItinClass;
def IIC_VLD1x4u       : InstrItinClass;
def IIC_VLD1ln        : InstrItinClass;
def IIC_VLD1lnu       : InstrItinClass;
def IIC_VLD1dup       : InstrItinClass;
def IIC_VLD1dupu      : InstrItinClass;
def IIC_VLD2          : InstrItinClass;
def IIC_VLD2x2        : InstrItinClass;
def IIC_VLD2u         : InstrItinClass;
def IIC_VLD2x2u       : InstrItinClass;
def IIC_VLD2ln        : InstrItinClass;
def IIC_VLD2lnu       : InstrItinClass;
def IIC_VLD2dup       : InstrItinClass;
def IIC_VLD2dupu      : InstrItinClass;
def IIC_VLD3          : InstrItinClass;
def IIC_VLD3ln        : InstrItinClass;
def IIC_VLD3u         : InstrItinClass;
def IIC_VLD3lnu       : InstrItinClass;
def IIC_VLD3dup       : InstrItinClass;
def IIC_VLD3dupu      : InstrItinClass;
def IIC_VLD4          : InstrItinClass;
def IIC_VLD4ln        : InstrItinClass;
def IIC_VLD4u         : InstrItinClass;
def IIC_VLD4lnu       : InstrItinClass;
def IIC_VLD4dup       : InstrItinClass;
def IIC_VLD4dupu      : InstrItinClass;
def IIC_VST1          : InstrItinClass;
def IIC_VST1x2        : InstrItinClass;
def IIC_VST1x3        : InstrItinClass;
def IIC_VST1x4        : InstrItinClass;
def IIC_VST1u         : InstrItinClass;
def IIC_VST1x2u       : InstrItinClass;
def IIC_VST1x3u       : InstrItinClass;
def IIC_VST1x4u       : InstrItinClass;
def IIC_VST1ln        : InstrItinClass;
def IIC_VST1lnu       : InstrItinClass;
def IIC_VST2          : InstrItinClass;
def IIC_VST2x2        : InstrItinClass;
def IIC_VST2u         : InstrItinClass;
def IIC_VST2x2u       : InstrItinClass;
def IIC_VST2ln        : InstrItinClass;
def IIC_VST2lnu       : InstrItinClass;
def IIC_VST3          : InstrItinClass;
def IIC_VST3u         : InstrItinClass;
def IIC_VST3ln        : InstrItinClass;
def IIC_VST3lnu       : InstrItinClass;
def IIC_VST4          : InstrItinClass;
def IIC_VST4u         : InstrItinClass;
def IIC_VST4ln        : InstrItinClass;
def IIC_VST4lnu       : InstrItinClass;
def IIC_VUNAD         : InstrItinClass;
def IIC_VUNAQ         : InstrItinClass;
def IIC_VBIND         : InstrItinClass;
def IIC_VBINQ         : InstrItinClass;
def IIC_VPBIND        : InstrItinClass;
def IIC_VFMULD        : InstrItinClass;
def IIC_VFMULQ        : InstrItinClass;
def IIC_VMOV          : InstrItinClass;
def IIC_VMOVImm       : InstrItinClass;
def IIC_VMOVD         : InstrItinClass;
def IIC_VMOVQ         : InstrItinClass;
def IIC_VMOVIS        : InstrItinClass;
def IIC_VMOVID        : InstrItinClass;
def IIC_VMOVISL       : InstrItinClass;
def IIC_VMOVSI        : InstrItinClass;
def IIC_VMOVDI        : InstrItinClass;
def IIC_VMOVN         : InstrItinClass;
def IIC_VPERMD        : InstrItinClass;
def IIC_VPERMQ        : InstrItinClass;
def IIC_VPERMQ3       : InstrItinClass;
def IIC_VMACD         : InstrItinClass;
def IIC_VMACQ         : InstrItinClass;
def IIC_VFMACD        : InstrItinClass;
def IIC_VFMACQ        : InstrItinClass;
def IIC_VRECSD        : InstrItinClass;
def IIC_VRECSQ        : InstrItinClass;
def IIC_VCNTiD        : InstrItinClass;
def IIC_VCNTiQ        : InstrItinClass;
def IIC_VUNAiD        : InstrItinClass;
def IIC_VUNAiQ        : InstrItinClass;
def IIC_VQUNAiD       : InstrItinClass;
def IIC_VQUNAiQ       : InstrItinClass;
def IIC_VBINiD        : InstrItinClass;
def IIC_VBINiQ        : InstrItinClass;
def IIC_VSUBiD        : InstrItinClass;
def IIC_VSUBiQ        : InstrItinClass;
def IIC_VBINi4D       : InstrItinClass;
def IIC_VBINi4Q       : InstrItinClass;
def IIC_VSUBi4D       : InstrItinClass;
def IIC_VSUBi4Q       : InstrItinClass;
def IIC_VABAD         : InstrItinClass;
def IIC_VABAQ         : InstrItinClass;
def IIC_VSHLiD        : InstrItinClass;
def IIC_VSHLiQ        : InstrItinClass;
def IIC_VSHLi4D       : InstrItinClass;
def IIC_VSHLi4Q       : InstrItinClass;
def IIC_VPALiD        : InstrItinClass;
def IIC_VPALiQ        : InstrItinClass;
def IIC_VMULi16D      : InstrItinClass;
def IIC_VMULi32D      : InstrItinClass;
def IIC_VMULi16Q      : InstrItinClass;
def IIC_VMULi32Q      : InstrItinClass;
def IIC_VMACi16D      : InstrItinClass;
def IIC_VMACi32D      : InstrItinClass;
def IIC_VMACi16Q      : InstrItinClass;
def IIC_VMACi32Q      : InstrItinClass;
def IIC_VEXTD         : InstrItinClass;
def IIC_VEXTQ         : InstrItinClass;
def IIC_VTB1          : InstrItinClass;
def IIC_VTB2          : InstrItinClass;
def IIC_VTB3          : InstrItinClass;
def IIC_VTB4          : InstrItinClass;
def IIC_VTBX1         : InstrItinClass;
def IIC_VTBX2         : InstrItinClass;
def IIC_VTBX3         : InstrItinClass;
def IIC_VTBX4         : InstrItinClass;

//===----------------------------------------------------------------------===//
// Processor instruction itineraries.
//
// The includes come after the definitions above, presumably so the included
// files can refer to them.

include "ARMScheduleV6.td"
include "ARMScheduleA8.td"
include "ARMScheduleA9.td"
include "ARMScheduleSwift.td"