Home | History | Annotate | Download | only in ARM
      1 //===-- ARMSchedule.td - ARM Scheduling Definitions --------*- tablegen -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //===----------------------------------------------------------------------===//
     10 // Instruction scheduling annotations for out-of-order CPUs.
     11 // These annotations are independent of the itinerary class defined below.
     12 // Here we define the subtarget independent read/write per-operand resources.
     13 // The subtarget schedule definitions will then map these to the subtarget's
     14 // resource usages.
     15 // For example:
     16 // The instruction cycle timings table might contain an entry for an operation
     17 // like the following:
     18 // Rd <- ADD Rn, Rm, <shift> Rs
     19 //  Uops | Latency from register | Uops - resource requirements - latency
     20 //  2    | Rn: 1 Rm: 4 Rs: 4     | uop T0, Rm, Rs - P01 - 3
     21 //       |                       | uopc Rd, Rn, T0 -  P01 - 1
     22 // This is telling us that the result will be available in destination register
     23 // Rd after a minimum of four cycles after the result in Rm and Rs is available
     24 // and one cycle after the result in Rn is available. The micro-ops can execute
     25 // on resource P01.
     26 // To model this, we need to express that we need to dispatch two micro-ops,
     27 // that the resource P01 is needed and that the latency to Rn is different than
     28 // the latency to Rm and Rs. The scheduler can decrease Rn's producer latency by
     29 // three.
     30 // We will do this by assigning (abstract) resources to register defs/uses.
     31 // ARMSchedule.td:
     32 //   def WriteALUsr : SchedWrite;
     33 //   def ReadAdvanceALUsr : SchedRead;
     34 //
     35 // ARMInstrInfo.td:
     36 //   def ADDrs : I<>, Sched<[WriteALUsr, ReadAdvanceALUsr, ReadDefault,
     37 //                           ReadDefault]> { ...}
     38 // ReadAdvance read resources allow us to define "pipeline by-passes" or
     39 // shorter latencies to certain registers as needed in the example above.
     40 // The "ReadDefault" can be omitted.
     41 // Next, the subtarget td file assigns resources to the abstract resources
     42 // defined here.
     43 // ARMScheduleSubtarget.td:
     44 //  // Resources.
     45 //  def P01 : ProcResource<3>; // ALU unit (3 of them).
     46 //  ...
     47 //  // Resource usages.
     48 //  def : WriteRes<WriteALUsr, [P01, P01]> {
     49 //    Latency = 4; // Latency of 4.
     50 //    NumMicroOps = 2; // Dispatch 2 micro-ops.
     51 //    // The two instances of resource P01 are occupied for one cycle. It is one
     52 //    // cycle because these resources happen to be pipelined.
     53 //    ResourceCycles = [1, 1];
     54 //  }
     55 //  def : ReadAdvance<ReadAdvanceALUsr, 3>;
     56 
     57 // Basic ALU operation.
        // WriteALU is the abstract write (register def) resource and ReadALU the
        // abstract read (register use) resource. Both are subtarget independent; the
        // subtarget schedule definitions map them to that subtarget's resource usage.
     58 def WriteALU : SchedWrite;
     59 def ReadALU : SchedRead;
     60 
     61 // Basic ALU with shifts.
        // A separate abstract write resource is declared for each shift form listed
        // below, plus one read resource for the shift-by-register form.
     62 def WriteALUsi : SchedWrite; // Shift by immediate.
     63 def WriteALUsr : SchedWrite; // Shift by register.
     64 def WriteALUSsr : SchedWrite; // Shift by register (flag setting).
     65 def ReadALUsr : SchedRead; // Some operands are read later.
     66 
     67 // Compares.
        // Abstract write resources for compare instructions.
        // NOTE(review): the si/sr suffixes presumably mirror WriteALUsi/WriteALUsr
        // (shift by immediate / shift by register) -- confirm against their users.
     68 def WriteCMP : SchedWrite;
     69 def WriteCMPsi : SchedWrite;
     70 def WriteCMPsr : SchedWrite;
     71 
     72 // Division.
        // Abstract write resource for division; no latency or resource usage is
        // attached here.
     73 def WriteDiv : SchedWrite;
     74 
     75 // Loads.
        // Abstract write resources for loads.
        // NOTE(review): WritePreLd presumably covers preload (prefetch-style)
        // instructions rather than ordinary loads -- confirm against its users.
     76 def WriteLd : SchedWrite;
     77 def WritePreLd : SchedWrite;
     78 
     79 // Branches.
        // Abstract write resources for branch instructions.
        // NOTE(review): WriteBrL presumably means branch-with-link and WriteBrTbl
        // table (jump-table) branch -- confirm against the instructions using them.
     80 def WriteBr : SchedWrite;
     81 def WriteBrL : SchedWrite;
     82 def WriteBrTbl : SchedWrite;
     83 
     84 // Fixpoint conversions.
        // Abstract write resource for conversion instructions.
        // NOTE(review): "Fixpoint" presumably means fixed-point <-> floating-point
        // conversions (the def name ends in FP) -- confirm against its users.
     85 def WriteCvtFP : SchedWrite;
     86 
     87 // Noop.
        // Abstract write resource for no-op instructions; like the other SchedWrite
        // defs in this file it carries no subtarget-specific data.
     88 def WriteNoop : SchedWrite;
     89 
     90 // Define TII for use in SchedVariant Predicates.
        // The C++ fragment below obtains the instruction info from SchedModel, casts
        // it to ARMBaseInstrInfo, and binds it to a local named TII, which
        // IsPredicatedPred then dereferences.
        // The (void)TII statement presumably suppresses unused-variable warnings in
        // generated predicate code that never references TII -- confirm.
     91 def : PredicateProlog<[{
     92   const ARMBaseInstrInfo *TII =
     93     static_cast<const ARMBaseInstrInfo*>(SchedModel->getInstrInfo());
     94   (void)TII;
     95 }]>;
     96 
     97 def IsPredicatedPred : SchedPredicate<[{TII->isPredicated(*MI)}]>; // Predicate: result of TII->isPredicated(*MI); relies on the TII local from the PredicateProlog.
     98 
     99 //===----------------------------------------------------------------------===//
    100 // Instruction Itinerary classes used for ARM
    101 //
        // Each def below only declares a named InstrItinClass; no pipeline stages or
        // latencies are attached in this file.
        // NOTE(review): the per-processor files included at the end of this file
        // (ARMScheduleV6.td, ARMScheduleA8.td, ARMScheduleA9.td, ARMScheduleSwift.td)
        // are presumably where these classes receive their itinerary data -- confirm.
        // NOTE(review): the IIC_i* names in this first group look like integer
        // ALU/bit/extend/compare/move/multiply/divide classes, with i/r/si/sr suffixes
        // presumably denoting immediate / register / shifted-by-immediate /
        // shifted-by-register operand forms -- confirm against the instruction defs.
    102 def IIC_iALUx      : InstrItinClass;
    103 def IIC_iALUi      : InstrItinClass;
    104 def IIC_iALUr      : InstrItinClass;
    105 def IIC_iALUsi     : InstrItinClass;
    106 def IIC_iALUsir    : InstrItinClass;
    107 def IIC_iALUsr     : InstrItinClass;
    108 def IIC_iBITi      : InstrItinClass;
    109 def IIC_iBITr      : InstrItinClass;
    110 def IIC_iBITsi     : InstrItinClass;
    111 def IIC_iBITsr     : InstrItinClass;
    112 def IIC_iUNAr      : InstrItinClass;
    113 def IIC_iUNAsi     : InstrItinClass;
    114 def IIC_iEXTr      : InstrItinClass;
    115 def IIC_iEXTAr     : InstrItinClass;
    116 def IIC_iEXTAsr    : InstrItinClass;
    117 def IIC_iCMPi      : InstrItinClass;
    118 def IIC_iCMPr      : InstrItinClass;
    119 def IIC_iCMPsi     : InstrItinClass;
    120 def IIC_iCMPsr     : InstrItinClass;
    121 def IIC_iTSTi      : InstrItinClass;
    122 def IIC_iTSTr      : InstrItinClass;
    123 def IIC_iTSTsi     : InstrItinClass;
    124 def IIC_iTSTsr     : InstrItinClass;
    125 def IIC_iMOVi      : InstrItinClass;
    126 def IIC_iMOVr      : InstrItinClass;
    127 def IIC_iMOVsi     : InstrItinClass;
    128 def IIC_iMOVsr     : InstrItinClass;
    129 def IIC_iMOVix2    : InstrItinClass;
    130 def IIC_iMOVix2addpc : InstrItinClass;
    131 def IIC_iMOVix2ld  : InstrItinClass;
    132 def IIC_iMVNi      : InstrItinClass;
    133 def IIC_iMVNr      : InstrItinClass;
    134 def IIC_iMVNsi     : InstrItinClass;
    135 def IIC_iMVNsr     : InstrItinClass;
    136 def IIC_iCMOVi     : InstrItinClass;
    137 def IIC_iCMOVr     : InstrItinClass;
    138 def IIC_iCMOVsi    : InstrItinClass;
    139 def IIC_iCMOVsr    : InstrItinClass;
    140 def IIC_iCMOVix2   : InstrItinClass;
    141 def IIC_iMUL16     : InstrItinClass;
    142 def IIC_iMAC16     : InstrItinClass;
    143 def IIC_iMUL32     : InstrItinClass;
    144 def IIC_iMAC32     : InstrItinClass;
    145 def IIC_iMUL64     : InstrItinClass;
    146 def IIC_iMAC64     : InstrItinClass;
    147 def IIC_iDIV     : InstrItinClass;
    148 def IIC_iLoad_i    : InstrItinClass; // NOTE(review): IIC_iLoad*/IIC_iPop*/IIC_iStore* presumably cover integer loads, pops and stores -- confirm.
    149 def IIC_iLoad_r    : InstrItinClass;
    150 def IIC_iLoad_si   : InstrItinClass;
    151 def IIC_iLoad_iu   : InstrItinClass;
    152 def IIC_iLoad_ru   : InstrItinClass;
    153 def IIC_iLoad_siu  : InstrItinClass;
    154 def IIC_iLoad_bh_i   : InstrItinClass;
    155 def IIC_iLoad_bh_r   : InstrItinClass;
    156 def IIC_iLoad_bh_si  : InstrItinClass;
    157 def IIC_iLoad_bh_iu  : InstrItinClass;
    158 def IIC_iLoad_bh_ru  : InstrItinClass;
    159 def IIC_iLoad_bh_siu : InstrItinClass;
    160 def IIC_iLoad_d_i  : InstrItinClass;
    161 def IIC_iLoad_d_r  : InstrItinClass;
    162 def IIC_iLoad_d_ru : InstrItinClass;
    163 def IIC_iLoad_m    : InstrItinClass;
    164 def IIC_iLoad_mu   : InstrItinClass;
    165 def IIC_iLoad_mBr  : InstrItinClass;
    166 def IIC_iPop       : InstrItinClass;
    167 def IIC_iPop_Br    : InstrItinClass;
    168 def IIC_iLoadiALU  : InstrItinClass;
    169 def IIC_iStore_i   : InstrItinClass;
    170 def IIC_iStore_r   : InstrItinClass;
    171 def IIC_iStore_si  : InstrItinClass;
    172 def IIC_iStore_iu  : InstrItinClass;
    173 def IIC_iStore_ru  : InstrItinClass;
    174 def IIC_iStore_siu : InstrItinClass;
    175 def IIC_iStore_bh_i   : InstrItinClass;
    176 def IIC_iStore_bh_r   : InstrItinClass;
    177 def IIC_iStore_bh_si  : InstrItinClass;
    178 def IIC_iStore_bh_iu  : InstrItinClass;
    179 def IIC_iStore_bh_ru  : InstrItinClass;
    180 def IIC_iStore_bh_siu : InstrItinClass;
    181 def IIC_iStore_d_i   : InstrItinClass;
    182 def IIC_iStore_d_r   : InstrItinClass;
    183 def IIC_iStore_d_ru  : InstrItinClass;
    184 def IIC_iStore_m   : InstrItinClass;
    185 def IIC_iStore_mu  : InstrItinClass;
    186 def IIC_Preload    : InstrItinClass; // NOTE(review): presumably the itinerary counterpart of WritePreLd -- confirm.
    187 def IIC_Br         : InstrItinClass; // Only branch itinerary class declared in this file.
    188 def IIC_fpSTAT     : InstrItinClass; // NOTE(review): the IIC_fp* classes presumably cover scalar floating-point ops; 16/32/64 presumably give operand width -- confirm.
    189 def IIC_fpUNA16    : InstrItinClass;
    190 def IIC_fpUNA32    : InstrItinClass;
    191 def IIC_fpUNA64    : InstrItinClass;
    192 def IIC_fpCMP16    : InstrItinClass;
    193 def IIC_fpCMP32    : InstrItinClass;
    194 def IIC_fpCMP64    : InstrItinClass;
    195 def IIC_fpCVTSD    : InstrItinClass;
    196 def IIC_fpCVTDS    : InstrItinClass;
    197 def IIC_fpCVTSH    : InstrItinClass;
    198 def IIC_fpCVTHS    : InstrItinClass;
    199 def IIC_fpCVTIH    : InstrItinClass;
    200 def IIC_fpCVTIS    : InstrItinClass;
    201 def IIC_fpCVTID    : InstrItinClass;
    202 def IIC_fpCVTHI    : InstrItinClass;
    203 def IIC_fpCVTSI    : InstrItinClass;
    204 def IIC_fpCVTDI    : InstrItinClass;
    205 def IIC_fpMOVIS    : InstrItinClass;
    206 def IIC_fpMOVID    : InstrItinClass;
    207 def IIC_fpMOVSI    : InstrItinClass;
    208 def IIC_fpMOVDI    : InstrItinClass;
    209 def IIC_fpALU16    : InstrItinClass;
    210 def IIC_fpALU32    : InstrItinClass;
    211 def IIC_fpALU64    : InstrItinClass;
    212 def IIC_fpMUL16    : InstrItinClass;
    213 def IIC_fpMUL32    : InstrItinClass;
    214 def IIC_fpMUL64    : InstrItinClass;
    215 def IIC_fpMAC16    : InstrItinClass;
    216 def IIC_fpMAC32    : InstrItinClass;
    217 def IIC_fpMAC64    : InstrItinClass;
    218 def IIC_fpFMAC16   : InstrItinClass;
    219 def IIC_fpFMAC32   : InstrItinClass;
    220 def IIC_fpFMAC64   : InstrItinClass;
    221 def IIC_fpDIV16    : InstrItinClass;
    222 def IIC_fpDIV32    : InstrItinClass;
    223 def IIC_fpDIV64    : InstrItinClass;
    224 def IIC_fpSQRT16   : InstrItinClass;
    225 def IIC_fpSQRT32   : InstrItinClass;
    226 def IIC_fpSQRT64   : InstrItinClass;
    227 def IIC_fpLoad16   : InstrItinClass;
    228 def IIC_fpLoad32   : InstrItinClass;
    229 def IIC_fpLoad64   : InstrItinClass;
    230 def IIC_fpLoad_m   : InstrItinClass;
    231 def IIC_fpLoad_mu  : InstrItinClass;
    232 def IIC_fpStore16  : InstrItinClass;
    233 def IIC_fpStore32  : InstrItinClass;
    234 def IIC_fpStore64  : InstrItinClass;
    235 def IIC_fpStore_m  : InstrItinClass;
    236 def IIC_fpStore_mu : InstrItinClass;
    237 def IIC_VLD1       : InstrItinClass; // NOTE(review): IIC_VLD*/IIC_VST* presumably cover vector (NEON) structure loads/stores; u/ln/dup suffixes presumably mean writeback / lane / duplicate forms -- confirm.
    238 def IIC_VLD1x2     : InstrItinClass;
    239 def IIC_VLD1x3     : InstrItinClass;
    240 def IIC_VLD1x4     : InstrItinClass;
    241 def IIC_VLD1u      : InstrItinClass;
    242 def IIC_VLD1x2u    : InstrItinClass;
    243 def IIC_VLD1x3u    : InstrItinClass;
    244 def IIC_VLD1x4u    : InstrItinClass;
    245 def IIC_VLD1ln     : InstrItinClass;
    246 def IIC_VLD1lnu    : InstrItinClass;
    247 def IIC_VLD1dup    : InstrItinClass;
    248 def IIC_VLD1dupu   : InstrItinClass;
    249 def IIC_VLD2       : InstrItinClass;
    250 def IIC_VLD2x2     : InstrItinClass;
    251 def IIC_VLD2u      : InstrItinClass;
    252 def IIC_VLD2x2u    : InstrItinClass;
    253 def IIC_VLD2ln     : InstrItinClass;
    254 def IIC_VLD2lnu    : InstrItinClass;
    255 def IIC_VLD2dup    : InstrItinClass;
    256 def IIC_VLD2dupu   : InstrItinClass;
    257 def IIC_VLD3       : InstrItinClass;
    258 def IIC_VLD3ln     : InstrItinClass;
    259 def IIC_VLD3u      : InstrItinClass;
    260 def IIC_VLD3lnu    : InstrItinClass;
    261 def IIC_VLD3dup    : InstrItinClass;
    262 def IIC_VLD3dupu   : InstrItinClass;
    263 def IIC_VLD4       : InstrItinClass;
    264 def IIC_VLD4ln     : InstrItinClass;
    265 def IIC_VLD4u      : InstrItinClass;
    266 def IIC_VLD4lnu    : InstrItinClass;
    267 def IIC_VLD4dup    : InstrItinClass;
    268 def IIC_VLD4dupu   : InstrItinClass;
    269 def IIC_VST1       : InstrItinClass;
    270 def IIC_VST1x2     : InstrItinClass;
    271 def IIC_VST1x3     : InstrItinClass;
    272 def IIC_VST1x4     : InstrItinClass;
    273 def IIC_VST1u      : InstrItinClass;
    274 def IIC_VST1x2u    : InstrItinClass;
    275 def IIC_VST1x3u    : InstrItinClass;
    276 def IIC_VST1x4u    : InstrItinClass;
    277 def IIC_VST1ln     : InstrItinClass;
    278 def IIC_VST1lnu    : InstrItinClass;
    279 def IIC_VST2       : InstrItinClass;
    280 def IIC_VST2x2     : InstrItinClass;
    281 def IIC_VST2u      : InstrItinClass;
    282 def IIC_VST2x2u    : InstrItinClass;
    283 def IIC_VST2ln     : InstrItinClass;
    284 def IIC_VST2lnu    : InstrItinClass;
    285 def IIC_VST3       : InstrItinClass;
    286 def IIC_VST3u      : InstrItinClass;
    287 def IIC_VST3ln     : InstrItinClass;
    288 def IIC_VST3lnu    : InstrItinClass;
    289 def IIC_VST4       : InstrItinClass;
    290 def IIC_VST4u      : InstrItinClass;
    291 def IIC_VST4ln     : InstrItinClass;
    292 def IIC_VST4lnu    : InstrItinClass;
    293 def IIC_VUNAD      : InstrItinClass; // NOTE(review): remaining IIC_V* classes presumably cover vector (NEON) arithmetic/move/permute ops; trailing D/Q presumably mean 64-bit/128-bit register forms -- confirm.
    294 def IIC_VUNAQ      : InstrItinClass;
    295 def IIC_VBIND      : InstrItinClass;
    296 def IIC_VBINQ      : InstrItinClass;
    297 def IIC_VPBIND     : InstrItinClass;
    298 def IIC_VFMULD     : InstrItinClass;
    299 def IIC_VFMULQ     : InstrItinClass;
    300 def IIC_VMOV       : InstrItinClass;
    301 def IIC_VMOVImm    : InstrItinClass;
    302 def IIC_VMOVD      : InstrItinClass;
    303 def IIC_VMOVQ      : InstrItinClass;
    304 def IIC_VMOVIS     : InstrItinClass;
    305 def IIC_VMOVID     : InstrItinClass;
    306 def IIC_VMOVISL    : InstrItinClass;
    307 def IIC_VMOVSI     : InstrItinClass;
    308 def IIC_VMOVDI     : InstrItinClass;
    309 def IIC_VMOVN      : InstrItinClass;
    310 def IIC_VPERMD     : InstrItinClass;
    311 def IIC_VPERMQ     : InstrItinClass;
    312 def IIC_VPERMQ3    : InstrItinClass;
    313 def IIC_VMACD      : InstrItinClass;
    314 def IIC_VMACQ      : InstrItinClass;
    315 def IIC_VFMACD     : InstrItinClass;
    316 def IIC_VFMACQ     : InstrItinClass;
    317 def IIC_VRECSD     : InstrItinClass;
    318 def IIC_VRECSQ     : InstrItinClass;
    319 def IIC_VCNTiD     : InstrItinClass;
    320 def IIC_VCNTiQ     : InstrItinClass;
    321 def IIC_VUNAiD     : InstrItinClass;
    322 def IIC_VUNAiQ     : InstrItinClass;
    323 def IIC_VQUNAiD    : InstrItinClass;
    324 def IIC_VQUNAiQ    : InstrItinClass;
    325 def IIC_VBINiD     : InstrItinClass;
    326 def IIC_VBINiQ     : InstrItinClass;
    327 def IIC_VSUBiD     : InstrItinClass;
    328 def IIC_VSUBiQ     : InstrItinClass;
    329 def IIC_VBINi4D    : InstrItinClass;
    330 def IIC_VBINi4Q    : InstrItinClass;
    331 def IIC_VSUBi4D    : InstrItinClass;
    332 def IIC_VSUBi4Q    : InstrItinClass;
    333 def IIC_VABAD      : InstrItinClass;
    334 def IIC_VABAQ      : InstrItinClass;
    335 def IIC_VSHLiD     : InstrItinClass;
    336 def IIC_VSHLiQ     : InstrItinClass;
    337 def IIC_VSHLi4D    : InstrItinClass;
    338 def IIC_VSHLi4Q    : InstrItinClass;
    339 def IIC_VPALiD     : InstrItinClass;
    340 def IIC_VPALiQ     : InstrItinClass;
    341 def IIC_VMULi16D   : InstrItinClass;
    342 def IIC_VMULi32D   : InstrItinClass;
    343 def IIC_VMULi16Q   : InstrItinClass;
    344 def IIC_VMULi32Q   : InstrItinClass;
    345 def IIC_VMACi16D   : InstrItinClass;
    346 def IIC_VMACi32D   : InstrItinClass;
    347 def IIC_VMACi16Q   : InstrItinClass;
    348 def IIC_VMACi32Q   : InstrItinClass;
    349 def IIC_VEXTD      : InstrItinClass;
    350 def IIC_VEXTQ      : InstrItinClass;
    351 def IIC_VTB1       : InstrItinClass;
    352 def IIC_VTB2       : InstrItinClass;
    353 def IIC_VTB3       : InstrItinClass;
    354 def IIC_VTB4       : InstrItinClass;
    355 def IIC_VTBX1      : InstrItinClass;
    356 def IIC_VTBX2      : InstrItinClass;
    357 def IIC_VTBX3      : InstrItinClass;
    358 def IIC_VTBX4      : InstrItinClass;
    359 
    360 //===----------------------------------------------------------------------===//
    361 // Processor instruction itineraries.
    362 
    363 include "ARMScheduleV6.td"
    364 include "ARMScheduleA8.td"
    365 include "ARMScheduleA9.td"
    366 include "ARMScheduleSwift.td"
    367