//==- AArch64SchedThunderX.td - Cavium ThunderX T8X Scheduling Definitions -*- tablegen -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the scheduling machine model for the Cavium ThunderX T8X
// (T88, T81, T83) processors.
// Loosely based on Cortex-A53 which is somewhat similar.
//
//===----------------------------------------------------------------------===//

// ===---------------------------------------------------------------------===//
// The following definitions describe the simpler per-operand machine model.
// This works with MachineScheduler. See llvm/MC/MCSchedule.h for details.

// Cavium ThunderX T8X scheduling machine model.
def ThunderXT8XModel : SchedMachineModel {
  let IssueWidth = 2;        // 2 micro-ops dispatched per cycle.
  let MicroOpBufferSize = 0; // ThunderX T88/T81/T83 are in-order.
  let LoadLatency = 3;       // Optimistic load latency.
  let MispredictPenalty = 8; // Branch mispredict penalty.
  let PostRAScheduler = 1;   // Use PostRA scheduler.
  let CompleteModel = 1;

  list<Predicate> UnsupportedFeatures = [HasSVE];

  // FIXME: Remove when all errors have been fixed.
  // NOTE(review): at least one overlap remains in this file: the branch
  // pattern "^B..$" also matches the three-letter opcode name BLR, which has
  // its own InstRW entry below. Confirm and resolve before enabling the check.
  let FullInstRWOverlapCheck = 0;
}

// Modeling each pipeline with BufferSize == 0 since T8X is in-order.
def THXT8XUnitALU   : ProcResource<2> { let BufferSize = 0; } // Int ALU
def THXT8XUnitMAC   : ProcResource<1> { let BufferSize = 0; } // Int MAC
def THXT8XUnitDiv   : ProcResource<1> { let BufferSize = 0; } // Int Division
def THXT8XUnitLdSt  : ProcResource<1> { let BufferSize = 0; } // Load/Store
def THXT8XUnitBr    : ProcResource<1> { let BufferSize = 0; } // Branch
def THXT8XUnitFPALU : ProcResource<1> { let BufferSize = 0; } // FP ALU
def THXT8XUnitFPMDS : ProcResource<1> { let BufferSize = 0; } // FP Mul/Div/Sqrt

//===----------------------------------------------------------------------===//
// Subtarget-specific SchedWrite types mapping the ProcResources and
// latencies.

let SchedModel = ThunderXT8XModel in {

// ALU
def : WriteRes<WriteImm, [THXT8XUnitALU]> { let Latency = 1; }
def : WriteRes<WriteI, [THXT8XUnitALU]> { let Latency = 1; }
def : WriteRes<WriteISReg, [THXT8XUnitALU]> { let Latency = 2; }
def : WriteRes<WriteIEReg, [THXT8XUnitALU]> { let Latency = 2; }
def : WriteRes<WriteIS, [THXT8XUnitALU]> { let Latency = 2; }
def : WriteRes<WriteExtr, [THXT8XUnitALU]> { let Latency = 2; }

// MAC
def : WriteRes<WriteIM32, [THXT8XUnitMAC]> {
  let Latency = 4;
  let ResourceCycles = [1];
}

def : WriteRes<WriteIM64, [THXT8XUnitMAC]> {
  let Latency = 4;
  let ResourceCycles = [1];
}

// Div
def : WriteRes<WriteID32, [THXT8XUnitDiv]> {
  let Latency = 12;
  let ResourceCycles = [6];
}

def : WriteRes<WriteID64, [THXT8XUnitDiv]> {
  let Latency = 14;
  let ResourceCycles = [8];
}

// Load
def : WriteRes<WriteLD, [THXT8XUnitLdSt]> { let Latency = 3; }
def : WriteRes<WriteLDIdx, [THXT8XUnitLdSt]> { let Latency = 3; }
def : WriteRes<WriteLDHi, [THXT8XUnitLdSt]> { let Latency = 3; }

// Vector Load
// WriteVLD is the generic fallback; the THXT8XWriteVLD1..5 classes below are
// attached to specific opcodes through the InstRW entries later in this file.
def : WriteRes<WriteVLD, [THXT8XUnitLdSt]> {
  let Latency = 8;
  let ResourceCycles = [3];
}

def THXT8XWriteVLD1 : SchedWriteRes<[THXT8XUnitLdSt]> {
  let Latency = 6;
  let ResourceCycles = [1];
}

def THXT8XWriteVLD2 : SchedWriteRes<[THXT8XUnitLdSt]> {
  let Latency = 11;
  let ResourceCycles = [7];
}

def THXT8XWriteVLD3 : SchedWriteRes<[THXT8XUnitLdSt]> {
  let Latency = 12;
  let ResourceCycles = [8];
}

def THXT8XWriteVLD4 : SchedWriteRes<[THXT8XUnitLdSt]> {
  let Latency = 13;
  let ResourceCycles = [9];
}

def THXT8XWriteVLD5 : SchedWriteRes<[THXT8XUnitLdSt]> {
  let Latency = 13;
  let ResourceCycles = [9];
}

// Pre/Post Indexing
// The address write-back consumes no resource and has zero latency.
def : WriteRes<WriteAdr, []> { let Latency = 0; }

// Store
def : WriteRes<WriteST, [THXT8XUnitLdSt]> { let Latency = 1; }
def : WriteRes<WriteSTP, [THXT8XUnitLdSt]> { let Latency = 1; }
def : WriteRes<WriteSTIdx, [THXT8XUnitLdSt]> { let Latency = 1; }
def : WriteRes<WriteSTX, [THXT8XUnitLdSt]> { let Latency = 1; }

// Vector Store
def : WriteRes<WriteVST, [THXT8XUnitLdSt]>;
def THXT8XWriteVST1 : SchedWriteRes<[THXT8XUnitLdSt]>;

def THXT8XWriteVST2 : SchedWriteRes<[THXT8XUnitLdSt]> {
  let Latency = 10;
  let ResourceCycles = [9];
}

def THXT8XWriteVST3 : SchedWriteRes<[THXT8XUnitLdSt]> {
  let Latency = 11;
  let ResourceCycles = [10];
}

def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }

// Branch
def : WriteRes<WriteBr, [THXT8XUnitBr]>;
def THXT8XWriteBR : SchedWriteRes<[THXT8XUnitBr]>;
def : WriteRes<WriteBrReg, [THXT8XUnitBr]>;
def THXT8XWriteBRR : SchedWriteRes<[THXT8XUnitBr]>;
// NOTE(review): RET is modelled on the ALU unit rather than the branch unit;
// presumably intentional, confirm against hardware documentation.
def THXT8XWriteRET : SchedWriteRes<[THXT8XUnitALU]>;
def : WriteRes<WriteSys, [THXT8XUnitBr]>;
def : WriteRes<WriteBarrier, [THXT8XUnitBr]>;
def : WriteRes<WriteHint, [THXT8XUnitBr]>;

// FP ALU
def : WriteRes<WriteF, [THXT8XUnitFPALU]> { let Latency = 6; }
def : WriteRes<WriteFCmp, [THXT8XUnitFPALU]> { let Latency = 6; }
def : WriteRes<WriteFCvt, [THXT8XUnitFPALU]> { let Latency = 6; }
def : WriteRes<WriteFCopy, [THXT8XUnitFPALU]> { let Latency = 6; }
def : WriteRes<WriteFImm, [THXT8XUnitFPALU]> { let Latency = 6; }
def : WriteRes<WriteV, [THXT8XUnitFPALU]> { let Latency = 6; }

// FP Mul, Div, Sqrt
def : WriteRes<WriteFMul, [THXT8XUnitFPMDS]> { let Latency = 6; }
def : WriteRes<WriteFDiv, [THXT8XUnitFPMDS]> {
  let Latency = 22;
  let ResourceCycles = [19];
}

def THXT8XWriteFMAC : SchedWriteRes<[THXT8XUnitFPMDS]> { let Latency = 10; }

def THXT8XWriteFDivSP : SchedWriteRes<[THXT8XUnitFPMDS]> {
  let Latency = 12;
  let ResourceCycles = [9];
}

def THXT8XWriteFDivDP : SchedWriteRes<[THXT8XUnitFPMDS]> {
  let Latency = 22;
  let ResourceCycles = [19];
}

def THXT8XWriteFSqrtSP : SchedWriteRes<[THXT8XUnitFPMDS]> {
  let Latency = 17;
  let ResourceCycles = [14];
}

def THXT8XWriteFSqrtDP : SchedWriteRes<[THXT8XUnitFPMDS]> {
  let Latency = 31;
  let ResourceCycles = [28];
}

//===----------------------------------------------------------------------===//
// Subtarget-specific SchedRead types.

// No forwarding for these reads.
// NOTE(review): these ReadAdvance entries carry non-zero cycle counts and no
// ValidWrites list; confirm the "no forwarding" wording is still accurate.
def : ReadAdvance<ReadExtrHi, 1>;
def : ReadAdvance<ReadAdrBase, 2>;
def : ReadAdvance<ReadVLD, 2>;

// FIXME: This needs more targeted benchmarking.
// ALU - Most operands in the ALU pipes are not needed for two cycles. Shiftable
//       operands are needed one cycle later if and only if they are to be
//       shifted. Otherwise, they too are needed two cycles later. This same
//       ReadAdvance applies to Extended registers as well, even though there is
//       a separate SchedPredicate for them.
def : ReadAdvance<ReadI, 2, [WriteImm, WriteI,
                             WriteISReg, WriteIEReg, WriteIS,
                             WriteID32, WriteID64,
                             WriteIM32, WriteIM64]>;
def THXT8XReadShifted : SchedReadAdvance<1, [WriteImm, WriteI,
                                             WriteISReg, WriteIEReg, WriteIS,
                                             WriteID32, WriteID64,
                                             WriteIM32, WriteIM64]>;
def THXT8XReadNotShifted : SchedReadAdvance<2, [WriteImm, WriteI,
                                                WriteISReg, WriteIEReg, WriteIS,
                                                WriteID32, WriteID64,
                                                WriteIM32, WriteIM64]>;
def THXT8XReadISReg : SchedReadVariant<[
  SchedVar<RegShiftedPred, [THXT8XReadShifted]>,
  SchedVar<NoSchedPred, [THXT8XReadNotShifted]>]>;
def : SchedAlias<ReadISReg, THXT8XReadISReg>;

def THXT8XReadIEReg : SchedReadVariant<[
  SchedVar<RegExtendedPred, [THXT8XReadShifted]>,
  SchedVar<NoSchedPred, [THXT8XReadNotShifted]>]>;
def : SchedAlias<ReadIEReg, THXT8XReadIEReg>;

// MAC - Operands are generally needed one cycle later in the MAC pipe.
//       Accumulator operands are needed two cycles later.
def : ReadAdvance<ReadIM, 1, [WriteImm, WriteI,
                              WriteISReg, WriteIEReg, WriteIS,
                              WriteID32, WriteID64,
                              WriteIM32, WriteIM64]>;
def : ReadAdvance<ReadIMA, 2, [WriteImm, WriteI,
                               WriteISReg, WriteIEReg, WriteIS,
                               WriteID32, WriteID64,
                               WriteIM32, WriteIM64]>;

// Div
def : ReadAdvance<ReadID, 1, [WriteImm, WriteI,
                              WriteISReg, WriteIEReg, WriteIS,
                              WriteID32, WriteID64,
                              WriteIM32, WriteIM64]>;

//===----------------------------------------------------------------------===//
// Subtarget-specific InstRW.

//---
// Branch
//---
def : InstRW<[THXT8XWriteBR], (instregex "^B$")>;
def : InstRW<[THXT8XWriteBR], (instregex "^BL$")>;
def : InstRW<[THXT8XWriteBR], (instregex "^B..$")>;
def : InstRW<[THXT8XWriteBR], (instregex "^CBNZ")>;
def : InstRW<[THXT8XWriteBR], (instregex "^CBZ")>;
def : InstRW<[THXT8XWriteBR], (instregex "^TBNZ")>;
def : InstRW<[THXT8XWriteBR], (instregex "^TBZ")>;
def : InstRW<[THXT8XWriteBRR], (instregex "^BR$")>;
def : InstRW<[THXT8XWriteBRR], (instregex "^BLR$")>;

//---
// Ret
//---
def : InstRW<[THXT8XWriteRET], (instregex "^RET$")>;

//---
// Miscellaneous
//---
def : InstRW<[WriteI], (instrs COPY)>;

//---
// Vector Loads
//---
// Each group lists the plain forms first, then the post-indexed (_POST) forms,
// which additionally carry a WriteAdr for the address write-back.
def : InstRW<[THXT8XWriteVLD1], (instregex "LD1i(8|16|32|64)$")>;
def : InstRW<[THXT8XWriteVLD1], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVLD1], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVLD2], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVLD3], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVLD4], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>;
def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[THXT8XWriteVLD3, WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[THXT8XWriteVLD4, WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

def : InstRW<[THXT8XWriteVLD1], (instregex "LD2i(8|16|32|64)$")>;
def : InstRW<[THXT8XWriteVLD1], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVLD2], (instregex "LD2Twov(8b|4h|2s)$")>;
def : InstRW<[THXT8XWriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)$")>;
// The post-indexed patterns require the _POST suffix so that they do not also
// match the plain LD2 opcodes covered by the four entries above.
def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD2i(8|16|32|64)_POST$")>;
def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST$")>;
def : InstRW<[THXT8XWriteVLD4, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>;

def : InstRW<[THXT8XWriteVLD2], (instregex "LD3i(8|16|32|64)$")>;
def : InstRW<[THXT8XWriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVLD4], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)$")>;
def : InstRW<[THXT8XWriteVLD3], (instregex "LD3Threev(2d)$")>;
def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>;
def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[THXT8XWriteVLD4, WriteAdr], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
def : InstRW<[THXT8XWriteVLD3, WriteAdr], (instregex "LD3Threev(2d)_POST$")>;

def : InstRW<[THXT8XWriteVLD2], (instregex "LD4i(8|16|32|64)$")>;
def : InstRW<[THXT8XWriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVLD5], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>;
def : InstRW<[THXT8XWriteVLD4], (instregex "LD4Fourv(2d)$")>;
def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>;
def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[THXT8XWriteVLD5, WriteAdr], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
def : InstRW<[THXT8XWriteVLD4, WriteAdr], (instregex "LD4Fourv(2d)_POST$")>;

//---
// Vector Stores
//---
def : InstRW<[THXT8XWriteVST1], (instregex "ST1i(8|16|32|64)$")>;
def : InstRW<[THXT8XWriteVST1], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVST1], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVST2], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>;
def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

def : InstRW<[THXT8XWriteVST1], (instregex "ST2i(8|16|32|64)$")>;
def : InstRW<[THXT8XWriteVST1], (instregex "ST2Twov(8b|4h|2s)$")>;
def : InstRW<[THXT8XWriteVST2], (instregex "ST2Twov(16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>;
def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;

def : InstRW<[THXT8XWriteVST2], (instregex "ST3i(8|16|32|64)$")>;
def : InstRW<[THXT8XWriteVST3], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)$")>;
def : InstRW<[THXT8XWriteVST2], (instregex "ST3Threev(2d)$")>;
def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>;
def : InstRW<[THXT8XWriteVST3, WriteAdr], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST3Threev(2d)_POST$")>;

def : InstRW<[THXT8XWriteVST2], (instregex "ST4i(8|16|32|64)$")>;
def : InstRW<[THXT8XWriteVST3], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>;
def : InstRW<[THXT8XWriteVST2], (instregex "ST4Fourv(2d)$")>;
def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>;
def : InstRW<[THXT8XWriteVST3, WriteAdr], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST4Fourv(2d)_POST$")>;

//---
// Floating Point MAC, DIV, SQRT
//---
def : InstRW<[THXT8XWriteFMAC], (instregex "^FN?M(ADD|SUB).*")>;
def : InstRW<[THXT8XWriteFMAC], (instregex "^FML(A|S).*")>;
def : InstRW<[THXT8XWriteFDivSP], (instrs FDIVSrr)>;
def : InstRW<[THXT8XWriteFDivDP], (instrs FDIVDrr)>;
def : InstRW<[THXT8XWriteFDivSP], (instregex "^FDIVv.*32$")>;
def : InstRW<[THXT8XWriteFDivDP], (instregex "^FDIVv.*64$")>;
def : InstRW<[THXT8XWriteFSqrtSP], (instregex "^.*SQRT.*32$")>;
def : InstRW<[THXT8XWriteFSqrtDP], (instregex "^.*SQRT.*64$")>;

}