//=- AArch64SchedM1.td - Samsung Exynos-M1 Scheduling Defs ---*- tablegen -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for Samsung Exynos-M1 to support
// instruction scheduling and other instruction cost heuristics.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// The Exynos-M1 is a traditional superscalar microprocessor with a
// 4-wide in-order stage for decode and dispatch and a wider issue stage.
// The execution units and loads and stores are out-of-order.

// Top-level machine model record; the resources and write/read definitions
// that follow attach to it through `SchedModel = ExynosM1Model`.
def ExynosM1Model : SchedMachineModel {
  // Front-end width.
  let IssueWidth            =  4; // Up to 4 uops per cycle.

  // Buffering.
  let MicroOpBufferSize     = 96; // ROB size.
  let LoopMicroOpBufferSize = 24; // Based on the instruction queue size.

  // Default latencies and penalties.
  let LoadLatency           =  4; // Optimistic load cases.
  let MispredictPenalty     = 14; // Minimum branch misprediction penalty.

  // Instructions without explicit information fall back to the defaults.
  let CompleteModel         =  0; // Use the default model otherwise.
}

//===----------------------------------------------------------------------===//
// Define each kind of processor resource and number available on the Exynos-M1,
// which has 9 pipelines, each with its own queue with out-of-order dispatch.

// Integer, branch and memory pipelines, followed by the two FP pipes.
def M1UnitA  : ProcResource<2>; // Simple integer
def M1UnitC  : ProcResource<1>; // Simple and complex integer
def M1UnitB  : ProcResource<2>; // Branch
def M1UnitL  : ProcResource<1>; // Load
def M1UnitS  : ProcResource<1>; // Store
def M1PipeF0 : ProcResource<1>; // FP #0
def M1PipeF1 : ProcResource<1>; // FP #1

// Functional units that are sub-resources of FP pipe #0.
let Super = M1PipeF0 in {
  def M1UnitFMAC   : ProcResource<1>; // FP multiplication
  def M1UnitFCVT   : ProcResource<1>; // FP conversion
  def M1UnitNAL0   : ProcResource<1>; // Simple vector.
  def M1UnitNMISC  : ProcResource<1>; // Miscellanea
  def M1UnitNCRYPT : ProcResource<1>; // Cryptographic
}

// Functional units that are sub-resources of FP pipe #1.
let Super = M1PipeF1 in {
  def M1UnitFADD : ProcResource<1>; // Simple FP
  // FP division & square root (serialized)
  def M1UnitFVAR : ProcResource<1> { let BufferSize = 1; }
  def M1UnitNAL1 : ProcResource<1>; // Simple vector.
  def M1UnitFST  : ProcResource<1>; // FP store
}

// Resource groups spanning several of the units above.
let SchedModel = ExynosM1Model in {
  // All simple integer.
  def M1UnitALU  : ProcResGroup<[M1UnitA, M1UnitC]>;
  // All simple vector.
  def M1UnitNALU : ProcResGroup<[M1UnitNAL0, M1UnitNAL1]>;
}

let SchedModel = ExynosM1Model in {

//===----------------------------------------------------------------------===//
// Coarse scheduling model for the Exynos-M1.

// Branch instructions.
// TODO: Non-conditional direct branches take zero cycles and units.
let Latency = 1 in {
  def : WriteRes<WriteBr,    [M1UnitB]>;
  def : WriteRes<WriteBrReg, [M1UnitC]>;
}
// TODO: Branch and link is much different.

// Arithmetic and logical integer instructions.
let Latency = 1 in {
  def : WriteRes<WriteI,     [M1UnitALU]>;
  // TODO: Shift over 3 and some extensions take 2 cycles.
  def : WriteRes<WriteISReg, [M1UnitALU]>;
  def : WriteRes<WriteIEReg, [M1UnitALU]>;
  def : WriteRes<WriteIS,    [M1UnitALU]>;
}

// Move instructions.
def : WriteRes<WriteImm, [M1UnitALU]> { let Latency = 1; }

// Divide and multiply instructions.
// TODO: Division blocks the divider inside C.
def : WriteRes<WriteID32, [M1UnitC]> { let Latency = 13; }
def : WriteRes<WriteID64, [M1UnitC]> { let Latency = 21; }
// TODO: Long multiplication takes 5 cycles and also the ALU.
// TODO: Multiplication with accumulation can be advanced.
def : WriteRes<WriteIM32, [M1UnitC]> { let Latency =  3; }
// TODO: 64-bit multiplication has a throughput of 1/2.
def : WriteRes<WriteIM64, [M1UnitC]> { let Latency =  4; }

// Miscellaneous instructions.
// The ALU group is listed twice in the resource list on purpose.
def : WriteRes<WriteExtr, [M1UnitALU, M1UnitALU]> { let Latency = 2; }

// TODO: The latency for the post or pre register is 1 cycle.
def : WriteRes<WriteAdr, []> { let Latency = 0; }

// Load instructions.
def : WriteRes<WriteLD,    [M1UnitL]>   { let Latency = 4; }
// TODO: Extended address requires also the ALU.
def : WriteRes<WriteLDIdx, [M1UnitL]>   { let Latency = 5; }
def : WriteRes<WriteLDHi,  [M1UnitALU]> { let Latency = 4; }

// Store instructions.
let Latency = 1 in {
  def : WriteRes<WriteST,    [M1UnitS]>;
  // TODO: Extended address requires also the ALU.
  def : WriteRes<WriteSTIdx, [M1UnitS]>;
  def : WriteRes<WriteSTP,   [M1UnitS]>;
  def : WriteRes<WriteSTX,   [M1UnitS]>;
}

// FP data instructions.
def : WriteRes<WriteF,    [M1UnitFADD]>  { let Latency =  3; }
// TODO: FCCMP is much different.
def : WriteRes<WriteFCmp, [M1UnitNMISC]> { let Latency =  4; }
// TODO: DP takes longer.
def : WriteRes<WriteFDiv, [M1UnitFVAR]>  { let Latency = 15; }
// TODO: MACC takes longer.
def : WriteRes<WriteFMul, [M1UnitFMAC]>  { let Latency =  4; }

// FP miscellaneous instructions.
// TODO: Conversion between register files is much different.
def : WriteRes<WriteFCvt,  [M1UnitFCVT]> { let Latency = 3; }
def : WriteRes<WriteFImm,  [M1UnitNALU]> { let Latency = 1; }
// TODO: Copy from FPR to GPR is much different.
def : WriteRes<WriteFCopy, [M1UnitS]>    { let Latency = 4; }

// FP load instructions.
// TODO: ASIMD loads are much different.
def : WriteRes<WriteVLD, [M1UnitL]> { let Latency = 5; }

// FP store instructions.
// TODO: ASIMD stores are much different.
def : WriteRes<WriteVST, [M1UnitS, M1UnitFST]> { let Latency = 1; }

// ASIMD FP instructions.
// TODO: Other operations are much different.
def : WriteRes<WriteV, [M1UnitFADD]> { let Latency = 3; }

// Other miscellaneous instructions.
def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
// Barriers, hints and system instructions use no modelled resource.
let Latency = 1 in {
  def : WriteRes<WriteBarrier, []>;
  def : WriteRes<WriteHint,    []>;
  def : WriteRes<WriteSys,     []>;
}

//===----------------------------------------------------------------------===//
// Generic fast forwarding.

// TODO: Add FP register forwarding rules.

def : ReadAdvance<ReadI,       0>;
def : ReadAdvance<ReadISReg,   0>;
def : ReadAdvance<ReadIEReg,   0>;
def : ReadAdvance<ReadIM,      0>;
// Integer multiply-accumulate.
// TODO: The forwarding for WriteIM64 saves actually 3 cycles.
def : ReadAdvance<ReadIMA,     2, [WriteIM32, WriteIM64]>;
def : ReadAdvance<ReadID,      0>;
def : ReadAdvance<ReadExtrHi,  0>;
def : ReadAdvance<ReadAdrBase, 0>;
def : ReadAdvance<ReadVLD,     0>;

//===----------------------------------------------------------------------===//
// Finer scheduling model for the Exynos-M1.

// Writes that occupy more than one resource.
def M1WriteNEONA : SchedWriteRes<[M1UnitNALU, M1UnitNALU, M1UnitFADD]> {
  let Latency = 9;
}
def M1WriteNEONB : SchedWriteRes<[M1UnitNALU, M1UnitFST]> {
  let Latency = 5;
}
def M1WriteNEONC : SchedWriteRes<[M1UnitNALU, M1UnitFST]> {
  let Latency = 6;
}
def M1WriteNEOND : SchedWriteRes<[M1UnitNALU, M1UnitFST, M1UnitL]> {
  let Latency = 10;
}
def M1WriteNEONE : SchedWriteRes<[M1UnitFCVT, M1UnitFST]> {
  let Latency = 8;
}
def M1WriteNEONF : SchedWriteRes<[M1UnitFCVT, M1UnitFST, M1UnitL]> {
  let Latency = 13;
}
def M1WriteNEONG : SchedWriteRes<[M1UnitNMISC, M1UnitFST]> {
  let Latency = 6;
}
def M1WriteNEONH : SchedWriteRes<[M1UnitNALU, M1UnitFST]> {
  let Latency = 3;
}
def M1WriteNEONI : SchedWriteRes<[M1UnitFST, M1UnitL]> {
  let Latency = 9;
}
def M1WriteNEONJ : SchedWriteRes<[M1UnitNMISC, M1UnitFMAC]> {
  let Latency = 6;
}
def M1WriteNEONK : SchedWriteRes<[M1UnitNMISC, M1UnitFMAC]> {
  let Latency = 7;
}

// Integer ALU and branch writes.
def M1WriteALU1 : SchedWriteRes<[M1UnitALU]> { let Latency = 1; }
def M1WriteB    : SchedWriteRes<[M1UnitB]>   { let Latency = 1; }
// FIXME: This is the worst case, conditional branch and link.
def M1WriteBL   : SchedWriteRes<[M1UnitB, M1UnitALU]> { let Latency = 1; }
// FIXME: This is the worst case, when using LR.
def M1WriteBLR : SchedWriteRes<[M1UnitB, M1UnitALU, M1UnitALU]> {
  let Latency = 2;
}

// Single-resource writes; the numeric suffix of each name is its latency.
def M1WriteC1      : SchedWriteRes<[M1UnitC]>      { let Latency =  1; }
def M1WriteC2      : SchedWriteRes<[M1UnitC]>      { let Latency =  2; }
def M1WriteFADD3   : SchedWriteRes<[M1UnitFADD]>   { let Latency =  3; }
def M1WriteFCVT3   : SchedWriteRes<[M1UnitFCVT]>   { let Latency =  3; }
def M1WriteFCVT4   : SchedWriteRes<[M1UnitFCVT]>   { let Latency =  4; }
def M1WriteFMAC4   : SchedWriteRes<[M1UnitFMAC]>   { let Latency =  4; }
def M1WriteFMAC5   : SchedWriteRes<[M1UnitFMAC]>   { let Latency =  5; }
def M1WriteFVAR15  : SchedWriteRes<[M1UnitFVAR]>   { let Latency = 15; }
def M1WriteFVAR23  : SchedWriteRes<[M1UnitFVAR]>   { let Latency = 23; }
def M1WriteNALU1   : SchedWriteRes<[M1UnitNALU]>   { let Latency =  1; }
def M1WriteNALU2   : SchedWriteRes<[M1UnitNALU]>   { let Latency =  2; }
def M1WriteNAL11   : SchedWriteRes<[M1UnitNAL1]>   { let Latency =  1; }
def M1WriteNAL12   : SchedWriteRes<[M1UnitNAL1]>   { let Latency =  2; }
def M1WriteNAL13   : SchedWriteRes<[M1UnitNAL1]>   { let Latency =  3; }
def M1WriteNCRYPT1 : SchedWriteRes<[M1UnitNCRYPT]> { let Latency =  1; }
def M1WriteNCRYPT5 : SchedWriteRes<[M1UnitNCRYPT]> { let Latency =  5; }
def M1WriteNMISC1  : SchedWriteRes<[M1UnitNMISC]>  { let Latency =  1; }
def M1WriteNMISC2  : SchedWriteRes<[M1UnitNMISC]>  { let Latency =  2; }
def M1WriteNMISC3  : SchedWriteRes<[M1UnitNMISC]>  { let Latency =  3; }
def M1WriteNMISC4  : SchedWriteRes<[M1UnitNMISC]>  { let Latency =  4; }
def M1WriteS4      : SchedWriteRes<[M1UnitS]>      { let Latency =  4; }
def M1WriteTB      : SchedWriteRes<[M1UnitC, M1UnitALU]> { let Latency = 2; }

// Branch instructions.
def : InstRW<[M1WriteB],   (instrs Bcc)>;
def : InstRW<[M1WriteBL],  (instrs BL)>;
def : InstRW<[M1WriteBLR], (instrs BLR)>;
def : InstRW<[M1WriteC1],  (instregex "^CBN?Z[WX]")>;
def : InstRW<[M1WriteTB],  (instregex "^TBN?Z[WX]")>;

// Arithmetic and logical integer instructions.
def : InstRW<[M1WriteALU1], (instrs COPY)>;

// Divide and multiply instructions.

// Miscellaneous instructions.

// Load instructions.

// Store instructions.

// FP data instructions.
def : InstRW<[M1WriteNALU1],  (instregex "^F(ABS|NEG)[DS]r")>;
def : InstRW<[M1WriteFADD3],  (instregex "^F(ADD|SUB)[DS]rr")>;
def : InstRW<[M1WriteNEONG],  (instregex "^FCCMPE?[DS]rr")>;
def : InstRW<[M1WriteNMISC4], (instregex "^FCMPE?[DS]r")>;
def : InstRW<[M1WriteFVAR15], (instrs FDIVSrr)>;
def : InstRW<[M1WriteFVAR23], (instrs FDIVDrr)>;
def : InstRW<[M1WriteNMISC2], (instregex "^F(MAX|MIN).+rr")>;
def : InstRW<[M1WriteFMAC4],  (instregex "^FN?MUL[DS]rr")>;
def : InstRW<[M1WriteFMAC5],  (instregex "^FN?M(ADD|SUB)[DS]rrr")>;
def : InstRW<[M1WriteFCVT3],  (instregex "^FRINT.+r")>;
def : InstRW<[M1WriteNEONH],  (instregex "^FCSEL[DS]rrr")>;
def : InstRW<[M1WriteFVAR15], (instrs FSQRTSr)>;
def : InstRW<[M1WriteFVAR23], (instrs FSQRTDr)>;

// FP miscellaneous instructions.
def : InstRW<[M1WriteFCVT3], (instregex "^FCVT[DS][DS]r")>;
def : InstRW<[M1WriteNEONF],
             (instregex "^[FSU]CVT[AMNPZ][SU](_Int)?[SU]?[XW]?[DS]?[rds]i?")>;
def : InstRW<[M1WriteNEONE], (instregex "^[SU]CVTF[SU]")>;
def : InstRW<[M1WriteNALU1], (instregex "^FMOV[DS][ir]")>;
def : InstRW<[M1WriteS4],    (instregex "^FMOV[WX][DS](High)?r")>;
def : InstRW<[M1WriteNEONI], (instregex "^FMOV[DS][WX](High)?r")>;

// FP load instructions.

// FP store instructions.

// ASIMD instructions.
// Absolute difference, absolute value and negation.
def : InstRW<[M1WriteNMISC3], (instregex "^[SU]ABAL?v")>;
def : InstRW<[M1WriteNMISC1], (instregex "^[SU]ABDL?v")>;
def : InstRW<[M1WriteNMISC1], (instregex "^(SQ)?ABSv")>;
def : InstRW<[M1WriteNMISC1], (instregex "^SQNEGv")>;
// Addition and subtraction variants.
def : InstRW<[M1WriteNALU1],  (instregex "^(ADD|NEG|SUB)v")>;
def : InstRW<[M1WriteNMISC3], (instregex "^[SU]?H(ADD|SUB)v")>;
def : InstRW<[M1WriteNMISC3], (instregex "^[SU]?AD[AD](L|LP|P|W)V?2?v")>;
def : InstRW<[M1WriteNMISC3], (instregex "^[SU]?SUB[LW]2?v")>;
def : InstRW<[M1WriteNMISC3], (instregex "^R?(ADD|SUB)HN?2?v")>;
def : InstRW<[M1WriteNMISC3], (instregex "^[SU]+Q(ADD|SUB)v")>;
def : InstRW<[M1WriteNMISC3], (instregex "^[SU]RHADDv")>;
// Comparisons and bitwise logic.
def : InstRW<[M1WriteNMISC1], (instregex "^CM(EQ|GE|GT|HI|HS|LE|LT)v")>;
def : InstRW<[M1WriteNALU1],  (instregex "^CMTSTv")>;
def : InstRW<[M1WriteNALU1],  (instregex "^(AND|BIC|EOR|MVNI|NOT|ORN|ORR)v")>;
// Minimum and maximum.
def : InstRW<[M1WriteNMISC1], (instregex "^[SU](MIN|MAX)v")>;
def : InstRW<[M1WriteNMISC2], (instregex "^[SU](MIN|MAX)Pv")>;
def : InstRW<[M1WriteNMISC3], (instregex "^[SU](MIN|MAX)Vv")>;
// Multiplication and multiply-accumulate.
def : InstRW<[M1WriteNMISC4], (instregex "^(MUL|SQR?DMULH)v")>;
def : InstRW<[M1WriteNMISC4], (instregex "^ML[AS]v")>;
def : InstRW<[M1WriteNMISC4], (instregex "^(S|U|SQD|SQRD)ML[AS][HL]v")>;
def : InstRW<[M1WriteNMISC4], (instregex "^(S|U|SQD)MULLv")>;
// Shifts.
def : InstRW<[M1WriteNAL13],  (instregex "^(S|SR|U|UR)SRAv")>;
def : InstRW<[M1WriteNALU1],  (instregex "^[SU]?SH(L|LL|R)2?v")>;
def : InstRW<[M1WriteNALU1],  (instregex "^S[LR]Iv")>;
def : InstRW<[M1WriteNAL13],  (instregex "^[SU]?(Q|QR|R)?SHR(N|U|UN)?2?v")>;
def : InstRW<[M1WriteNAL13],  (instregex "^[SU](Q|QR|R)SHLU?v")>;

// ASIMD FP instructions.
// Every pattern below is anchored with '^' so that it can only match from the
// start of an opcode name, never a substring inside a longer name.
def : InstRW<[M1WriteNALU1],  (instregex "^F(ABS|NEG)v")>;
def : InstRW<[M1WriteNMISC3], (instregex "^F(ABD|ADD|SUB)v")>;
def : InstRW<[M1WriteNEONA],  (instregex "^FADDP")>;
def : InstRW<[M1WriteNMISC1], (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v[^1]")>;
def : InstRW<[M1WriteFCVT3],  (instregex "^[FVSU]CVTX?[AFLMNPZ][SU]?(_Int)?v")>;
// Vector division and square root use the serialized FVAR unit; the 64-bit
// element forms are given the longer latency.
def : InstRW<[M1WriteFVAR15], (instregex "^FDIVv.f32")>;
def : InstRW<[M1WriteFVAR23], (instregex "^FDIVv2f64")>;
def : InstRW<[M1WriteFVAR15], (instregex "^FSQRTv.f32")>;
def : InstRW<[M1WriteFVAR23], (instregex "^FSQRTv2f64")>;
def : InstRW<[M1WriteNMISC1], (instregex "^F(MAX|MIN)(NM)?V?v")>;
def : InstRW<[M1WriteNMISC2], (instregex "^F(MAX|MIN)(NM)?Pv")>;
// Multiplication and multiply-accumulate: the patterns ending in "v.i" and
// "v.f" are mapped to different writes.
def : InstRW<[M1WriteNEONJ],  (instregex "^FMULX?v.i")>;
def : InstRW<[M1WriteFMAC4],  (instregex "^FMULX?v.f")>;
def : InstRW<[M1WriteNEONK],  (instregex "^FML[AS]v.i")>;
def : InstRW<[M1WriteFMAC5],  (instregex "^FML[AS]v.f")>;
def : InstRW<[M1WriteFCVT3],  (instregex "^FRINT[AIMNPXZ]v")>;

// ASIMD miscellaneous instructions.
def : InstRW<[M1WriteNALU1],  (instregex "^RBITv")>;
def : InstRW<[M1WriteNAL11],  (instregex "^(BIF|BIT|BSL)v")>;
def : InstRW<[M1WriteNALU1],  (instregex "^CPY")>;
def : InstRW<[M1WriteNEONB],  (instregex "^DUPv.+gpr")>;
def : InstRW<[M1WriteNALU1],  (instregex "^DUPv.+lane")>;
def : InstRW<[M1WriteNAL13],  (instregex "^[SU]?Q?XTU?Nv")>;
def : InstRW<[M1WriteNEONC],  (instregex "^INSv.+gpr")>;
def : InstRW<[M1WriteFCVT4],  (instregex "^[FU](RECP|RSQRT)Ev")>;
def : InstRW<[M1WriteNMISC1], (instregex "^[FU](RECP|RSQRT)Xv")>;
def : InstRW<[M1WriteFMAC5],  (instregex "^F(RECP|RSQRT)Sv")>;
def : InstRW<[M1WriteNALU1],  (instregex "^REV(16|32|64)v")>;

// Table lookups: the One..Four forms use a sequence of 1..4 copies of the
// base write.
def : InstRW<[M1WriteNAL11],
             (instregex "^TB[LX]v8i8One")>;
def : InstRW<[WriteSequence<[M1WriteNAL11], 2>],
             (instregex "^TB[LX]v8i8Two")>;
def : InstRW<[WriteSequence<[M1WriteNAL11], 3>],
             (instregex "^TB[LX]v8i8Three")>;
def : InstRW<[WriteSequence<[M1WriteNAL11], 4>],
             (instregex "^TB[LX]v8i8Four")>;
def : InstRW<[M1WriteNAL12],
             (instregex "^TB[LX]v16i8One")>;
def : InstRW<[WriteSequence<[M1WriteNAL12], 2>],
             (instregex "^TB[LX]v16i8Two")>;
def : InstRW<[WriteSequence<[M1WriteNAL12], 3>],
             (instregex "^TB[LX]v16i8Three")>;
def : InstRW<[WriteSequence<[M1WriteNAL12], 4>],
             (instregex "^TB[LX]v16i8Four")>;

def : InstRW<[M1WriteNEOND],  (instregex "^[SU]MOVv")>;
def : InstRW<[M1WriteNALU1],  (instregex "^INSv.+lane")>;
def : InstRW<[M1WriteNALU1],  (instregex "^(TRN|UZP)[12](v8i8|v4i16|v2i32)")>;
def : InstRW<[M1WriteNALU2],
             (instregex "^(TRN|UZP)[12](v16i8|v8i16|v4i32|v2i64)")>;
def : InstRW<[M1WriteNALU1],  (instregex "^ZIP[12]v")>;

// ASIMD load instructions.

// ASIMD store instructions.

// Cryptography instructions.
// AES gets a dedicated write plus a read advance of 1 cycle that applies to
// operands produced by that same write.
def M1WriteAES : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 1; }
def M1ReadAES  : SchedReadAdvance<1, [M1WriteAES]>;
def : InstRW<[M1WriteAES, M1ReadAES], (instregex "^AES")>;

def : InstRW<[M1WriteNCRYPT1], (instregex "^PMUL")>;
def : InstRW<[M1WriteNCRYPT1], (instregex "^SHA1(H|SU)")>;
def : InstRW<[M1WriteNCRYPT5], (instregex "^SHA1[CMP]")>;
def : InstRW<[M1WriteNCRYPT1], (instregex "^SHA256SU0")>;
def : InstRW<[M1WriteNCRYPT5], (instregex "^SHA256(H|SU1)")>;

// CRC instructions.
def : InstRW<[M1WriteC2], (instregex "^CRC32")>;

} // SchedModel = ExynosM1Model
