1 //=- ARMScheduleA8.td - ARM Cortex-A8 Scheduling Definitions -*- tablegen -*-=// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines the itinerary class data for the ARM Cortex A8 processors. 11 // 12 //===----------------------------------------------------------------------===// 13 14 // 15 // Scheduling information derived from "Cortex-A8 Technical Reference Manual". 16 // Functional Units. 17 def A8_Pipe0 : FuncUnit; // pipeline 0 18 def A8_Pipe1 : FuncUnit; // pipeline 1 19 def A8_LSPipe : FuncUnit; // Load / store pipeline 20 def A8_NPipe : FuncUnit; // NEON ALU/MUL pipe 21 def A8_NLSPipe : FuncUnit; // NEON LS pipe 22 // 23 // Dual issue pipeline represented by A8_Pipe0 | A8_Pipe1 24 // 25 def CortexA8Itineraries : ProcessorItineraries< 26 [A8_Pipe0, A8_Pipe1, A8_LSPipe, A8_NPipe, A8_NLSPipe], 27 [], [ 28 // Two fully-pipelined integer ALU pipelines 29 // 30 // No operand cycles 31 InstrItinData<IIC_iALUx , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>, 32 // 33 // Binary Instructions that produce a result 34 InstrItinData<IIC_iALUi ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, 35 InstrItinData<IIC_iALUr ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>, 36 InstrItinData<IIC_iALUsi,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>, 37 InstrItinData<IIC_iALUsir,[InstrStage<1,[A8_Pipe0, A8_Pipe1]>], [2, 1, 2]>, 38 InstrItinData<IIC_iALUsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>, 39 // 40 // Bitwise Instructions that produce a result 41 InstrItinData<IIC_iBITi ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, 42 InstrItinData<IIC_iBITr ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>, 43 InstrItinData<IIC_iBITsi,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>, 44 InstrItinData<IIC_iBITsr,[InstrStage<1, [A8_Pipe0, 
A8_Pipe1]>], [2, 2, 1, 1]>, 45 // 46 // Unary Instructions that produce a result 47 InstrItinData<IIC_iUNAr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, 48 InstrItinData<IIC_iUNAsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, 49 // 50 // Zero and sign extension instructions 51 InstrItinData<IIC_iEXTr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>, 52 InstrItinData<IIC_iEXTAr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>, 53 InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>],[2, 2, 1, 1]>, 54 // 55 // Compare instructions 56 InstrItinData<IIC_iCMPi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>, 57 InstrItinData<IIC_iCMPr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, 58 InstrItinData<IIC_iCMPsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, 59 InstrItinData<IIC_iCMPsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>, 60 // 61 // Test instructions 62 InstrItinData<IIC_iTSTi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>, 63 InstrItinData<IIC_iTSTr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, 64 InstrItinData<IIC_iTSTsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, 65 InstrItinData<IIC_iTSTsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>, 66 // 67 // Move instructions, unconditional 68 InstrItinData<IIC_iMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>, 69 InstrItinData<IIC_iMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>, 70 InstrItinData<IIC_iMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>, 71 InstrItinData<IIC_iMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>, 72 InstrItinData<IIC_iMOVix2,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 73 InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>, 74 InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 75 InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 76 InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [3]>, 77 InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 78 InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 79 InstrStage<1, [A8_LSPipe]>], [5]>, 80 // 81 // Move 
instructions, conditional 82 InstrItinData<IIC_iCMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>, 83 InstrItinData<IIC_iCMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, 84 InstrItinData<IIC_iCMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, 85 InstrItinData<IIC_iCMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>, 86 InstrItinData<IIC_iCMOVix2,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 87 InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [3, 1]>, 88 // 89 // MVN instructions 90 InstrItinData<IIC_iMVNi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>, 91 InstrItinData<IIC_iMVNr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>, 92 InstrItinData<IIC_iMVNsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>, 93 InstrItinData<IIC_iMVNsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>, 94 95 // Integer multiply pipeline 96 // Result written in E5, but that is relative to the last cycle of multicycle, 97 // so we use 6 for those cases 98 // 99 InstrItinData<IIC_iMUL16 , [InstrStage<1, [A8_Pipe0]>], [5, 1, 1]>, 100 InstrItinData<IIC_iMAC16 , [InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>, 101 InstrItinData<IIC_iMUL32 , [InstrStage<2, [A8_Pipe0]>], [6, 1, 1]>, 102 InstrItinData<IIC_iMAC32 , [InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>, 103 InstrItinData<IIC_iMUL64 , [InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>, 104 InstrItinData<IIC_iMAC64 , [InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>, 105 106 // Integer load pipeline 107 // 108 // Immediate offset 109 InstrItinData<IIC_iLoad_i , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 110 InstrStage<1, [A8_LSPipe]>], [3, 1]>, 111 InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 112 InstrStage<1, [A8_LSPipe]>], [3, 1]>, 113 InstrItinData<IIC_iLoad_d_i, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 114 InstrStage<1, [A8_LSPipe]>], [3, 1]>, 115 // 116 // Register offset 117 InstrItinData<IIC_iLoad_r , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 118 InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>, 119 InstrItinData<IIC_iLoad_bh_r, 
[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 120 InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>, 121 InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 122 InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>, 123 // 124 // Scaled register offset, issues over 2 cycles 125 // FIXME: lsl by 2 takes 1 cycle. 126 InstrItinData<IIC_iLoad_si , [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>, 127 InstrStage<1, [A8_LSPipe]>], [4, 1, 1]>, 128 InstrItinData<IIC_iLoad_bh_si,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>, 129 InstrStage<1, [A8_LSPipe]>], [4, 1, 1]>, 130 // 131 // Immediate offset with update 132 InstrItinData<IIC_iLoad_iu , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 133 InstrStage<1, [A8_LSPipe]>], [3, 2, 1]>, 134 InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 135 InstrStage<1, [A8_LSPipe]>], [3, 2, 1]>, 136 // 137 // Register offset with update 138 InstrItinData<IIC_iLoad_ru , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 139 InstrStage<1, [A8_LSPipe]>], [3, 2, 1, 1]>, 140 InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 141 InstrStage<1, [A8_LSPipe]>], [3, 2, 1, 1]>, 142 InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 143 InstrStage<1, [A8_LSPipe]>], [3, 2, 1, 1]>, 144 // 145 // Scaled register offset with update, issues over 2 cycles 146 InstrItinData<IIC_iLoad_siu , [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>, 147 InstrStage<2, [A8_LSPipe]>], [4, 3, 1, 1]>, 148 InstrItinData<IIC_iLoad_bh_siu,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>, 149 InstrStage<2, [A8_LSPipe]>], [4, 3, 1, 1]>, 150 // 151 // Load multiple, def is the 5th operand. Pipeline 0 only. 152 // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers. 153 InstrItinData<IIC_iLoad_m , [InstrStage<2, [A8_Pipe0], 0>, 154 InstrStage<2, [A8_LSPipe]>], 155 [1, 1, 1, 1, 3], [], -1>, // dynamic uops 156 // 157 // Load multiple + update, defs are the 1st and 5th operands. 
158 InstrItinData<IIC_iLoad_mu , [InstrStage<3, [A8_Pipe0], 0>, 159 InstrStage<3, [A8_LSPipe]>], 160 [2, 1, 1, 1, 3], [], -1>, // dynamic uops 161 // 162 // Load multiple plus branch 163 InstrItinData<IIC_iLoad_mBr, [InstrStage<3, [A8_Pipe0], 0>, 164 InstrStage<3, [A8_LSPipe]>, 165 InstrStage<1, [A8_Pipe0, A8_Pipe1]>], 166 [1, 2, 1, 1, 3], [], -1>, // dynamic uops 167 // 168 // Pop, def is the 3rd operand. 169 InstrItinData<IIC_iPop , [InstrStage<3, [A8_Pipe0], 0>, 170 InstrStage<3, [A8_LSPipe]>], 171 [1, 1, 3], [], -1>, // dynamic uops 172 // 173 // Pop plus branch, def is the 3rd operand. 174 InstrItinData<IIC_iPop_Br, [InstrStage<3, [A8_Pipe0], 0>, 175 InstrStage<3, [A8_LSPipe]>, 176 InstrStage<1, [A8_Pipe0, A8_Pipe1]>], 177 [1, 1, 3], [], -1>, // dynamic uops 178 // 179 // iLoadi + iALUr for t2LDRpci_pic. 180 InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 181 InstrStage<1, [A8_LSPipe]>, 182 InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [4, 1]>, 183 184 185 // Integer store pipeline 186 // 187 // Immediate offset 188 InstrItinData<IIC_iStore_i , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 189 InstrStage<1, [A8_LSPipe]>], [3, 1]>, 190 InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 191 InstrStage<1, [A8_LSPipe]>], [3, 1]>, 192 InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 193 InstrStage<1, [A8_LSPipe]>], [3, 1]>, 194 // 195 // Register offset 196 InstrItinData<IIC_iStore_r , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 197 InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>, 198 InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 199 InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>, 200 InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 201 InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>, 202 // 203 // Scaled register offset, issues over 2 cycles 204 InstrItinData<IIC_iStore_si , [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>, 205 InstrStage<2, [A8_LSPipe]>], [3, 1, 1]>, 206 
InstrItinData<IIC_iStore_bh_si,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>, 207 InstrStage<2, [A8_LSPipe]>], [3, 1, 1]>, 208 // 209 // Immediate offset with update 210 InstrItinData<IIC_iStore_iu , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 211 InstrStage<1, [A8_LSPipe]>], [2, 3, 1]>, 212 InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 213 InstrStage<1, [A8_LSPipe]>], [2, 3, 1]>, 214 // 215 // Register offset with update 216 InstrItinData<IIC_iStore_ru , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 217 InstrStage<1, [A8_LSPipe]>], [2, 3, 1, 1]>, 218 InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 219 InstrStage<1, [A8_LSPipe]>], [2, 3, 1, 1]>, 220 InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 221 InstrStage<1, [A8_LSPipe]>], [2, 3, 1, 1]>, 222 // 223 // Scaled register offset with update, issues over 2 cycles 224 InstrItinData<IIC_iStore_siu, [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>, 225 InstrStage<2, [A8_LSPipe]>], [3, 3, 1, 1]>, 226 InstrItinData<IIC_iStore_bh_siu,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>, 227 InstrStage<2, [A8_LSPipe]>], [3, 3, 1, 1]>, 228 // 229 // Store multiple. Pipeline 0 only. 230 // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers. 231 InstrItinData<IIC_iStore_m , [InstrStage<2, [A8_Pipe0], 0>, 232 InstrStage<2, [A8_LSPipe]>], 233 [], [], -1>, // dynamic uops 234 // 235 // Store multiple + update 236 InstrItinData<IIC_iStore_mu, [InstrStage<2, [A8_Pipe0], 0>, 237 InstrStage<2, [A8_LSPipe]>], 238 [2], [], -1>, // dynamic uops 239 // 240 // Preload 241 InstrItinData<IIC_Preload, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, 242 243 // Branch 244 // 245 // no delay slots, so the latency of a branch is unimportant 246 InstrItinData<IIC_Br , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>, 247 248 // VFP 249 // Issue through integer pipeline, and execute in NEON unit. We assume 250 // RunFast mode so that NFP pipeline is used for single-precision when 251 // possible. 
252 // 253 // FP Special Register to Integer Register File Move 254 InstrItinData<IIC_fpSTAT , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 255 InstrStage<1, [A8_NLSPipe]>], [20]>, 256 // 257 // Single-precision FP Unary 258 InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 259 InstrStage<1, [A8_NPipe]>], [7, 1]>, 260 // 261 // Double-precision FP Unary 262 InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 263 InstrStage<4, [A8_NPipe], 0>, 264 InstrStage<4, [A8_NLSPipe]>], [4, 1]>, 265 // 266 // Single-precision FP Compare 267 InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 268 InstrStage<1, [A8_NPipe]>], [1, 1]>, 269 // 270 // Double-precision FP Compare 271 InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 272 InstrStage<4, [A8_NPipe], 0>, 273 InstrStage<4, [A8_NLSPipe]>], [4, 1]>, 274 // 275 // Single to Double FP Convert 276 InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 277 InstrStage<7, [A8_NPipe], 0>, 278 InstrStage<7, [A8_NLSPipe]>], [7, 1]>, 279 // 280 // Double to Single FP Convert 281 InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 282 InstrStage<5, [A8_NPipe], 0>, 283 InstrStage<5, [A8_NLSPipe]>], [5, 1]>, 284 // 285 // Single-Precision FP to Integer Convert 286 InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 287 InstrStage<1, [A8_NPipe]>], [7, 1]>, 288 // 289 // Double-Precision FP to Integer Convert 290 InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 291 InstrStage<8, [A8_NPipe], 0>, 292 InstrStage<8, [A8_NLSPipe]>], [8, 1]>, 293 // 294 // Integer to Single-Precision FP Convert 295 InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 296 InstrStage<1, [A8_NPipe]>], [7, 1]>, 297 // 298 // Integer to Double-Precision FP Convert 299 InstrItinData<IIC_fpCVTID , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 300 InstrStage<8, [A8_NPipe], 0>, 301 InstrStage<8, [A8_NLSPipe]>], [8, 1]>, 302 // 
303 // Single-precision FP ALU 304 InstrItinData<IIC_fpALU32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 305 InstrStage<1, [A8_NPipe]>], [7, 1, 1]>, 306 // 307 // Double-precision FP ALU 308 InstrItinData<IIC_fpALU64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 309 InstrStage<9, [A8_NPipe], 0>, 310 InstrStage<9, [A8_NLSPipe]>], [9, 1, 1]>, 311 // 312 // Single-precision FP Multiply 313 InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 314 InstrStage<1, [A8_NPipe]>], [7, 1, 1]>, 315 // 316 // Double-precision FP Multiply 317 InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 318 InstrStage<11, [A8_NPipe], 0>, 319 InstrStage<11, [A8_NLSPipe]>], [11, 1, 1]>, 320 // 321 // Single-precision FP MAC 322 InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 323 InstrStage<1, [A8_NPipe]>], [7, 2, 1, 1]>, 324 // 325 // Double-precision FP MAC 326 InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 327 InstrStage<19, [A8_NPipe], 0>, 328 InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>, 329 // 330 // Single-precision Fused FP MAC 331 InstrItinData<IIC_fpFMAC32, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 332 InstrStage<1, [A8_NPipe]>], [7, 2, 1, 1]>, 333 // 334 // Double-precision Fused FP MAC 335 InstrItinData<IIC_fpFMAC64, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 336 InstrStage<19, [A8_NPipe], 0>, 337 InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>, 338 // 339 // Single-precision FP DIV 340 InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 341 InstrStage<20, [A8_NPipe], 0>, 342 InstrStage<20, [A8_NLSPipe]>], [20, 1, 1]>, 343 // 344 // Double-precision FP DIV 345 InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 346 InstrStage<29, [A8_NPipe], 0>, 347 InstrStage<29, [A8_NLSPipe]>], [29, 1, 1]>, 348 // 349 // Single-precision FP SQRT 350 InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 351 InstrStage<19, [A8_NPipe], 0>, 352 InstrStage<19, [A8_NLSPipe]>], [19, 1]>, 
353 // 354 // Double-precision FP SQRT 355 InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 356 InstrStage<29, [A8_NPipe], 0>, 357 InstrStage<29, [A8_NLSPipe]>], [29, 1]>, 358 359 // 360 // Integer to Single-precision Move 361 InstrItinData<IIC_fpMOVIS, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 362 InstrStage<1, [A8_NPipe]>], 363 [2, 1]>, 364 // 365 // Integer to Double-precision Move 366 InstrItinData<IIC_fpMOVID, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 367 InstrStage<1, [A8_NPipe]>], 368 [2, 1, 1]>, 369 // 370 // Single-precision to Integer Move 371 InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 372 InstrStage<1, [A8_NPipe]>], 373 [20, 1]>, 374 // 375 // Double-precision to Integer Move 376 InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 377 InstrStage<1, [A8_NPipe]>], 378 [20, 20, 1]>, 379 380 // 381 // Single-precision FP Load 382 InstrItinData<IIC_fpLoad32, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 383 InstrStage<1, [A8_NLSPipe], 0>, 384 InstrStage<1, [A8_LSPipe]>], 385 [2, 1]>, 386 // 387 // Double-precision FP Load 388 InstrItinData<IIC_fpLoad64, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 389 InstrStage<1, [A8_NLSPipe], 0>, 390 InstrStage<1, [A8_LSPipe]>], 391 [2, 1]>, 392 // 393 // FP Load Multiple 394 // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers. 
395 InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 396 InstrStage<1, [A8_NLSPipe], 0>, 397 InstrStage<1, [A8_LSPipe]>, 398 InstrStage<1, [A8_NLSPipe], 0>, 399 InstrStage<1, [A8_LSPipe]>], 400 [1, 1, 1, 2], [], -1>, // dynamic uops 401 // 402 // FP Load Multiple + update 403 InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 404 InstrStage<1, [A8_NLSPipe], 0>, 405 InstrStage<1, [A8_LSPipe]>, 406 InstrStage<1, [A8_NLSPipe], 0>, 407 InstrStage<1, [A8_LSPipe]>], 408 [2, 1, 1, 1, 2], [], -1>, // dynamic uops 409 // 410 // Single-precision FP Store 411 InstrItinData<IIC_fpStore32,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 412 InstrStage<1, [A8_NLSPipe], 0>, 413 InstrStage<1, [A8_LSPipe]>], 414 [1, 1]>, 415 // 416 // Double-precision FP Store 417 InstrItinData<IIC_fpStore64,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 418 InstrStage<1, [A8_NLSPipe], 0>, 419 InstrStage<1, [A8_LSPipe]>], 420 [1, 1]>, 421 // 422 // FP Store Multiple 423 InstrItinData<IIC_fpStore_m,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 424 InstrStage<1, [A8_NLSPipe], 0>, 425 InstrStage<1, [A8_LSPipe]>, 426 InstrStage<1, [A8_NLSPipe], 0>, 427 InstrStage<1, [A8_LSPipe]>], 428 [1, 1, 1, 1], [], -1>, // dynamic uops 429 // 430 // FP Store Multiple + update 431 InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 432 InstrStage<1, [A8_NLSPipe], 0>, 433 InstrStage<1, [A8_LSPipe]>, 434 InstrStage<1, [A8_NLSPipe], 0>, 435 InstrStage<1, [A8_LSPipe]>], 436 [2, 1, 1, 1, 1], [], -1>, // dynamic uops 437 // NEON 438 // Issue through integer pipeline, and execute in NEON unit. 
439 // 440 // VLD1 441 InstrItinData<IIC_VLD1, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 442 InstrStage<2, [A8_NLSPipe], 0>, 443 InstrStage<2, [A8_LSPipe]>], 444 [2, 1]>, 445 // VLD1x2 446 InstrItinData<IIC_VLD1x2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 447 InstrStage<2, [A8_NLSPipe], 0>, 448 InstrStage<2, [A8_LSPipe]>], 449 [2, 2, 1]>, 450 // 451 // VLD1x3 452 InstrItinData<IIC_VLD1x3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 453 InstrStage<3, [A8_NLSPipe], 0>, 454 InstrStage<3, [A8_LSPipe]>], 455 [2, 2, 3, 1]>, 456 // 457 // VLD1x4 458 InstrItinData<IIC_VLD1x4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 459 InstrStage<3, [A8_NLSPipe], 0>, 460 InstrStage<3, [A8_LSPipe]>], 461 [2, 2, 3, 3, 1]>, 462 // 463 // VLD1u 464 InstrItinData<IIC_VLD1u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 465 InstrStage<2, [A8_NLSPipe], 0>, 466 InstrStage<2, [A8_LSPipe]>], 467 [2, 2, 1]>, 468 // 469 // VLD1x2u 470 InstrItinData<IIC_VLD1x2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 471 InstrStage<2, [A8_NLSPipe], 0>, 472 InstrStage<2, [A8_LSPipe]>], 473 [2, 2, 2, 1]>, 474 // 475 // VLD1x3u 476 InstrItinData<IIC_VLD1x3u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 477 InstrStage<3, [A8_NLSPipe], 0>, 478 InstrStage<3, [A8_LSPipe]>], 479 [2, 2, 3, 2, 1]>, 480 // 481 // VLD1x4u 482 InstrItinData<IIC_VLD1x4u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 483 InstrStage<3, [A8_NLSPipe], 0>, 484 InstrStage<3, [A8_LSPipe]>], 485 [2, 2, 3, 3, 2, 1]>, 486 // 487 // VLD1ln 488 InstrItinData<IIC_VLD1ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 489 InstrStage<3, [A8_NLSPipe], 0>, 490 InstrStage<3, [A8_LSPipe]>], 491 [3, 1, 1, 1]>, 492 // 493 // VLD1lnu 494 InstrItinData<IIC_VLD1lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 495 InstrStage<3, [A8_NLSPipe], 0>, 496 InstrStage<3, [A8_LSPipe]>], 497 [3, 2, 1, 1, 1, 1]>, 498 // 499 // VLD1dup 500 InstrItinData<IIC_VLD1dup, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 501 InstrStage<2, [A8_NLSPipe], 0>, 502 InstrStage<2, [A8_LSPipe]>], 503 [2, 1]>, 504 // 505 // VLD1dupu 506 
InstrItinData<IIC_VLD1dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 507 InstrStage<2, [A8_NLSPipe], 0>, 508 InstrStage<2, [A8_LSPipe]>], 509 [2, 2, 1, 1]>, 510 // 511 // VLD2 512 InstrItinData<IIC_VLD2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 513 InstrStage<2, [A8_NLSPipe], 0>, 514 InstrStage<2, [A8_LSPipe]>], 515 [2, 2, 1]>, 516 // 517 // VLD2x2 518 InstrItinData<IIC_VLD2x2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 519 InstrStage<3, [A8_NLSPipe], 0>, 520 InstrStage<3, [A8_LSPipe]>], 521 [2, 2, 3, 3, 1]>, 522 // 523 // VLD2ln 524 InstrItinData<IIC_VLD2ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 525 InstrStage<3, [A8_NLSPipe], 0>, 526 InstrStage<3, [A8_LSPipe]>], 527 [3, 3, 1, 1, 1, 1]>, 528 // 529 // VLD2u 530 InstrItinData<IIC_VLD2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 531 InstrStage<2, [A8_NLSPipe], 0>, 532 InstrStage<2, [A8_LSPipe]>], 533 [2, 2, 2, 1, 1, 1]>, 534 // 535 // VLD2x2u 536 InstrItinData<IIC_VLD2x2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 537 InstrStage<3, [A8_NLSPipe], 0>, 538 InstrStage<3, [A8_LSPipe]>], 539 [2, 2, 3, 3, 2, 1]>, 540 // 541 // VLD2lnu 542 InstrItinData<IIC_VLD2lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 543 InstrStage<3, [A8_NLSPipe], 0>, 544 InstrStage<3, [A8_LSPipe]>], 545 [3, 3, 2, 1, 1, 1, 1, 1]>, 546 // 547 // VLD2dup 548 InstrItinData<IIC_VLD2dup, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 549 InstrStage<2, [A8_NLSPipe], 0>, 550 InstrStage<2, [A8_LSPipe]>], 551 [2, 2, 1]>, 552 // 553 // VLD2dupu 554 InstrItinData<IIC_VLD2dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 555 InstrStage<2, [A8_NLSPipe], 0>, 556 InstrStage<2, [A8_LSPipe]>], 557 [2, 2, 2, 1, 1]>, 558 // 559 // VLD3 560 InstrItinData<IIC_VLD3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 561 InstrStage<4, [A8_NLSPipe], 0>, 562 InstrStage<4, [A8_LSPipe]>], 563 [3, 3, 4, 1]>, 564 // 565 // VLD3ln 566 InstrItinData<IIC_VLD3ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 567 InstrStage<5, [A8_NLSPipe], 0>, 568 InstrStage<5, [A8_LSPipe]>], 569 [4, 4, 5, 1, 1, 1, 1, 2]>, 570 // 
571 // VLD3u 572 InstrItinData<IIC_VLD3u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 573 InstrStage<4, [A8_NLSPipe], 0>, 574 InstrStage<4, [A8_LSPipe]>], 575 [3, 3, 4, 2, 1]>, 576 // 577 // VLD3lnu 578 InstrItinData<IIC_VLD3lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 579 InstrStage<5, [A8_NLSPipe], 0>, 580 InstrStage<5, [A8_LSPipe]>], 581 [4, 4, 5, 2, 1, 1, 1, 1, 1, 2]>, 582 // 583 // VLD3dup 584 InstrItinData<IIC_VLD3dup, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 585 InstrStage<3, [A8_NLSPipe], 0>, 586 InstrStage<3, [A8_LSPipe]>], 587 [2, 2, 3, 1]>, 588 // 589 // VLD3dupu 590 InstrItinData<IIC_VLD3dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 591 InstrStage<3, [A8_NLSPipe], 0>, 592 InstrStage<3, [A8_LSPipe]>], 593 [2, 2, 3, 2, 1, 1]>, 594 // 595 // VLD4 596 InstrItinData<IIC_VLD4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 597 InstrStage<4, [A8_NLSPipe], 0>, 598 InstrStage<4, [A8_LSPipe]>], 599 [3, 3, 4, 4, 1]>, 600 // 601 // VLD4ln 602 InstrItinData<IIC_VLD4ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 603 InstrStage<5, [A8_NLSPipe], 0>, 604 InstrStage<5, [A8_LSPipe]>], 605 [4, 4, 5, 5, 1, 1, 1, 1, 2, 2]>, 606 // 607 // VLD4u 608 InstrItinData<IIC_VLD4u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 609 InstrStage<4, [A8_NLSPipe], 0>, 610 InstrStage<4, [A8_LSPipe]>], 611 [3, 3, 4, 4, 2, 1]>, 612 // 613 // VLD4lnu 614 InstrItinData<IIC_VLD4lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 615 InstrStage<5, [A8_NLSPipe], 0>, 616 InstrStage<5, [A8_LSPipe]>], 617 [4, 4, 5, 5, 2, 1, 1, 1, 1, 1, 2, 2]>, 618 // 619 // VLD4dup 620 InstrItinData<IIC_VLD4dup, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 621 InstrStage<3, [A8_NLSPipe], 0>, 622 InstrStage<3, [A8_LSPipe]>], 623 [2, 2, 3, 3, 1]>, 624 // 625 // VLD4dupu 626 InstrItinData<IIC_VLD4dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 627 InstrStage<3, [A8_NLSPipe], 0>, 628 InstrStage<3, [A8_LSPipe]>], 629 [2, 2, 3, 3, 2, 1, 1]>, 630 // 631 // VST1 632 InstrItinData<IIC_VST1, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 633 InstrStage<2, [A8_NLSPipe], 
0>, 634 InstrStage<2, [A8_LSPipe]>], 635 [1, 1, 1]>, 636 // 637 // VST1x2 638 InstrItinData<IIC_VST1x2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 639 InstrStage<2, [A8_NLSPipe], 0>, 640 InstrStage<2, [A8_LSPipe]>], 641 [1, 1, 1, 1]>, 642 // 643 // VST1x3 644 InstrItinData<IIC_VST1x3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 645 InstrStage<3, [A8_NLSPipe], 0>, 646 InstrStage<3, [A8_LSPipe]>], 647 [1, 1, 1, 1, 2]>, 648 // 649 // VST1x4 650 InstrItinData<IIC_VST1x4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 651 InstrStage<3, [A8_NLSPipe], 0>, 652 InstrStage<3, [A8_LSPipe]>], 653 [1, 1, 1, 1, 2, 2]>, 654 // 655 // VST1u 656 InstrItinData<IIC_VST1u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 657 InstrStage<2, [A8_NLSPipe], 0>, 658 InstrStage<2, [A8_LSPipe]>], 659 [2, 1, 1, 1, 1]>, 660 // 661 // VST1x2u 662 InstrItinData<IIC_VST1x2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 663 InstrStage<2, [A8_NLSPipe], 0>, 664 InstrStage<2, [A8_LSPipe]>], 665 [2, 1, 1, 1, 1, 1]>, 666 // 667 // VST1x3u 668 InstrItinData<IIC_VST1x3u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 669 InstrStage<3, [A8_NLSPipe], 0>, 670 InstrStage<3, [A8_LSPipe]>], 671 [2, 1, 1, 1, 1, 1, 2]>, 672 // 673 // VST1x4u 674 InstrItinData<IIC_VST1x4u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 675 InstrStage<3, [A8_NLSPipe], 0>, 676 InstrStage<3, [A8_LSPipe]>], 677 [2, 1, 1, 1, 1, 1, 2, 2]>, 678 // 679 // VST1ln 680 InstrItinData<IIC_VST1ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 681 InstrStage<2, [A8_NLSPipe], 0>, 682 InstrStage<2, [A8_LSPipe]>], 683 [1, 1, 1]>, 684 // 685 // VST1lnu 686 InstrItinData<IIC_VST1lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 687 InstrStage<2, [A8_NLSPipe], 0>, 688 InstrStage<2, [A8_LSPipe]>], 689 [2, 1, 1, 1, 1]>, 690 // 691 // VST2 692 InstrItinData<IIC_VST2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 693 InstrStage<2, [A8_NLSPipe], 0>, 694 InstrStage<2, [A8_LSPipe]>], 695 [1, 1, 1, 1]>, 696 // 697 // VST2x2 698 InstrItinData<IIC_VST2x2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 699 InstrStage<4, 
[A8_NLSPipe], 0>, 700 InstrStage<4, [A8_LSPipe]>], 701 [1, 1, 1, 1, 2, 2]>, 702 // 703 // VST2u 704 InstrItinData<IIC_VST2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 705 InstrStage<2, [A8_NLSPipe], 0>, 706 InstrStage<2, [A8_LSPipe]>], 707 [2, 1, 1, 1, 1, 1]>, 708 // 709 // VST2x2u 710 InstrItinData<IIC_VST2x2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 711 InstrStage<4, [A8_NLSPipe], 0>, 712 InstrStage<4, [A8_LSPipe]>], 713 [2, 1, 1, 1, 1, 1, 2, 2]>, 714 // 715 // VST2ln 716 InstrItinData<IIC_VST2ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 717 InstrStage<2, [A8_NLSPipe], 0>, 718 InstrStage<2, [A8_LSPipe]>], 719 [1, 1, 1, 1]>, 720 // 721 // VST2lnu 722 InstrItinData<IIC_VST2lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 723 InstrStage<2, [A8_NLSPipe], 0>, 724 InstrStage<2, [A8_LSPipe]>], 725 [2, 1, 1, 1, 1, 1]>, 726 // 727 // VST3 728 InstrItinData<IIC_VST3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 729 InstrStage<3, [A8_NLSPipe], 0>, 730 InstrStage<3, [A8_LSPipe]>], 731 [1, 1, 1, 1, 2]>, 732 // 733 // VST3u 734 InstrItinData<IIC_VST3u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 735 InstrStage<3, [A8_NLSPipe], 0>, 736 InstrStage<3, [A8_LSPipe]>], 737 [2, 1, 1, 1, 1, 1, 2]>, 738 // 739 // VST3ln 740 InstrItinData<IIC_VST3ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 741 InstrStage<3, [A8_NLSPipe], 0>, 742 InstrStage<3, [A8_LSPipe]>], 743 [1, 1, 1, 1, 2]>, 744 // 745 // VST3lnu 746 InstrItinData<IIC_VST3lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 747 InstrStage<3, [A8_NLSPipe], 0>, 748 InstrStage<3, [A8_LSPipe]>], 749 [2, 1, 1, 1, 1, 1, 2]>, 750 // 751 // VST4 752 InstrItinData<IIC_VST4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 753 InstrStage<4, [A8_NLSPipe], 0>, 754 InstrStage<4, [A8_LSPipe]>], 755 [1, 1, 1, 1, 2, 2]>, 756 // 757 // VST4u 758 InstrItinData<IIC_VST4u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 759 InstrStage<4, [A8_NLSPipe], 0>, 760 InstrStage<4, [A8_LSPipe]>], 761 [2, 1, 1, 1, 1, 1, 2, 2]>, 762 // 763 // VST4ln 764 InstrItinData<IIC_VST4ln, [InstrStage<1, 
[A8_Pipe0, A8_Pipe1], 0>, 765 InstrStage<4, [A8_NLSPipe], 0>, 766 InstrStage<4, [A8_LSPipe]>], 767 [1, 1, 1, 1, 2, 2]>, 768 // 769 // VST4lnu 770 InstrItinData<IIC_VST4lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 771 InstrStage<4, [A8_NLSPipe], 0>, 772 InstrStage<4, [A8_LSPipe]>], 773 [2, 1, 1, 1, 1, 1, 2, 2]>, 774 // 775 // Double-register FP Unary 776 InstrItinData<IIC_VUNAD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 777 InstrStage<1, [A8_NPipe]>], [5, 2]>, 778 // 779 // Quad-register FP Unary 780 // Result written in N5, but that is relative to the last cycle of multicycle, 781 // so we use 6 for those cases 782 InstrItinData<IIC_VUNAQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 783 InstrStage<2, [A8_NPipe]>], [6, 2]>, 784 // 785 // Double-register FP Binary 786 InstrItinData<IIC_VBIND, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 787 InstrStage<1, [A8_NPipe]>], [5, 2, 2]>, 788 // 789 // VPADD, etc. 790 InstrItinData<IIC_VPBIND, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 791 InstrStage<1, [A8_NPipe]>], [5, 2, 2]>, 792 // 793 // Double-register FP VMUL 794 InstrItinData<IIC_VFMULD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 795 InstrStage<1, [A8_NPipe]>], [5, 2, 1]>, 796 797 // 798 // Quad-register FP Binary 799 // Result written in N5, but that is relative to the last cycle of multicycle, 800 // so we use 6 for those cases 801 InstrItinData<IIC_VBINQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 802 InstrStage<2, [A8_NPipe]>], [6, 2, 2]>, 803 // 804 // Quad-register FP VMUL 805 InstrItinData<IIC_VFMULQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 806 InstrStage<1, [A8_NPipe]>], [6, 2, 1]>, 807 // 808 // Move 809 InstrItinData<IIC_VMOV, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 810 InstrStage<1, [A8_NPipe]>], [1, 1]>, 811 // 812 // Move Immediate 813 InstrItinData<IIC_VMOVImm, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 814 InstrStage<1, [A8_NPipe]>], [3]>, 815 // 816 // Double-register Permute Move 817 InstrItinData<IIC_VMOVD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 818 
InstrStage<1, [A8_NLSPipe]>], [2, 1]>, 819 // 820 // Quad-register Permute Move 821 // Result written in N2, but that is relative to the last cycle of multicycle, 822 // so we use 3 for those cases 823 InstrItinData<IIC_VMOVQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 824 InstrStage<2, [A8_NLSPipe]>], [3, 1]>, 825 // 826 // Integer to Single-precision Move 827 InstrItinData<IIC_VMOVIS , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 828 InstrStage<1, [A8_NLSPipe]>], [2, 1]>, 829 // 830 // Integer to Double-precision Move 831 InstrItinData<IIC_VMOVID , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 832 InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>, 833 // 834 // Single-precision to Integer Move 835 InstrItinData<IIC_VMOVSI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 836 InstrStage<1, [A8_NLSPipe]>], [20, 1]>, 837 // 838 // Double-precision to Integer Move 839 InstrItinData<IIC_VMOVDI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 840 InstrStage<1, [A8_NLSPipe]>], [20, 20, 1]>, 841 // 842 // Integer to Lane Move 843 InstrItinData<IIC_VMOVISL , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 844 InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>, 845 // 846 // Vector narrow move 847 InstrItinData<IIC_VMOVN , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 848 InstrStage<1, [A8_NPipe]>], [2, 1]>, 849 // 850 // Double-register Permute 851 InstrItinData<IIC_VPERMD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 852 InstrStage<1, [A8_NLSPipe]>], [2, 2, 1, 1]>, 853 // 854 // Quad-register Permute 855 // Result written in N2, but that is relative to the last cycle of multicycle, 856 // so we use 3 for those cases 857 InstrItinData<IIC_VPERMQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 858 InstrStage<2, [A8_NLSPipe]>], [3, 3, 1, 1]>, 859 // 860 // Quad-register Permute (3 cycle issue) 861 // Result written in N2, but that is relative to the last cycle of multicycle, 862 // so we use 4 for those cases 863 InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 864 InstrStage<1, [A8_NLSPipe]>, 865 InstrStage<1, 
[A8_NPipe], 0>, 866 InstrStage<2, [A8_NLSPipe]>], [4, 4, 1, 1]>, 867 // 868 // Double-register FP Multiply-Accumulate 869 InstrItinData<IIC_VMACD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 870 InstrStage<1, [A8_NPipe]>], [9, 3, 2, 2]>, 871 // 872 // Quad-register FP Multiply-Accumulate 873 // Result written in N9, but that is relative to the last cycle of multicycle, 874 // so we use 10 for those cases 875 InstrItinData<IIC_VMACQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 876 InstrStage<2, [A8_NPipe]>], [10, 3, 2, 2]>, 877 // 878 // Double-register Fused FP Multiply-Accumulate 879 InstrItinData<IIC_VFMACD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 880 InstrStage<1, [A8_NPipe]>], [9, 3, 2, 2]>, 881 // 882 // Quad-register Fused FP Multiply-Accumulate 883 // Result written in N9, but that is relative to the last cycle of multicycle, 884 // so we use 10 for those cases 885 InstrItinData<IIC_VFMACQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 886 InstrStage<2, [A8_NPipe]>], [10, 3, 2, 2]>, 887 // 888 // Double-register Reciprocal Step 889 InstrItinData<IIC_VRECSD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 890 InstrStage<1, [A8_NPipe]>], [9, 2, 2]>, 891 // 892 // Quad-register Reciprocal Step 893 InstrItinData<IIC_VRECSQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 894 InstrStage<2, [A8_NPipe]>], [10, 2, 2]>, 895 // 896 // Double-register Integer Count 897 InstrItinData<IIC_VCNTiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 898 InstrStage<1, [A8_NPipe]>], [3, 2, 2]>, 899 // 900 // Quad-register Integer Count 901 // Result written in N3, but that is relative to the last cycle of multicycle, 902 // so we use 4 for those cases 903 InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 904 InstrStage<2, [A8_NPipe]>], [4, 2, 2]>, 905 // 906 // Double-register Integer Unary 907 InstrItinData<IIC_VUNAiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 908 InstrStage<1, [A8_NPipe]>], [4, 2]>, 909 // 910 // Quad-register Integer Unary 911 InstrItinData<IIC_VUNAiQ, [InstrStage<1,
[A8_Pipe0, A8_Pipe1], 0>, 912 InstrStage<1, [A8_NPipe]>], [4, 2]>, 913 // 914 // Double-register Integer Q-Unary 915 InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 916 InstrStage<1, [A8_NPipe]>], [4, 1]>, 917 // 918 // Quad-register Integer Q-Unary 919 InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 920 InstrStage<1, [A8_NPipe]>], [4, 1]>, 921 // 922 // Double-register Integer Binary 923 InstrItinData<IIC_VBINiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 924 InstrStage<1, [A8_NPipe]>], [3, 2, 2]>, 925 // 926 // Quad-register Integer Binary 927 InstrItinData<IIC_VBINiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 928 InstrStage<1, [A8_NPipe]>], [3, 2, 2]>, 929 // 930 // Double-register Integer Binary (4 cycle) 931 InstrItinData<IIC_VBINi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 932 InstrStage<1, [A8_NPipe]>], [4, 2, 1]>, 933 // 934 // Quad-register Integer Binary (4 cycle) 935 InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 936 InstrStage<1, [A8_NPipe]>], [4, 2, 1]>, 937 938 // 939 // Double-register Integer Subtract 940 InstrItinData<IIC_VSUBiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 941 InstrStage<1, [A8_NPipe]>], [3, 2, 1]>, 942 // 943 // Quad-register Integer Subtract 944 InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 945 InstrStage<1, [A8_NPipe]>], [3, 2, 1]>, 946 // 947 // Double-register Integer Subtract (4 cycle) 948 InstrItinData<IIC_VSUBi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 949 InstrStage<1, [A8_NPipe]>], [4, 2, 1]>, 950 // 951 // Quad-register Integer Subtract (4 cycle) 952 InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 953 InstrStage<1, [A8_NPipe]>], [4, 2, 1]>, 954 // 955 // Double-register Integer Shift 956 InstrItinData<IIC_VSHLiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 957 InstrStage<1, [A8_NPipe]>], [3, 1, 1]>, 958 // 959 // Quad-register Integer Shift 960 InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 961 InstrStage<2, [A8_NPipe]>],
[4, 1, 1]>, 962 // 963 // Double-register Integer Shift (4 cycle) 964 InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 965 InstrStage<1, [A8_NPipe]>], [4, 1, 1]>, 966 // 967 // Quad-register Integer Shift (4 cycle) 968 InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 969 InstrStage<2, [A8_NPipe]>], [5, 1, 1]>, 970 // 971 // Double-register Integer Pair Add Long 972 InstrItinData<IIC_VPALiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 973 InstrStage<1, [A8_NPipe]>], [6, 3, 1]>, 974 // 975 // Quad-register Integer Pair Add Long 976 InstrItinData<IIC_VPALiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 977 InstrStage<2, [A8_NPipe]>], [7, 3, 1]>, 978 // 979 // Double-register Absolute Difference and Accumulate 980 InstrItinData<IIC_VABAD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 981 InstrStage<1, [A8_NPipe]>], [6, 3, 2, 1]>, 982 // 983 // Quad-register Absolute Difference and Accumulate 984 InstrItinData<IIC_VABAQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 985 InstrStage<2, [A8_NPipe]>], [6, 3, 2, 1]>, 986 987 // 988 // Double-register Integer Multiply (.8, .16) 989 InstrItinData<IIC_VMULi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 990 InstrStage<1, [A8_NPipe]>], [6, 2, 2]>, 991 // 992 // Double-register Integer Multiply (.32) 993 InstrItinData<IIC_VMULi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 994 InstrStage<2, [A8_NPipe]>], [7, 2, 1]>, 995 // 996 // Quad-register Integer Multiply (.8, .16) 997 InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 998 InstrStage<2, [A8_NPipe]>], [7, 2, 2]>, 999 // 1000 // Quad-register Integer Multiply (.32) 1001 InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 1002 InstrStage<1, [A8_NPipe]>, 1003 InstrStage<2, [A8_NLSPipe], 0>, 1004 InstrStage<3, [A8_NPipe]>], [9, 2, 1]>, 1005 // 1006 // Double-register Integer Multiply-Accumulate (.8, .16) 1007 InstrItinData<IIC_VMACi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 1008 InstrStage<1, [A8_NPipe]>], [6, 3, 2, 2]>, 
1009 // 1010 // Double-register Integer Multiply-Accumulate (.32) 1011 InstrItinData<IIC_VMACi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 1012 InstrStage<2, [A8_NPipe]>], [7, 3, 2, 1]>, 1013 // 1014 // Quad-register Integer Multiply-Accumulate (.8, .16) 1015 InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 1016 InstrStage<2, [A8_NPipe]>], [7, 3, 2, 2]>, 1017 // 1018 // Quad-register Integer Multiply-Accumulate (.32) 1019 InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 1020 InstrStage<1, [A8_NPipe]>, 1021 InstrStage<2, [A8_NLSPipe], 0>, 1022 InstrStage<3, [A8_NPipe]>], [9, 3, 2, 1]>, 1023 // 1024 // Double-register VEXT 1025 InstrItinData<IIC_VEXTD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 1026 InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>, 1027 // 1028 // Quad-register VEXT 1029 InstrItinData<IIC_VEXTQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 1030 InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>, 1031 // 1032 // VTB 1033 InstrItinData<IIC_VTB1, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 1034 InstrStage<2, [A8_NLSPipe]>], [3, 2, 1]>, 1035 InstrItinData<IIC_VTB2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 1036 InstrStage<2, [A8_NLSPipe]>], [3, 2, 2, 1]>, 1037 InstrItinData<IIC_VTB3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 1038 InstrStage<1, [A8_NLSPipe]>, 1039 InstrStage<1, [A8_NPipe], 0>, 1040 InstrStage<2, [A8_NLSPipe]>], [4, 2, 2, 3, 1]>, 1041 InstrItinData<IIC_VTB4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 1042 InstrStage<1, [A8_NLSPipe]>, 1043 InstrStage<1, [A8_NPipe], 0>, 1044 InstrStage<2, [A8_NLSPipe]>],[4, 2, 2, 3, 3, 1]>, 1045 // 1046 // VTBX 1047 InstrItinData<IIC_VTBX1, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 1048 InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 1]>, 1049 InstrItinData<IIC_VTBX2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 1050 InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 2, 1]>, 1051 InstrItinData<IIC_VTBX3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 1052 InstrStage<1, [A8_NLSPipe]>, 1053 InstrStage<1, [A8_NPipe], 0>, 1054 
InstrStage<2, [A8_NLSPipe]>],[4, 1, 2, 2, 3, 1]>, 1055 InstrItinData<IIC_VTBX4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 1056 InstrStage<1, [A8_NLSPipe]>, 1057 InstrStage<1, [A8_NPipe], 0>, 1058 InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]> 1059 ]>; 1060 1061 // ===---------------------------------------------------------------------===// 1062 // The following definitions describe the simple machine model which 1063 // will replace itineraries. 1064 1065 // Cortex-A8 machine model for scheduling and other instruction cost heuristics. 1066 def CortexA8Model : SchedMachineModel { 1067 let IssueWidth = 2; // 2 micro-ops are dispatched per cycle. 1068 let LoadLatency = 2; // Optimistic load latency assuming bypass. 1069 // This is overridden by OperandCycles if the 1070 // Itineraries are queried instead. 1071 let MispredictPenalty = 13; // Based on estimate of pipeline depth. 1072 let CompleteModel = 0; // NOTE(review): presumably marks the model as not covering every instruction - confirm against SchedMachineModel. 1073 1074 let Itineraries = CortexA8Itineraries; // Reuse the itinerary data defined above in this file. 1075 } 1076