1 //=- ARMScheduleA8.td - ARM Cortex-A8 Scheduling Definitions -*- tablegen -*-=// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines the itinerary class data for the ARM Cortex A8 processors. 11 // 12 //===----------------------------------------------------------------------===// 13 14 // 15 // Scheduling information derived from "Cortex-A8 Technical Reference Manual". 16 // Functional Units. 17 def A8_Pipe0 : FuncUnit; // pipeline 0 18 def A8_Pipe1 : FuncUnit; // pipeline 1 19 def A8_LSPipe : FuncUnit; // Load / store pipeline 20 def A8_NPipe : FuncUnit; // NEON ALU/MUL pipe 21 def A8_NLSPipe : FuncUnit; // NEON LS pipe 22 // 23 // Dual issue pipeline represented by A8_Pipe0 | A8_Pipe1 24 // 25 def CortexA8Itineraries : ProcessorItineraries< 26 [A8_Pipe0, A8_Pipe1, A8_LSPipe, A8_NPipe, A8_NLSPipe], 27 [], [ 28 // Two fully-pipelined integer ALU pipelines 29 // 30 // No operand cycles 31 InstrItinData<IIC_iALUx , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>, 32 // 33 // Binary Instructions that produce a result 34 InstrItinData<IIC_iALUi ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, 35 InstrItinData<IIC_iALUr ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>, 36 InstrItinData<IIC_iALUsi,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>, 37 InstrItinData<IIC_iALUsir,[InstrStage<1,[A8_Pipe0, A8_Pipe1]>], [2, 1, 2]>, 38 InstrItinData<IIC_iALUsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>, 39 // 40 // Bitwise Instructions that produce a result 41 InstrItinData<IIC_iBITi ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, 42 InstrItinData<IIC_iBITr ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>, 43 InstrItinData<IIC_iBITsi,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>, 44 InstrItinData<IIC_iBITsr,[InstrStage<1, [A8_Pipe0, 
A8_Pipe1]>], [2, 2, 1, 1]>, 45 // 46 // Unary Instructions that produce a result 47 InstrItinData<IIC_iUNAr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, 48 InstrItinData<IIC_iUNAsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, 49 // 50 // Zero and sign extension instructions 51 InstrItinData<IIC_iEXTr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>, 52 InstrItinData<IIC_iEXTAr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>, 53 InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>],[2, 2, 1, 1]>, 54 // 55 // Compare instructions 56 InstrItinData<IIC_iCMPi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>, 57 InstrItinData<IIC_iCMPr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, 58 InstrItinData<IIC_iCMPsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, 59 InstrItinData<IIC_iCMPsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>, 60 // 61 // Test instructions 62 InstrItinData<IIC_iTSTi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>, 63 InstrItinData<IIC_iTSTr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, 64 InstrItinData<IIC_iTSTsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, 65 InstrItinData<IIC_iTSTsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>, 66 // 67 // Move instructions, unconditional 68 InstrItinData<IIC_iMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>, 69 InstrItinData<IIC_iMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>, 70 InstrItinData<IIC_iMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>, 71 InstrItinData<IIC_iMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>, 72 InstrItinData<IIC_iMOVix2,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 73 InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>, 74 InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 75 InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 76 InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [3]>, 77 InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 78 InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 79 InstrStage<1, [A8_LSPipe]>], [5]>, 80 // 81 // Move 
instructions, conditional 82 InstrItinData<IIC_iCMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>, 83 InstrItinData<IIC_iCMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, 84 InstrItinData<IIC_iCMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, 85 InstrItinData<IIC_iCMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>, 86 InstrItinData<IIC_iCMOVix2,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 87 InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [3, 1]>, 88 // 89 // MVN instructions 90 InstrItinData<IIC_iMVNi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>, 91 InstrItinData<IIC_iMVNr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>, 92 InstrItinData<IIC_iMVNsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>, 93 InstrItinData<IIC_iMVNsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>, 94 95 // Integer multiply pipeline 96 // Result written in E5, but that is relative to the last cycle of multicycle, 97 // so we use 6 for those cases 98 // 99 InstrItinData<IIC_iMUL16 , [InstrStage<1, [A8_Pipe0]>], [5, 1, 1]>, 100 InstrItinData<IIC_iMAC16 , [InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>, 101 InstrItinData<IIC_iMUL32 , [InstrStage<2, [A8_Pipe0]>], [6, 1, 1]>, 102 InstrItinData<IIC_iMAC32 , [InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>, 103 InstrItinData<IIC_iMUL64 , [InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>, 104 InstrItinData<IIC_iMAC64 , [InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>, 105 106 // Integer load pipeline 107 // 108 // Immediate offset 109 InstrItinData<IIC_iLoad_i , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 110 InstrStage<1, [A8_LSPipe]>], [3, 1]>, 111 InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 112 InstrStage<1, [A8_LSPipe]>], [3, 1]>, 113 InstrItinData<IIC_iLoad_d_i, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 114 InstrStage<1, [A8_LSPipe]>], [3, 1]>, 115 // 116 // Register offset 117 InstrItinData<IIC_iLoad_r , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 118 InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>, 119 InstrItinData<IIC_iLoad_bh_r, 
[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 120 InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>, 121 InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 122 InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>, 123 // 124 // Scaled register offset, issues over 2 cycles 125 // FIXME: lsl by 2 takes 1 cycle. 126 InstrItinData<IIC_iLoad_si , [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>, 127 InstrStage<1, [A8_LSPipe]>], [4, 1, 1]>, 128 InstrItinData<IIC_iLoad_bh_si,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>, 129 InstrStage<1, [A8_LSPipe]>], [4, 1, 1]>, 130 // 131 // Immediate offset with update 132 InstrItinData<IIC_iLoad_iu , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 133 InstrStage<1, [A8_LSPipe]>], [3, 2, 1]>, 134 InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 135 InstrStage<1, [A8_LSPipe]>], [3, 2, 1]>, 136 // 137 // Register offset with update 138 InstrItinData<IIC_iLoad_ru , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 139 InstrStage<1, [A8_LSPipe]>], [3, 2, 1, 1]>, 140 InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 141 InstrStage<1, [A8_LSPipe]>], [3, 2, 1, 1]>, 142 InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 143 InstrStage<1, [A8_LSPipe]>], [3, 2, 1, 1]>, 144 // 145 // Scaled register offset with update, issues over 2 cycles 146 InstrItinData<IIC_iLoad_siu , [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>, 147 InstrStage<2, [A8_LSPipe]>], [4, 3, 1, 1]>, 148 InstrItinData<IIC_iLoad_bh_siu,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>, 149 InstrStage<2, [A8_LSPipe]>], [4, 3, 1, 1]>, 150 // 151 // Load multiple, def is the 5th operand. Pipeline 0 only. 152 // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers. 153 InstrItinData<IIC_iLoad_m , [InstrStage<2, [A8_Pipe0], 0>, 154 InstrStage<2, [A8_LSPipe]>], [1, 1, 1, 1, 3]>, 155 // 156 // Load multiple + update, defs are the 1st and 5th operands. 
157 InstrItinData<IIC_iLoad_mu , [InstrStage<3, [A8_Pipe0], 0>, 158 InstrStage<3, [A8_LSPipe]>], [2, 1, 1, 1, 3]>, 159 // 160 // Load multiple plus branch 161 InstrItinData<IIC_iLoad_mBr, [InstrStage<3, [A8_Pipe0], 0>, 162 InstrStage<3, [A8_LSPipe]>, 163 InstrStage<1, [A8_Pipe0, A8_Pipe1]>], 164 [1, 2, 1, 1, 3]>, 165 // 166 // Pop, def is the 3rd operand. 167 InstrItinData<IIC_iPop , [InstrStage<3, [A8_Pipe0], 0>, 168 InstrStage<3, [A8_LSPipe]>], [1, 1, 3]>, 169 // 170 // Pop plus branch, def is the 3rd operand. 171 InstrItinData<IIC_iPop_Br, [InstrStage<3, [A8_Pipe0], 0>, 172 InstrStage<3, [A8_LSPipe]>, 173 InstrStage<1, [A8_Pipe0, A8_Pipe1]>], 174 [1, 1, 3]>, 175 176 // 177 // iLoadi + iALUr for t2LDRpci_pic. 178 InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 179 InstrStage<1, [A8_LSPipe]>, 180 InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [4, 1]>, 181 182 183 // Integer store pipeline 184 // 185 // Immediate offset 186 InstrItinData<IIC_iStore_i , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 187 InstrStage<1, [A8_LSPipe]>], [3, 1]>, 188 InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 189 InstrStage<1, [A8_LSPipe]>], [3, 1]>, 190 InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 191 InstrStage<1, [A8_LSPipe]>], [3, 1]>, 192 // 193 // Register offset 194 InstrItinData<IIC_iStore_r , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 195 InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>, 196 InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 197 InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>, 198 InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 199 InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>, 200 // 201 // Scaled register offset, issues over 2 cycles 202 InstrItinData<IIC_iStore_si , [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>, 203 InstrStage<2, [A8_LSPipe]>], [3, 1, 1]>, 204 InstrItinData<IIC_iStore_bh_si,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>, 205 InstrStage<2, [A8_LSPipe]>], [3, 1, 1]>, 206 // 207 // Immediate 
offset with update 208 InstrItinData<IIC_iStore_iu , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 209 InstrStage<1, [A8_LSPipe]>], [2, 3, 1]>, 210 InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 211 InstrStage<1, [A8_LSPipe]>], [2, 3, 1]>, 212 // 213 // Register offset with update 214 InstrItinData<IIC_iStore_ru , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 215 InstrStage<1, [A8_LSPipe]>], [2, 3, 1, 1]>, 216 InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 217 InstrStage<1, [A8_LSPipe]>], [2, 3, 1, 1]>, 218 InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 219 InstrStage<1, [A8_LSPipe]>], [2, 3, 1, 1]>, 220 // 221 // Scaled register offset with update, issues over 2 cycles 222 InstrItinData<IIC_iStore_siu, [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>, 223 InstrStage<2, [A8_LSPipe]>], [3, 3, 1, 1]>, 224 InstrItinData<IIC_iStore_bh_siu,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>, 225 InstrStage<2, [A8_LSPipe]>], [3, 3, 1, 1]>, 226 // 227 // Store multiple. Pipeline 0 only. 228 // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers. 229 InstrItinData<IIC_iStore_m , [InstrStage<2, [A8_Pipe0], 0>, 230 InstrStage<2, [A8_LSPipe]>]>, 231 // 232 // Store multiple + update 233 InstrItinData<IIC_iStore_mu, [InstrStage<2, [A8_Pipe0], 0>, 234 InstrStage<2, [A8_LSPipe]>], [2]>, 235 236 // 237 // Preload 238 InstrItinData<IIC_Preload, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, 239 240 // Branch 241 // 242 // no delay slots, so the latency of a branch is unimportant 243 InstrItinData<IIC_Br , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>, 244 245 // VFP 246 // Issue through integer pipeline, and execute in NEON unit. We assume 247 // RunFast mode so that NFP pipeline is used for single-precision when 248 // possible. 
249 // 250 // FP Special Register to Integer Register File Move 251 InstrItinData<IIC_fpSTAT , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 252 InstrStage<1, [A8_NLSPipe]>], [20]>, 253 // 254 // Single-precision FP Unary 255 InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 256 InstrStage<1, [A8_NPipe]>], [7, 1]>, 257 // 258 // Double-precision FP Unary 259 InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 260 InstrStage<4, [A8_NPipe], 0>, 261 InstrStage<4, [A8_NLSPipe]>], [4, 1]>, 262 // 263 // Single-precision FP Compare 264 InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 265 InstrStage<1, [A8_NPipe]>], [1, 1]>, 266 // 267 // Double-precision FP Compare 268 InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 269 InstrStage<4, [A8_NPipe], 0>, 270 InstrStage<4, [A8_NLSPipe]>], [4, 1]>, 271 // 272 // Single to Double FP Convert 273 InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 274 InstrStage<7, [A8_NPipe], 0>, 275 InstrStage<7, [A8_NLSPipe]>], [7, 1]>, 276 // 277 // Double to Single FP Convert 278 InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 279 InstrStage<5, [A8_NPipe], 0>, 280 InstrStage<5, [A8_NLSPipe]>], [5, 1]>, 281 // 282 // Single-Precision FP to Integer Convert 283 InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 284 InstrStage<1, [A8_NPipe]>], [7, 1]>, 285 // 286 // Double-Precision FP to Integer Convert 287 InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 288 InstrStage<8, [A8_NPipe], 0>, 289 InstrStage<8, [A8_NLSPipe]>], [8, 1]>, 290 // 291 // Integer to Single-Precision FP Convert 292 InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 293 InstrStage<1, [A8_NPipe]>], [7, 1]>, 294 // 295 // Integer to Double-Precision FP Convert 296 InstrItinData<IIC_fpCVTID , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 297 InstrStage<8, [A8_NPipe], 0>, 298 InstrStage<8, [A8_NLSPipe]>], [8, 1]>, 299 // 
300 // Single-precision FP ALU 301 InstrItinData<IIC_fpALU32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 302 InstrStage<1, [A8_NPipe]>], [7, 1, 1]>, 303 // 304 // Double-precision FP ALU 305 InstrItinData<IIC_fpALU64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 306 InstrStage<9, [A8_NPipe], 0>, 307 InstrStage<9, [A8_NLSPipe]>], [9, 1, 1]>, 308 // 309 // Single-precision FP Multiply 310 InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 311 InstrStage<1, [A8_NPipe]>], [7, 1, 1]>, 312 // 313 // Double-precision FP Multiply 314 InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 315 InstrStage<11, [A8_NPipe], 0>, 316 InstrStage<11, [A8_NLSPipe]>], [11, 1, 1]>, 317 // 318 // Single-precision FP MAC 319 InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 320 InstrStage<1, [A8_NPipe]>], [7, 2, 1, 1]>, 321 // 322 // Double-precision FP MAC 323 InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 324 InstrStage<19, [A8_NPipe], 0>, 325 InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>, 326 // 327 // Single-precision Fused FP MAC 328 InstrItinData<IIC_fpFMAC32, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 329 InstrStage<1, [A8_NPipe]>], [7, 2, 1, 1]>, 330 // 331 // Double-precision Fused FP MAC 332 InstrItinData<IIC_fpFMAC64, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 333 InstrStage<19, [A8_NPipe], 0>, 334 InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>, 335 // 336 // Single-precision FP DIV 337 InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 338 InstrStage<20, [A8_NPipe], 0>, 339 InstrStage<20, [A8_NLSPipe]>], [20, 1, 1]>, 340 // 341 // Double-precision FP DIV 342 InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 343 InstrStage<29, [A8_NPipe], 0>, 344 InstrStage<29, [A8_NLSPipe]>], [29, 1, 1]>, 345 // 346 // Single-precision FP SQRT 347 InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 348 InstrStage<19, [A8_NPipe], 0>, 349 InstrStage<19, [A8_NLSPipe]>], [19, 1]>, 
350 // 351 // Double-precision FP SQRT 352 InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 353 InstrStage<29, [A8_NPipe], 0>, 354 InstrStage<29, [A8_NLSPipe]>], [29, 1]>, 355 356 // 357 // Integer to Single-precision Move 358 InstrItinData<IIC_fpMOVIS, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 359 InstrStage<1, [A8_NPipe]>], 360 [2, 1]>, 361 // 362 // Integer to Double-precision Move 363 InstrItinData<IIC_fpMOVID, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 364 InstrStage<1, [A8_NPipe]>], 365 [2, 1, 1]>, 366 // 367 // Single-precision to Integer Move 368 InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 369 InstrStage<1, [A8_NPipe]>], 370 [20, 1]>, 371 // 372 // Double-precision to Integer Move 373 InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 374 InstrStage<1, [A8_NPipe]>], 375 [20, 20, 1]>, 376 377 // 378 // Single-precision FP Load 379 InstrItinData<IIC_fpLoad32, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 380 InstrStage<1, [A8_NLSPipe], 0>, 381 InstrStage<1, [A8_LSPipe]>], 382 [2, 1]>, 383 // 384 // Double-precision FP Load 385 InstrItinData<IIC_fpLoad64, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 386 InstrStage<1, [A8_NLSPipe], 0>, 387 InstrStage<1, [A8_LSPipe]>], 388 [2, 1]>, 389 // 390 // FP Load Multiple 391 // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers. 
392 InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 393 InstrStage<1, [A8_NLSPipe], 0>, 394 InstrStage<1, [A8_LSPipe]>, 395 InstrStage<1, [A8_NLSPipe], 0>, 396 InstrStage<1, [A8_LSPipe]>], [1, 1, 1, 2]>, 397 // 398 // FP Load Multiple + update 399 InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 400 InstrStage<1, [A8_NLSPipe], 0>, 401 InstrStage<1, [A8_LSPipe]>, 402 InstrStage<1, [A8_NLSPipe], 0>, 403 InstrStage<1, [A8_LSPipe]>], [2, 1, 1, 1, 2]>, 404 // 405 // Single-precision FP Store 406 InstrItinData<IIC_fpStore32,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 407 InstrStage<1, [A8_NLSPipe], 0>, 408 InstrStage<1, [A8_LSPipe]>], 409 [1, 1]>, 410 // 411 // Double-precision FP Store 412 InstrItinData<IIC_fpStore64,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 413 InstrStage<1, [A8_NLSPipe], 0>, 414 InstrStage<1, [A8_LSPipe]>], 415 [1, 1]>, 416 // 417 // FP Store Multiple 418 InstrItinData<IIC_fpStore_m,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 419 InstrStage<1, [A8_NLSPipe], 0>, 420 InstrStage<1, [A8_LSPipe]>, 421 InstrStage<1, [A8_NLSPipe], 0>, 422 InstrStage<1, [A8_LSPipe]>], [1, 1, 1, 1]>, 423 // 424 // FP Store Multiple + update 425 InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 426 InstrStage<1, [A8_NLSPipe], 0>, 427 InstrStage<1, [A8_LSPipe]>, 428 InstrStage<1, [A8_NLSPipe], 0>, 429 InstrStage<1, [A8_LSPipe]>], [2, 1, 1, 1, 1]>, 430 431 // NEON 432 // Issue through integer pipeline, and execute in NEON unit. 
433 // 434 // VLD1 435 InstrItinData<IIC_VLD1, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 436 InstrStage<2, [A8_NLSPipe], 0>, 437 InstrStage<2, [A8_LSPipe]>], 438 [2, 1]>, 439 // VLD1x2 440 InstrItinData<IIC_VLD1x2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 441 InstrStage<2, [A8_NLSPipe], 0>, 442 InstrStage<2, [A8_LSPipe]>], 443 [2, 2, 1]>, 444 // 445 // VLD1x3 446 InstrItinData<IIC_VLD1x3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 447 InstrStage<3, [A8_NLSPipe], 0>, 448 InstrStage<3, [A8_LSPipe]>], 449 [2, 2, 3, 1]>, 450 // 451 // VLD1x4 452 InstrItinData<IIC_VLD1x4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 453 InstrStage<3, [A8_NLSPipe], 0>, 454 InstrStage<3, [A8_LSPipe]>], 455 [2, 2, 3, 3, 1]>, 456 // 457 // VLD1u 458 InstrItinData<IIC_VLD1u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 459 InstrStage<2, [A8_NLSPipe], 0>, 460 InstrStage<2, [A8_LSPipe]>], 461 [2, 2, 1]>, 462 // 463 // VLD1x2u 464 InstrItinData<IIC_VLD1x2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 465 InstrStage<2, [A8_NLSPipe], 0>, 466 InstrStage<2, [A8_LSPipe]>], 467 [2, 2, 2, 1]>, 468 // 469 // VLD1x3u 470 InstrItinData<IIC_VLD1x3u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 471 InstrStage<3, [A8_NLSPipe], 0>, 472 InstrStage<3, [A8_LSPipe]>], 473 [2, 2, 3, 2, 1]>, 474 // 475 // VLD1x4u 476 InstrItinData<IIC_VLD1x4u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 477 InstrStage<3, [A8_NLSPipe], 0>, 478 InstrStage<3, [A8_LSPipe]>], 479 [2, 2, 3, 3, 2, 1]>, 480 // 481 // VLD1ln 482 InstrItinData<IIC_VLD1ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 483 InstrStage<3, [A8_NLSPipe], 0>, 484 InstrStage<3, [A8_LSPipe]>], 485 [3, 1, 1, 1]>, 486 // 487 // VLD1lnu 488 InstrItinData<IIC_VLD1lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 489 InstrStage<3, [A8_NLSPipe], 0>, 490 InstrStage<3, [A8_LSPipe]>], 491 [3, 2, 1, 1, 1, 1]>, 492 // 493 // VLD1dup 494 InstrItinData<IIC_VLD1dup, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 495 InstrStage<2, [A8_NLSPipe], 0>, 496 InstrStage<2, [A8_LSPipe]>], 497 [2, 1]>, 498 // 499 // VLD1dupu 500 
InstrItinData<IIC_VLD1dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 501 InstrStage<2, [A8_NLSPipe], 0>, 502 InstrStage<2, [A8_LSPipe]>], 503 [2, 2, 1, 1]>, 504 // 505 // VLD2 506 InstrItinData<IIC_VLD2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 507 InstrStage<2, [A8_NLSPipe], 0>, 508 InstrStage<2, [A8_LSPipe]>], 509 [2, 2, 1]>, 510 // 511 // VLD2x2 512 InstrItinData<IIC_VLD2x2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 513 InstrStage<3, [A8_NLSPipe], 0>, 514 InstrStage<3, [A8_LSPipe]>], 515 [2, 2, 3, 3, 1]>, 516 // 517 // VLD2ln 518 InstrItinData<IIC_VLD2ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 519 InstrStage<3, [A8_NLSPipe], 0>, 520 InstrStage<3, [A8_LSPipe]>], 521 [3, 3, 1, 1, 1, 1]>, 522 // 523 // VLD2u 524 InstrItinData<IIC_VLD2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 525 InstrStage<2, [A8_NLSPipe], 0>, 526 InstrStage<2, [A8_LSPipe]>], 527 [2, 2, 2, 1, 1, 1]>, 528 // 529 // VLD2x2u 530 InstrItinData<IIC_VLD2x2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 531 InstrStage<3, [A8_NLSPipe], 0>, 532 InstrStage<3, [A8_LSPipe]>], 533 [2, 2, 3, 3, 2, 1]>, 534 // 535 // VLD2lnu 536 InstrItinData<IIC_VLD2lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 537 InstrStage<3, [A8_NLSPipe], 0>, 538 InstrStage<3, [A8_LSPipe]>], 539 [3, 3, 2, 1, 1, 1, 1, 1]>, 540 // 541 // VLD2dup 542 InstrItinData<IIC_VLD2dup, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 543 InstrStage<2, [A8_NLSPipe], 0>, 544 InstrStage<2, [A8_LSPipe]>], 545 [2, 2, 1]>, 546 // 547 // VLD2dupu 548 InstrItinData<IIC_VLD2dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 549 InstrStage<2, [A8_NLSPipe], 0>, 550 InstrStage<2, [A8_LSPipe]>], 551 [2, 2, 2, 1, 1]>, 552 // 553 // VLD3 554 InstrItinData<IIC_VLD3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 555 InstrStage<4, [A8_NLSPipe], 0>, 556 InstrStage<4, [A8_LSPipe]>], 557 [3, 3, 4, 1]>, 558 // 559 // VLD3ln 560 InstrItinData<IIC_VLD3ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 561 InstrStage<5, [A8_NLSPipe], 0>, 562 InstrStage<5, [A8_LSPipe]>], 563 [4, 4, 5, 1, 1, 1, 1, 2]>, 564 // 
565 // VLD3u 566 InstrItinData<IIC_VLD3u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 567 InstrStage<4, [A8_NLSPipe], 0>, 568 InstrStage<4, [A8_LSPipe]>], 569 [3, 3, 4, 2, 1]>, 570 // 571 // VLD3lnu 572 InstrItinData<IIC_VLD3lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 573 InstrStage<5, [A8_NLSPipe], 0>, 574 InstrStage<5, [A8_LSPipe]>], 575 [4, 4, 5, 2, 1, 1, 1, 1, 1, 2]>, 576 // 577 // VLD3dup 578 InstrItinData<IIC_VLD3dup, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 579 InstrStage<3, [A8_NLSPipe], 0>, 580 InstrStage<3, [A8_LSPipe]>], 581 [2, 2, 3, 1]>, 582 // 583 // VLD3dupu 584 InstrItinData<IIC_VLD3dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 585 InstrStage<3, [A8_NLSPipe], 0>, 586 InstrStage<3, [A8_LSPipe]>], 587 [2, 2, 3, 2, 1, 1]>, 588 // 589 // VLD4 590 InstrItinData<IIC_VLD4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 591 InstrStage<4, [A8_NLSPipe], 0>, 592 InstrStage<4, [A8_LSPipe]>], 593 [3, 3, 4, 4, 1]>, 594 // 595 // VLD4ln 596 InstrItinData<IIC_VLD4ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 597 InstrStage<5, [A8_NLSPipe], 0>, 598 InstrStage<5, [A8_LSPipe]>], 599 [4, 4, 5, 5, 1, 1, 1, 1, 2, 2]>, 600 // 601 // VLD4u 602 InstrItinData<IIC_VLD4u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 603 InstrStage<4, [A8_NLSPipe], 0>, 604 InstrStage<4, [A8_LSPipe]>], 605 [3, 3, 4, 4, 2, 1]>, 606 // 607 // VLD4lnu 608 InstrItinData<IIC_VLD4lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 609 InstrStage<5, [A8_NLSPipe], 0>, 610 InstrStage<5, [A8_LSPipe]>], 611 [4, 4, 5, 5, 2, 1, 1, 1, 1, 1, 2, 2]>, 612 // 613 // VLD4dup 614 InstrItinData<IIC_VLD4dup, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 615 InstrStage<3, [A8_NLSPipe], 0>, 616 InstrStage<3, [A8_LSPipe]>], 617 [2, 2, 3, 3, 1]>, 618 // 619 // VLD4dupu 620 InstrItinData<IIC_VLD4dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 621 InstrStage<3, [A8_NLSPipe], 0>, 622 InstrStage<3, [A8_LSPipe]>], 623 [2, 2, 3, 3, 2, 1, 1]>, 624 // 625 // VST1 626 InstrItinData<IIC_VST1, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 627 InstrStage<2, [A8_NLSPipe], 
0>, 628 InstrStage<2, [A8_LSPipe]>], 629 [1, 1, 1]>, 630 // 631 // VST1x2 632 InstrItinData<IIC_VST1x2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 633 InstrStage<2, [A8_NLSPipe], 0>, 634 InstrStage<2, [A8_LSPipe]>], 635 [1, 1, 1, 1]>, 636 // 637 // VST1x3 638 InstrItinData<IIC_VST1x3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 639 InstrStage<3, [A8_NLSPipe], 0>, 640 InstrStage<3, [A8_LSPipe]>], 641 [1, 1, 1, 1, 2]>, 642 // 643 // VST1x4 644 InstrItinData<IIC_VST1x4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 645 InstrStage<3, [A8_NLSPipe], 0>, 646 InstrStage<3, [A8_LSPipe]>], 647 [1, 1, 1, 1, 2, 2]>, 648 // 649 // VST1u 650 InstrItinData<IIC_VST1u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 651 InstrStage<2, [A8_NLSPipe], 0>, 652 InstrStage<2, [A8_LSPipe]>], 653 [2, 1, 1, 1, 1]>, 654 // 655 // VST1x2u 656 InstrItinData<IIC_VST1x2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 657 InstrStage<2, [A8_NLSPipe], 0>, 658 InstrStage<2, [A8_LSPipe]>], 659 [2, 1, 1, 1, 1, 1]>, 660 // 661 // VST1x3u 662 InstrItinData<IIC_VST1x3u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 663 InstrStage<3, [A8_NLSPipe], 0>, 664 InstrStage<3, [A8_LSPipe]>], 665 [2, 1, 1, 1, 1, 1, 2]>, 666 // 667 // VST1x4u 668 InstrItinData<IIC_VST1x4u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 669 InstrStage<3, [A8_NLSPipe], 0>, 670 InstrStage<3, [A8_LSPipe]>], 671 [2, 1, 1, 1, 1, 1, 2, 2]>, 672 // 673 // VST1ln 674 InstrItinData<IIC_VST1ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 675 InstrStage<2, [A8_NLSPipe], 0>, 676 InstrStage<2, [A8_LSPipe]>], 677 [1, 1, 1]>, 678 // 679 // VST1lnu 680 InstrItinData<IIC_VST1lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 681 InstrStage<2, [A8_NLSPipe], 0>, 682 InstrStage<2, [A8_LSPipe]>], 683 [2, 1, 1, 1, 1]>, 684 // 685 // VST2 686 InstrItinData<IIC_VST2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 687 InstrStage<2, [A8_NLSPipe], 0>, 688 InstrStage<2, [A8_LSPipe]>], 689 [1, 1, 1, 1]>, 690 // 691 // VST2x2 692 InstrItinData<IIC_VST2x2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 693 InstrStage<4, 
[A8_NLSPipe], 0>, 694 InstrStage<4, [A8_LSPipe]>], 695 [1, 1, 1, 1, 2, 2]>, 696 // 697 // VST2u 698 InstrItinData<IIC_VST2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 699 InstrStage<2, [A8_NLSPipe], 0>, 700 InstrStage<2, [A8_LSPipe]>], 701 [2, 1, 1, 1, 1, 1]>, 702 // 703 // VST2x2u 704 InstrItinData<IIC_VST2x2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 705 InstrStage<4, [A8_NLSPipe], 0>, 706 InstrStage<4, [A8_LSPipe]>], 707 [2, 1, 1, 1, 1, 1, 2, 2]>, 708 // 709 // VST2ln 710 InstrItinData<IIC_VST2ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 711 InstrStage<2, [A8_NLSPipe], 0>, 712 InstrStage<2, [A8_LSPipe]>], 713 [1, 1, 1, 1]>, 714 // 715 // VST2lnu 716 InstrItinData<IIC_VST2lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 717 InstrStage<2, [A8_NLSPipe], 0>, 718 InstrStage<2, [A8_LSPipe]>], 719 [2, 1, 1, 1, 1, 1]>, 720 // 721 // VST3 722 InstrItinData<IIC_VST3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 723 InstrStage<3, [A8_NLSPipe], 0>, 724 InstrStage<3, [A8_LSPipe]>], 725 [1, 1, 1, 1, 2]>, 726 // 727 // VST3u 728 InstrItinData<IIC_VST3u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 729 InstrStage<3, [A8_NLSPipe], 0>, 730 InstrStage<3, [A8_LSPipe]>], 731 [2, 1, 1, 1, 1, 1, 2]>, 732 // 733 // VST3ln 734 InstrItinData<IIC_VST3ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 735 InstrStage<3, [A8_NLSPipe], 0>, 736 InstrStage<3, [A8_LSPipe]>], 737 [1, 1, 1, 1, 2]>, 738 // 739 // VST3lnu 740 InstrItinData<IIC_VST3lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 741 InstrStage<3, [A8_NLSPipe], 0>, 742 InstrStage<3, [A8_LSPipe]>], 743 [2, 1, 1, 1, 1, 1, 2]>, 744 // 745 // VST4 746 InstrItinData<IIC_VST4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 747 InstrStage<4, [A8_NLSPipe], 0>, 748 InstrStage<4, [A8_LSPipe]>], 749 [1, 1, 1, 1, 2, 2]>, 750 // 751 // VST4u 752 InstrItinData<IIC_VST4u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 753 InstrStage<4, [A8_NLSPipe], 0>, 754 InstrStage<4, [A8_LSPipe]>], 755 [2, 1, 1, 1, 1, 1, 2, 2]>, 756 // 757 // VST4ln 758 InstrItinData<IIC_VST4ln, [InstrStage<1, 
[A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<4, [A8_NLSPipe], 0>,
                 InstrStage<4, [A8_LSPipe]>],
                [1, 1, 1, 1, 2, 2]>,
  //
  // VST4lnu
  InstrItinData<IIC_VST4lnu,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<4, [A8_NLSPipe], 0>,
                 InstrStage<4, [A8_LSPipe]>],
                [2, 1, 1, 1, 1, 1, 2, 2]>,
  //
  // Double-register FP Unary
  InstrItinData<IIC_VUNAD,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<1, [A8_NPipe]>],
                [5, 2]>,
  //
  // Quad-register FP Unary
  // The result is written in N5 relative to the last cycle of a multicycle
  // instruction, so 6 is used here.
  InstrItinData<IIC_VUNAQ,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<2, [A8_NPipe]>],
                [6, 2]>,
  //
  // Double-register FP Binary
  InstrItinData<IIC_VBIND,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<1, [A8_NPipe]>],
                [5, 2, 2]>,
  //
  // VPADD, etc.
  InstrItinData<IIC_VPBIND,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<1, [A8_NPipe]>],
                [5, 2, 2]>,
  //
  // Double-register FP VMUL
  InstrItinData<IIC_VFMULD,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<1, [A8_NPipe]>],
                [5, 2, 1]>,

  //
  // Quad-register FP Binary
  // The result is written in N5 relative to the last cycle of a multicycle
  // instruction, so 6 is used here.
  InstrItinData<IIC_VBINQ,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<2, [A8_NPipe]>],
                [6, 2, 2]>,
  //
  // Quad-register FP VMUL
  InstrItinData<IIC_VFMULQ,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<1, [A8_NPipe]>],
                [6, 2, 1]>,
  //
  // Move
  InstrItinData<IIC_VMOV,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<1, [A8_NPipe]>],
                [1, 1]>,
  //
  // Move Immediate
  InstrItinData<IIC_VMOVImm,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<1, [A8_NPipe]>],
                [3]>,
  //
  // Double-register Permute Move
  InstrItinData<IIC_VMOVD,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<1, [A8_NLSPipe]>],
                [2, 1]>,
  //
  // Quad-register Permute Move
  // The result is written in N2 relative to the last cycle of a multicycle
  // instruction, so 3 is used here.
  InstrItinData<IIC_VMOVQ,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<2, [A8_NLSPipe]>],
                [3, 1]>,
  //
  // Integer to Single-precision Move
  InstrItinData<IIC_VMOVIS,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<1, [A8_NLSPipe]>],
                [2, 1]>,
  //
  // Integer to Double-precision Move
  InstrItinData<IIC_VMOVID,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<1, [A8_NLSPipe]>],
                [2, 1, 1]>,
  //
  // Single-precision to Integer Move
  InstrItinData<IIC_VMOVSI,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<1, [A8_NLSPipe]>],
                [20, 1]>,
  //
  // Double-precision to Integer Move
  InstrItinData<IIC_VMOVDI,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<1, [A8_NLSPipe]>],
                [20, 20, 1]>,
  //
  // Integer to Lane Move
  InstrItinData<IIC_VMOVISL,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<2, [A8_NLSPipe]>],
                [3, 1, 1]>,
  //
  // Vector narrow move
  InstrItinData<IIC_VMOVN,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<1, [A8_NPipe]>],
                [2, 1]>,
  //
  // Double-register Permute
  InstrItinData<IIC_VPERMD,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<1, [A8_NLSPipe]>],
                [2, 2, 1, 1]>,
  //
  // Quad-register Permute
  // The result is written in N2 relative to the last cycle of a multicycle
  // instruction, so 3 is used here.
  InstrItinData<IIC_VPERMQ,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<2, [A8_NLSPipe]>],
                [3, 3, 1, 1]>,
  //
  // Quad-register Permute (3 cycle issue)
  // The result is written in N2 relative to the last cycle of a multicycle
  // instruction, so 4 is used here.
  InstrItinData<IIC_VPERMQ3,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<1, [A8_NLSPipe]>,
                 InstrStage<1, [A8_NPipe], 0>,
                 InstrStage<2, [A8_NLSPipe]>],
                [4, 4, 1, 1]>,
  //
  // Double-register FP Multiply-Accumulate
  InstrItinData<IIC_VMACD,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<1, [A8_NPipe]>],
                [9, 3, 2, 2]>,
  //
  // Quad-register FP Multiply-Accumulate
  // The result is written in N9 relative to the last cycle of a multicycle
  // instruction, so 10 is used here.
  InstrItinData<IIC_VMACQ,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<2, [A8_NPipe]>],
                [10, 3, 2, 2]>,
  //
  // Double-register Fused FP Multiply-Accumulate
  InstrItinData<IIC_VFMACD,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<1, [A8_NPipe]>],
                [9, 3, 2, 2]>,
  //
  // Quad-register Fused FP Multiply-Accumulate
  // The result is written in N9 relative to the last cycle of a multicycle
  // instruction, so 10 is used here.
  InstrItinData<IIC_VFMACQ,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<2, [A8_NPipe]>],
                [10, 3, 2, 2]>,
  //
  // Double-register Reciprocal Step
  InstrItinData<IIC_VRECSD,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<1, [A8_NPipe]>],
                [9, 2, 2]>,
  //
  // Quad-register Reciprocal Step
  InstrItinData<IIC_VRECSQ,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<2, [A8_NPipe]>],
                [10, 2, 2]>,
  //
  // Double-register Integer Count
  InstrItinData<IIC_VCNTiD,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<1, [A8_NPipe]>],
                [3, 2, 2]>,
  //
  // Quad-register Integer Count
  // The result is written in N3 relative to the last cycle of a multicycle
  // instruction, so 4 is used here.
  InstrItinData<IIC_VCNTiQ,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<2, [A8_NPipe]>],
                [4, 2, 2]>,
  //
  // Double-register Integer Unary
  InstrItinData<IIC_VUNAiD,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<1, [A8_NPipe]>],
                [4, 2]>,
  //
  // Quad-register Integer Unary
  InstrItinData<IIC_VUNAiQ,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<1, [A8_NPipe]>],
                [4, 2]>,
  //
  // Double-register Integer Q-Unary
  InstrItinData<IIC_VQUNAiD,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<1, [A8_NPipe]>],
                [4, 1]>,
  //
  // Quad-register Integer Q-Unary
  InstrItinData<IIC_VQUNAiQ,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<1, [A8_NPipe]>],
                [4, 1]>,
  //
  // Double-register Integer Binary
  InstrItinData<IIC_VBINiD,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<1, [A8_NPipe]>],
                [3, 2, 2]>,
  //
  // Quad-register Integer Binary
  InstrItinData<IIC_VBINiQ,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<1, [A8_NPipe]>],
                [3, 2, 2]>,
  //
  // Double-register Integer Binary (4 cycle)
  InstrItinData<IIC_VBINi4D,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<1, [A8_NPipe]>],
                [4, 2, 1]>,
  //
  // Quad-register Integer Binary (4 cycle)
  InstrItinData<IIC_VBINi4Q,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<1, [A8_NPipe]>],
                [4, 2, 1]>,

  //
  // Double-register Integer Subtract
  InstrItinData<IIC_VSUBiD,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<1, [A8_NPipe]>],
                [3, 2, 1]>,
  //
  // Quad-register Integer Subtract
  InstrItinData<IIC_VSUBiQ,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<1, [A8_NPipe]>],
                [3, 2, 1]>,
  //
  // Double-register Integer Subtract (4 cycle)
  InstrItinData<IIC_VSUBi4D,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<1, [A8_NPipe]>],
                [4, 2, 1]>,
  //
  // Quad-register Integer Subtract (4 cycle)
  InstrItinData<IIC_VSUBi4Q,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<1, [A8_NPipe]>],
                [4, 2, 1]>,
  //
  // Double-register Integer Shift
  InstrItinData<IIC_VSHLiD,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<1, [A8_NPipe]>],
                [3, 1, 1]>,
  //
  // Quad-register Integer Shift
  InstrItinData<IIC_VSHLiQ,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<2, [A8_NPipe]>],
                [4, 1, 1]>,
  //
  // Double-register Integer Shift (4 cycle)
  InstrItinData<IIC_VSHLi4D,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<1, [A8_NPipe]>],
                [4, 1, 1]>,
  //
  // Quad-register Integer Shift (4 cycle)
  InstrItinData<IIC_VSHLi4Q,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<2, [A8_NPipe]>],
                [5, 1, 1]>,
  //
  // Double-register Integer Pair Add Long
  InstrItinData<IIC_VPALiD,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<1, [A8_NPipe]>],
                [6, 3, 1]>,
  //
  // Quad-register Integer Pair Add Long
  InstrItinData<IIC_VPALiQ,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<2, [A8_NPipe]>],
                [7, 3, 1]>,
  //
  // Double-register Absolute Difference and Accumulate
  InstrItinData<IIC_VABAD,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<1, [A8_NPipe]>],
                [6, 3, 2, 1]>,
  //
  // Quad-register Absolute Difference and Accumulate
  InstrItinData<IIC_VABAQ,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<2, [A8_NPipe]>],
                [6, 3, 2, 1]>,

  //
  // Double-register Integer Multiply (.8, .16)
  InstrItinData<IIC_VMULi16D,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<1, [A8_NPipe]>],
                [6, 2, 2]>,
  //
  // Double-register Integer Multiply (.32)
  InstrItinData<IIC_VMULi32D,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<2, [A8_NPipe]>],
                [7, 2, 1]>,
  //
  // Quad-register Integer Multiply (.8, .16)
  InstrItinData<IIC_VMULi16Q,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<2, [A8_NPipe]>],
                [7, 2, 2]>,
  //
  // Quad-register Integer Multiply (.32)
  InstrItinData<IIC_VMULi32Q,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<1, [A8_NPipe]>,
                 InstrStage<2, [A8_NLSPipe], 0>,
                 InstrStage<3, [A8_NPipe]>],
                [9, 2, 1]>,
  //
  // Double-register Integer Multiply-Accumulate (.8, .16)
  InstrItinData<IIC_VMACi16D,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<1, [A8_NPipe]>],
                [6, 3, 2, 2]>,
  //
// Double-register Integer Multiply-Accumulate (.32)
  InstrItinData<IIC_VMACi32D,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<2, [A8_NPipe]>],
                [7, 3, 2, 1]>,
  //
  // Quad-register Integer Multiply-Accumulate (.8, .16)
  InstrItinData<IIC_VMACi16Q,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<2, [A8_NPipe]>],
                [7, 3, 2, 2]>,
  //
  // Quad-register Integer Multiply-Accumulate (.32)
  InstrItinData<IIC_VMACi32Q,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<1, [A8_NPipe]>,
                 InstrStage<2, [A8_NLSPipe], 0>,
                 InstrStage<3, [A8_NPipe]>],
                [9, 3, 2, 1]>,
  //
  // Double-register VEXT
  InstrItinData<IIC_VEXTD,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<1, [A8_NLSPipe]>],
                [2, 1, 1]>,
  //
  // Quad-register VEXT
  InstrItinData<IIC_VEXTQ,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<2, [A8_NLSPipe]>],
                [3, 1, 1]>,
  //
  // VTB
  InstrItinData<IIC_VTB1,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<2, [A8_NLSPipe]>],
                [3, 2, 1]>,
  InstrItinData<IIC_VTB2,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<2, [A8_NLSPipe]>],
                [3, 2, 2, 1]>,
  InstrItinData<IIC_VTB3,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<1, [A8_NLSPipe]>,
                 InstrStage<1, [A8_NPipe], 0>,
                 InstrStage<2, [A8_NLSPipe]>],
                [4, 2, 2, 3, 1]>,
  InstrItinData<IIC_VTB4,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<1, [A8_NLSPipe]>,
                 InstrStage<1, [A8_NPipe], 0>,
                 InstrStage<2, [A8_NLSPipe]>],
                [4, 2, 2, 3, 3, 1]>,
  //
  // VTBX
  InstrItinData<IIC_VTBX1,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<2, [A8_NLSPipe]>],
                [3, 1, 2, 1]>,
  InstrItinData<IIC_VTBX2,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<2, [A8_NLSPipe]>],
                [3, 1, 2, 2, 1]>,
  InstrItinData<IIC_VTBX3,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<1, [A8_NLSPipe]>,
                 InstrStage<1, [A8_NPipe], 0>,
                 InstrStage<2, [A8_NLSPipe]>],
                [4, 1, 2, 2, 3, 1]>,
  // Last entry of the itinerary list: no trailing comma.
  InstrItinData<IIC_VTBX4,
                [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                 InstrStage<1, [A8_NLSPipe]>,
                 InstrStage<1, [A8_NPipe], 0>,
                 InstrStage<2, [A8_NLSPipe]>],
                [4, 1, 2, 2, 3, 3, 1]>
]>;