//===-- ARMInstrNEON.td - NEON support for ARM -------------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the ARM NEON instruction set.
//
//===----------------------------------------------------------------------===//


//===----------------------------------------------------------------------===//
// NEON-specific Operands.
//===----------------------------------------------------------------------===//

// Generic NEON modified-immediate operand; printed with the NEON modified
// immediate printer (no asm-parser match class).
def nModImm : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
}

// Splat immediates of various element sizes. Each operand pairs the modified
// immediate printer with an AsmOperandClass so the asm parser can classify
// the immediate by element size.
def nImmSplatI8AsmOperand : AsmOperandClass { let Name = "NEONi8splat"; }
def nImmSplatI8 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI8AsmOperand;
}
def nImmSplatI16AsmOperand : AsmOperandClass { let Name = "NEONi16splat"; }
def nImmSplatI16 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI16AsmOperand;
}
def nImmSplatI32AsmOperand : AsmOperandClass { let Name = "NEONi32splat"; }
def nImmSplatI32 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI32AsmOperand;
}
// Immediate operands for VMOV, including a negated 32-bit variant.
def nImmVMOVI32AsmOperand : AsmOperandClass { let Name = "NEONi32vmov"; }
def nImmVMOVI32 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVMOVI32AsmOperand;
}
def nImmVMOVI32NegAsmOperand : AsmOperandClass { let Name = "NEONi32vmovNeg"; }
def nImmVMOVI32Neg : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVMOVI32NegAsmOperand;
}
// Floating-point immediate for VMOV; uses the FP immediate printer/parser.
def nImmVMOVF32 : Operand<i32> {
  let PrintMethod = "printFPImmOperand";
  let ParserMatchClass = FPImmOperand;
}
def nImmSplatI64AsmOperand : AsmOperandClass { let Name = "NEONi64splat"; }
def nImmSplatI64 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI64AsmOperand;
}

// Vector lane-index operands. The ImmLeaf predicate bounds the index by the
// number of lanes of that element size in a 64-bit D register (8, 4, or 2).
def VectorIndex8Operand : AsmOperandClass { let Name = "VectorIndex8"; }
def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; }
def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; }
def VectorIndex8 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 8;
}]> {
  let ParserMatchClass = VectorIndex8Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex16 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 4;
}]> {
  let ParserMatchClass = VectorIndex16Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 2;
}]> {
  let ParserMatchClass = VectorIndex32Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}

// Register list of one D register.
def VecListOneDAsmOperand : AsmOperandClass {
  let Name = "VecListOneD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> {
  let ParserMatchClass = VecListOneDAsmOperand;
}
// Register list of two sequential D registers.
def VecListDPairAsmOperand : AsmOperandClass {
  let Name = "VecListDPair";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPair : RegisterOperand<DPair, "printVectorListTwo"> {
  let ParserMatchClass = VecListDPairAsmOperand;
}
// Register list of three sequential D registers.
def VecListThreeDAsmOperand : AsmOperandClass {
  let Name = "VecListThreeD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> {
  let ParserMatchClass = VecListThreeDAsmOperand;
}
// Register list of four sequential D registers.
def VecListFourDAsmOperand : AsmOperandClass {
  let Name = "VecListFourD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> {
  let ParserMatchClass = VecListFourDAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers).
def VecListDPairSpacedAsmOperand : AsmOperandClass {
  let Name = "VecListDPairSpaced";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListTwoSpaced"> {
  let ParserMatchClass = VecListDPairSpacedAsmOperand;
}
// Register list of three D registers spaced by 2 (three Q registers).
def VecListThreeQAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQ";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeQ : RegisterOperand<DPR, "printVectorListThreeSpaced"> {
  let ParserMatchClass = VecListThreeQAsmOperand;
}
// Register list of four D registers spaced by 2 (four Q registers).
def VecListFourQAsmOperand : AsmOperandClass {
  let Name = "VecListFourQ";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourQ : RegisterOperand<DPR, "printVectorListFourSpaced"> {
  let ParserMatchClass = VecListFourQAsmOperand;
}

// Register list of one D register, with "all lanes" subscripting.
def VecListOneDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListOneDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> {
  let ParserMatchClass = VecListOneDAllLanesAsmOperand;
}
// Register list of two D registers, with "all lanes" subscripting.
def VecListDPairAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListDPairAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairAllLanes : RegisterOperand<DPair,
                                           "printVectorListTwoAllLanes"> {
  let ParserMatchClass = VecListDPairAllLanesAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers).
def VecListDPairSpacedAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListDPairSpacedAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairSpacedAllLanes : RegisterOperand<DPair,
                                                 "printVectorListTwoSpacedAllLanes"> {
  let ParserMatchClass = VecListDPairSpacedAllLanesAsmOperand;
}
// Register list of three D registers, with "all lanes" subscripting.
def VecListThreeDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeDAllLanes : RegisterOperand<DPR,
                                            "printVectorListThreeAllLanes"> {
  let ParserMatchClass = VecListThreeDAllLanesAsmOperand;
}
// Register list of three D registers spaced by 2 (three sequential Q regs).
def VecListThreeQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeQAllLanes : RegisterOperand<DPR,
                                            "printVectorListThreeSpacedAllLanes"> {
  let ParserMatchClass = VecListThreeQAllLanesAsmOperand;
}
// Register list of four D registers, with "all lanes" subscripting.
def VecListFourDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListFourDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourDAllLanes : RegisterOperand<DPR, "printVectorListFourAllLanes"> {
  let ParserMatchClass = VecListFourDAllLanesAsmOperand;
}
// Register list of four D registers spaced by 2 (four sequential Q regs).
def VecListFourQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListFourQAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourQAllLanes : RegisterOperand<DPR,
                                           "printVectorListFourSpacedAllLanes"> {
  let ParserMatchClass = VecListFourQAllLanesAsmOperand;
}


// Lane-indexed vector lists below are plain i32 operands whose MI-level
// representation is a (register, lane-index) pair; the parser renders them
// via addVecListIndexedOperands.
// Register list of one D register, with byte lane subscripting.
def VecListOneDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListOneDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListOneDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// Register list of two D registers with byte lane subscripting.
def VecListTwoDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListTwoDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListTwoDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of two Q registers with half-word lane subscripting.
def VecListTwoQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListTwoQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}


// Register list of three D registers with byte lane subscripting.
def VecListThreeDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListThreeDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListThreeDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of three Q registers with half-word lane subscripting.
def VecListThreeQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListThreeQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// Register list of four D registers with byte lane subscripting.
def VecListFourDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListFourDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListFourDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of four Q registers with half-word lane subscripting.
def VecListFourQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListFourQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}


//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//

// Vector compare type profiles: SDTARMVCMP takes two operands of the same
// type and produces an integer vector; SDTARMVCMPZ takes a single operand
// (the "Z" nodes below compare against zero, per their names).
def SDTARMVCMP  : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
def SDTARMVCMPZ : SDTypeProfile<1, 1, []>;

def NEONvceq  : SDNode<"ARMISD::VCEQ", SDTARMVCMP>;
def NEONvceqz : SDNode<"ARMISD::VCEQZ", SDTARMVCMPZ>;
def NEONvcge  : SDNode<"ARMISD::VCGE", SDTARMVCMP>;
def NEONvcgez : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>;
def NEONvclez : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>;
def NEONvcgeu : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
def NEONvcgt  : SDNode<"ARMISD::VCGT", SDTARMVCMP>;
def NEONvcgtz : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>;
def NEONvcltz : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>;
def NEONvcgtu : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
def NEONvtst  : SDNode<"ARMISD::VTST", SDTARMVCMP>;

// Types for vector shift by immediates. The "SHX" version is for long and
// narrow operations where the source and destination vectors have different
// types. The "SHINS" version is for shift and insert operations.
def SDTARMVSH    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                        SDTCisVT<2, i32>]>;
def SDTARMVSHX   : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                        SDTCisVT<2, i32>]>;
def SDTARMVSHINS : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                        SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;

def NEONvshl  : SDNode<"ARMISD::VSHL", SDTARMVSH>;
def NEONvshrs : SDNode<"ARMISD::VSHRs", SDTARMVSH>;
def NEONvshru : SDNode<"ARMISD::VSHRu", SDTARMVSH>;
def NEONvshlls : SDNode<"ARMISD::VSHLLs", SDTARMVSHX>;
def NEONvshllu : SDNode<"ARMISD::VSHLLu", SDTARMVSHX>;
def NEONvshlli : SDNode<"ARMISD::VSHLLi", SDTARMVSHX>;
def NEONvshrn : SDNode<"ARMISD::VSHRN", SDTARMVSHX>;

// Rounding shifts.
def NEONvrshrs : SDNode<"ARMISD::VRSHRs", SDTARMVSH>;
def NEONvrshru : SDNode<"ARMISD::VRSHRu", SDTARMVSH>;
def NEONvrshrn : SDNode<"ARMISD::VRSHRN", SDTARMVSHX>;

// Saturating shifts.
def NEONvqshls : SDNode<"ARMISD::VQSHLs", SDTARMVSH>;
def NEONvqshlu : SDNode<"ARMISD::VQSHLu", SDTARMVSH>;
def NEONvqshlsu : SDNode<"ARMISD::VQSHLsu", SDTARMVSH>;
def NEONvqshrns : SDNode<"ARMISD::VQSHRNs", SDTARMVSHX>;
def NEONvqshrnu : SDNode<"ARMISD::VQSHRNu", SDTARMVSHX>;
def NEONvqshrnsu : SDNode<"ARMISD::VQSHRNsu", SDTARMVSHX>;

// Saturating rounding narrow shifts.
def NEONvqrshrns : SDNode<"ARMISD::VQRSHRNs", SDTARMVSHX>;
def NEONvqrshrnu : SDNode<"ARMISD::VQRSHRNu", SDTARMVSHX>;
def NEONvqrshrnsu : SDNode<"ARMISD::VQRSHRNsu", SDTARMVSHX>;

// Shift-and-insert.
def NEONvsli : SDNode<"ARMISD::VSLI", SDTARMVSHINS>;
def NEONvsri : SDNode<"ARMISD::VSRI", SDTARMVSHINS>;

// Extract a lane to an i32 (signed/unsigned flavors).
def SDTARMVGETLN : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
                                        SDTCisVT<2, i32>]>;
def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;

// Build a vector from an encoded modified-immediate (i32 payload).
def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
def NEONvmovImm : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
def NEONvmvnImm : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
def NEONvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>;

// OR/BIC of a vector with an encoded modified-immediate.
def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisVT<2, i32>]>;
def NEONvorrImm : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>;
def NEONvbicImm : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>;

def NEONvbsl : SDNode<"ARMISD::VBSL",
                      SDTypeProfile<1, 3, [SDTCisVec<0>,
                                           SDTCisSameAs<0, 1>,
                                           SDTCisSameAs<0, 2>,
                                           SDTCisSameAs<0, 3>]>>;

def NEONvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;

// VDUPLANE can produce a quad-register result from a double-register source,
// so the result is not constrained to match the source.
def NEONvduplane : SDNode<"ARMISD::VDUPLANE",
                          SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
                                               SDTCisVT<2, i32>]>>;

def SDTARMVEXT : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                      SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
def NEONvext : SDNode<"ARMISD::VEXT", SDTARMVEXT>;

// One-operand shuffles (element reversal).
def SDTARMVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
def NEONvrev64 : SDNode<"ARMISD::VREV64", SDTARMVSHUF>;
def NEONvrev32 : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
def NEONvrev16 : SDNode<"ARMISD::VREV16", SDTARMVSHUF>;

// Two-operand, two-result shuffles (zip/unzip/transpose).
def SDTARMVSHUF2 : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                        SDTCisSameAs<0, 2>,
                                        SDTCisSameAs<0, 3>]>;
def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;

// Widening multiplies (result type differs from operand types).
def SDTARMVMULL : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                       SDTCisSameAs<1, 2>]>;
def NEONvmulls : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
def NEONvmullu : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;

def SDTARMFMAX : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>,
                                      SDTCisSameAs<0, 2>]>;
def NEONfmax : SDNode<"ARMISD::FMAX", SDTARMFMAX>;
def NEONfmin : SDNode<"ARMISD::FMIN", SDTARMFMAX>;

// All-zeros vector: a VMOVIMM whose decoded modified-immediate is a 32-bit
// element value of 0.
def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{
  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
  unsigned EltBits = 0;
  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
  return (EltBits == 32 && EltVal == 0);
}]>;

// All-ones vector: a VMOVIMM whose decoded modified-immediate is an 8-bit
// element value of 0xff.
def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{
  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
  unsigned EltBits = 0;
  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
  return (EltBits == 8 && EltVal == 0xff);
}]>;

//===----------------------------------------------------------------------===//
// NEON load / store instructions
//===----------------------------------------------------------------------===//

// Use VLDM to load a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VLDMD after reg alloc.
def VLDMQIA
  : PseudoVFPLdStM<(outs DPair:$dst), (ins GPR:$Rn),
                   IIC_fpLoad_m, "",
                   [(set DPair:$dst, (v2f64 (load GPR:$Rn)))]>;

// Use VSTM to store a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VSTMD after reg alloc.
def VSTMQIA
  : PseudoVFPLdStM<(outs), (ins DPair:$src, GPR:$Rn),
                   IIC_fpStore_m, "",
                   [(store (v2f64 DPair:$src), GPR:$Rn)]>;

// Classes for VLD* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
// Writeback pseudos tie the updated base address to $wb; the QQQQ variants
// additionally tie $src to $dst so partial results survive expansion.
class VLDQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">;
class VLDQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset), itin,
                "$addr.addr = $wb">;
class VLDQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr), itin,
                "$addr.addr = $wb">;
class VLDQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset), itin,
                "$addr.addr = $wb">;

class VLDQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">;
class VLDQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset), itin,
                "$addr.addr = $wb">;
class VLDQQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr), itin,
                "$addr.addr = $wb">;
class VLDQQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset), itin,
                "$addr.addr = $wb">;


class VLDQQQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin,
                "$src = $dst">;
class VLDQQQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
                "$addr.addr = $wb, $src = $dst">;

let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {

// VLD1 : Vector Load (multiple single elements)
class VLD1D<bits<4> op7_4, string Dt>
  : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd),
          (ins addrmode6:$Rn), IIC_VLD1,
          "vld1", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;          // no writeback (base register unchanged)
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDInstruction";
}
class VLD1Q<bits<4> op7_4, string Dt>
  : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd),
          (ins addrmode6:$Rn), IIC_VLD1x2,
          "vld1", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDInstruction";
}

def VLD1d8  : VLD1D<{0,0,0,?}, "8">;
def VLD1d16 : VLD1D<{0,1,0,?}, "16">;
def VLD1d32 : VLD1D<{1,0,0,?}, "32">;
def VLD1d64 : VLD1D<{1,1,0,?}, "64">;

def VLD1q8  : VLD1Q<{0,0,?,?}, "8">;
def VLD1q16 : VLD1Q<{0,1,?,?}, "16">;
def VLD1q32 : VLD1Q<{1,0,?,?}, "32">;
def VLD1q64 : VLD1Q<{1,1,?,?}, "64">;

// ...with address register writeback:
multiclass VLD1DWB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
                     (ins addrmode6:$Rn), IIC_VLD1u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDInstruction";
    let AsmMatchConverter = "cvtVLDwbFixed";
  }
  def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDInstruction";
    let AsmMatchConverter = "cvtVLDwbRegister";
  }
}
multiclass VLD1QWB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
                     (ins addrmode6:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDInstruction";
    let AsmMatchConverter = "cvtVLDwbFixed";
  }
  def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDInstruction";
    let AsmMatchConverter = "cvtVLDwbRegister";
  }
}

defm VLD1d8wb  : VLD1DWB<{0,0,0,?}, "8">;
defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16">;
defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32">;
defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64">;
defm VLD1q8wb  : VLD1QWB<{0,0,?,?}, "8">;
defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16">;
defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32">;
defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64">;

// ...with 3 registers
class VLD1D3<bits<4> op7_4, string Dt>
  : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd),
          (ins addrmode6:$Rn), IIC_VLD1x3, "vld1", Dt,
          "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDInstruction";
}
// NOTE(review): this writeback multiclass uses the 2-register itinerary
// IIC_VLD1x2u even though it loads 3 registers — confirm whether a
// 3-register update itinerary was intended.
multiclass VLD1D3WB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
                     (ins addrmode6:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDInstruction";
    let AsmMatchConverter = "cvtVLDwbFixed";
  }
  def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDInstruction";
    let AsmMatchConverter = "cvtVLDwbRegister";
  }
}

def VLD1d8T  : VLD1D3<{0,0,0,?}, "8">;
def VLD1d16T : VLD1D3<{0,1,0,?}, "16">;
def VLD1d32T : VLD1D3<{1,0,0,?}, "32">;
def VLD1d64T : VLD1D3<{1,1,0,?}, "64">;

defm VLD1d8Twb  : VLD1D3WB<{0,0,0,?}, "8">;
defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16">;
defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32">;
defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64">;

def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>;

// ...with 4 registers
class VLD1D4<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd),
          (ins addrmode6:$Rn), IIC_VLD1x4, "vld1", Dt,
          "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDInstruction";
}
// NOTE(review): like VLD1D3WB, this uses IIC_VLD1x2u for a 4-register
// load — confirm the intended itinerary.
multiclass VLD1D4WB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb),
                     (ins addrmode6:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDInstruction";
    let AsmMatchConverter = "cvtVLDwbFixed";
  }
  def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDInstruction";
    let AsmMatchConverter = "cvtVLDwbRegister";
  }
}

def VLD1d8Q  : VLD1D4<{0,0,?,?}, "8">;
def VLD1d16Q : VLD1D4<{0,1,?,?}, "16">;
def VLD1d32Q : VLD1D4<{1,0,?,?}, "32">;
def VLD1d64Q : VLD1D4<{1,1,?,?}, "64">;

defm VLD1d8Qwb  : VLD1D4WB<{0,0,?,?}, "8">;
defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16">;
defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32">;
defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64">;

def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>;

// VLD2 : Vector Load (multiple 2-element structures)
class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
           InstrItinClass itin>
  : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd),
          (ins addrmode6:$Rn), itin,
          "vld2", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDInstruction";
}

def VLD2d8  : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2>;
def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2>;
def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2>;

def VLD2q8  : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2>;
def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2>;
def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2>;

def VLD2q8Pseudo  : VLDQQPseudo<IIC_VLD2x2>;
def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>;
def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>;

// ...with address register writeback:
multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
                  RegisterOperand VdTy, InstrItinClass itin> {
  def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
                     (ins addrmode6:$Rn), itin,
                     "vld2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDInstruction";
    let AsmMatchConverter = "cvtVLDwbFixed";
  }
  def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm), itin,
                        "vld2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDInstruction";
    let AsmMatchConverter = "cvtVLDwbRegister";
  }
}

defm VLD2d8wb  : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u>;
defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u>;
defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u>;

defm VLD2q8wb  : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u>;
defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u>;
defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u>;

def VLD2q8PseudoWB_fixed     : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
def VLD2q16PseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
def VLD2q32PseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
def VLD2q8PseudoWB_register  : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;

// ...with double-spaced registers
def VLD2b8  : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2>;
def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2>;
def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2>;
defm VLD2b8wb  : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u>;
defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u>;
defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u>;

// VLD3 : Vector Load (multiple 3-element structures)
class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
          (ins addrmode6:$Rn), IIC_VLD3,
          "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDInstruction";
}

def VLD3d8  : VLD3D<0b0100, {0,0,0,?}, "8">;
def VLD3d16 : VLD3D<0b0100, {0,1,0,?}, "16">;
def VLD3d32 : VLD3D<0b0100, {1,0,0,?}, "32">;

def VLD3d8Pseudo  : VLDQQPseudo<IIC_VLD3>;
def VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>;
def VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>;

// ...with address register writeback:
class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u,
          "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDInstruction";
}

def VLD3d8_UPD  : VLD3DWB<0b0100, {0,0,0,?}, "8">;
def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">;
def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">;

def VLD3d8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3u>;
def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;

// ...with double-spaced registers:
def VLD3q8      : VLD3D<0b0101, {0,0,0,?}, "8">;
def VLD3q16     : VLD3D<0b0101, {0,1,0,?}, "16">;
def VLD3q32     : VLD3D<0b0101, {1,0,0,?}, "32">;
def VLD3q8_UPD  : VLD3DWB<0b0101, {0,0,0,?}, "8">;
def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">;
def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">;

def VLD3q8Pseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3u>;
def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;

// ...alternate versions to be allocated odd register numbers:
def VLD3q8oddPseudo  : VLDQQQQPseudo<IIC_VLD3>;
def VLD3q16oddPseudo : VLDQQQQPseudo<IIC_VLD3>;
def VLD3q32oddPseudo : VLDQQQQPseudo<IIC_VLD3>;

def VLD3q8oddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3u>;
def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;

// VLD4 : Vector Load (multiple 4-element structures)
class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6:$Rn), IIC_VLD4,
          "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDInstruction";
}

def VLD4d8  : VLD4D<0b0000, {0,0,?,?}, "8">;
def VLD4d16 : VLD4D<0b0000, {0,1,?,?}, "16">;
def VLD4d32 : VLD4D<0b0000, {1,0,?,?}, "32">;

def VLD4d8Pseudo  : VLDQQPseudo<IIC_VLD4>;
def VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>;
def VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>;

// ...with address register writeback:
class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u,
          "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDInstruction";
}

def VLD4d8_UPD  : VLD4DWB<0b0000, {0,0,?,?}, "8">;
def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">;
def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">;

def VLD4d8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4u>;
def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;

// ...with 
double-spaced registers: 911 def VLD4q8 : VLD4D<0b0001, {0,0,?,?}, "8">; 912 def VLD4q16 : VLD4D<0b0001, {0,1,?,?}, "16">; 913 def VLD4q32 : VLD4D<0b0001, {1,0,?,?}, "32">; 914 def VLD4q8_UPD : VLD4DWB<0b0001, {0,0,?,?}, "8">; 915 def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">; 916 def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">; 917 918 def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>; 919 def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>; 920 def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>; 921 922 // ...alternate versions to be allocated odd register numbers: 923 def VLD4q8oddPseudo : VLDQQQQPseudo<IIC_VLD4>; 924 def VLD4q16oddPseudo : VLDQQQQPseudo<IIC_VLD4>; 925 def VLD4q32oddPseudo : VLDQQQQPseudo<IIC_VLD4>; 926 927 def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>; 928 def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>; 929 def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>; 930 931 } // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 932 933 // Classes for VLD*LN pseudo-instructions with multi-register operands. 934 // These are expanded to real instructions after register allocation. 
935 class VLDQLNPseudo<InstrItinClass itin> 936 : PseudoNLdSt<(outs QPR:$dst), 937 (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane), 938 itin, "$src = $dst">; 939 class VLDQLNWBPseudo<InstrItinClass itin> 940 : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), 941 (ins addrmode6:$addr, am6offset:$offset, QPR:$src, 942 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">; 943 class VLDQQLNPseudo<InstrItinClass itin> 944 : PseudoNLdSt<(outs QQPR:$dst), 945 (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane), 946 itin, "$src = $dst">; 947 class VLDQQLNWBPseudo<InstrItinClass itin> 948 : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), 949 (ins addrmode6:$addr, am6offset:$offset, QQPR:$src, 950 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">; 951 class VLDQQQQLNPseudo<InstrItinClass itin> 952 : PseudoNLdSt<(outs QQQQPR:$dst), 953 (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane), 954 itin, "$src = $dst">; 955 class VLDQQQQLNWBPseudo<InstrItinClass itin> 956 : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb), 957 (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src, 958 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">; 959 960 // VLD1LN : Vector Load (single element to one lane) 961 class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, 962 PatFrag LoadOp> 963 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd), 964 (ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane), 965 IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn", 966 "$src = $Vd", 967 [(set DPR:$Vd, (vector_insert (Ty DPR:$src), 968 (i32 (LoadOp addrmode6:$Rn)), 969 imm:$lane))]> { 970 let Rm = 0b1111; 971 let DecoderMethod = "DecodeVLD1LN"; 972 } 973 class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, 974 PatFrag LoadOp> 975 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd), 976 (ins addrmode6oneL32:$Rn, DPR:$src, nohash_imm:$lane), 977 IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn", 978 "$src = $Vd", 979 [(set DPR:$Vd, (vector_insert (Ty DPR:$src), 980 (i32 (LoadOp addrmode6oneL32:$Rn)), 
981 imm:$lane))]> { 982 let Rm = 0b1111; 983 let DecoderMethod = "DecodeVLD1LN"; 984 } 985 class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln> { 986 let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src), 987 (i32 (LoadOp addrmode6:$addr)), 988 imm:$lane))]; 989 } 990 991 def VLD1LNd8 : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> { 992 let Inst{7-5} = lane{2-0}; 993 } 994 def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> { 995 let Inst{7-6} = lane{1-0}; 996 let Inst{5-4} = Rn{5-4}; 997 } 998 def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> { 999 let Inst{7} = lane{0}; 1000 let Inst{5-4} = Rn{5-4}; 1001 } 1002 1003 def VLD1LNq8Pseudo : VLD1QLNPseudo<v16i8, extloadi8>; 1004 def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>; 1005 def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>; 1006 1007 def : Pat<(vector_insert (v2f32 DPR:$src), 1008 (f32 (load addrmode6:$addr)), imm:$lane), 1009 (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>; 1010 def : Pat<(vector_insert (v4f32 QPR:$src), 1011 (f32 (load addrmode6:$addr)), imm:$lane), 1012 (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>; 1013 1014 let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { 1015 1016 // ...with address register writeback: 1017 class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 1018 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, GPR:$wb), 1019 (ins addrmode6:$Rn, am6offset:$Rm, 1020 DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt, 1021 "\\{$Vd[$lane]\\}, $Rn$Rm", 1022 "$src = $Vd, $Rn.addr = $wb", []> { 1023 let DecoderMethod = "DecodeVLD1LN"; 1024 } 1025 1026 def VLD1LNd8_UPD : VLD1LNWB<0b0000, {?,?,?,0}, "8"> { 1027 let Inst{7-5} = lane{2-0}; 1028 } 1029 def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> { 1030 let Inst{7-6} = lane{1-0}; 1031 let Inst{4} = Rn{4}; 1032 } 1033 def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> { 1034 let Inst{7} = lane{0}; 1035 let Inst{5} = Rn{4}; 1036 
let Inst{4} = Rn{4}; 1037 } 1038 1039 def VLD1LNq8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>; 1040 def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>; 1041 def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>; 1042 1043 // VLD2LN : Vector Load (single 2-element structure to one lane) 1044 class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt> 1045 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2), 1046 (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane), 1047 IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn", 1048 "$src1 = $Vd, $src2 = $dst2", []> { 1049 let Rm = 0b1111; 1050 let Inst{4} = Rn{4}; 1051 let DecoderMethod = "DecodeVLD2LN"; 1052 } 1053 1054 def VLD2LNd8 : VLD2LN<0b0001, {?,?,?,?}, "8"> { 1055 let Inst{7-5} = lane{2-0}; 1056 } 1057 def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> { 1058 let Inst{7-6} = lane{1-0}; 1059 } 1060 def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> { 1061 let Inst{7} = lane{0}; 1062 } 1063 1064 def VLD2LNd8Pseudo : VLDQLNPseudo<IIC_VLD2ln>; 1065 def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>; 1066 def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>; 1067 1068 // ...with double-spaced registers: 1069 def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> { 1070 let Inst{7-6} = lane{1-0}; 1071 } 1072 def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> { 1073 let Inst{7} = lane{0}; 1074 } 1075 1076 def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>; 1077 def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>; 1078 1079 // ...with address register writeback: 1080 class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 1081 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb), 1082 (ins addrmode6:$Rn, am6offset:$Rm, 1083 DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2lnu, "vld2", Dt, 1084 "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm", 1085 "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> { 1086 let Inst{4} = Rn{4}; 1087 let DecoderMethod = "DecodeVLD2LN"; 1088 } 1089 1090 def VLD2LNd8_UPD : VLD2LNWB<0b0001, 
{?,?,?,?}, "8"> { 1091 let Inst{7-5} = lane{2-0}; 1092 } 1093 def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> { 1094 let Inst{7-6} = lane{1-0}; 1095 } 1096 def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> { 1097 let Inst{7} = lane{0}; 1098 } 1099 1100 def VLD2LNd8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>; 1101 def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>; 1102 def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>; 1103 1104 def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> { 1105 let Inst{7-6} = lane{1-0}; 1106 } 1107 def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> { 1108 let Inst{7} = lane{0}; 1109 } 1110 1111 def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>; 1112 def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>; 1113 1114 // VLD3LN : Vector Load (single 3-element structure to one lane) 1115 class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt> 1116 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), 1117 (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, 1118 nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt, 1119 "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn", 1120 "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []> { 1121 let Rm = 0b1111; 1122 let DecoderMethod = "DecodeVLD3LN"; 1123 } 1124 1125 def VLD3LNd8 : VLD3LN<0b0010, {?,?,?,0}, "8"> { 1126 let Inst{7-5} = lane{2-0}; 1127 } 1128 def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> { 1129 let Inst{7-6} = lane{1-0}; 1130 } 1131 def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> { 1132 let Inst{7} = lane{0}; 1133 } 1134 1135 def VLD3LNd8Pseudo : VLDQQLNPseudo<IIC_VLD3ln>; 1136 def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>; 1137 def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>; 1138 1139 // ...with double-spaced registers: 1140 def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> { 1141 let Inst{7-6} = lane{1-0}; 1142 } 1143 def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> { 1144 let Inst{7} = lane{0}; 1145 } 1146 1147 def VLD3LNq16Pseudo : 
VLDQQQQLNPseudo<IIC_VLD3ln>; 1148 def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>; 1149 1150 // ...with address register writeback: 1151 class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 1152 : NLdStLn<1, 0b10, op11_8, op7_4, 1153 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), 1154 (ins addrmode6:$Rn, am6offset:$Rm, 1155 DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane), 1156 IIC_VLD3lnu, "vld3", Dt, 1157 "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm", 1158 "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb", 1159 []> { 1160 let DecoderMethod = "DecodeVLD3LN"; 1161 } 1162 1163 def VLD3LNd8_UPD : VLD3LNWB<0b0010, {?,?,?,0}, "8"> { 1164 let Inst{7-5} = lane{2-0}; 1165 } 1166 def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> { 1167 let Inst{7-6} = lane{1-0}; 1168 } 1169 def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> { 1170 let Inst{7} = lane{0}; 1171 } 1172 1173 def VLD3LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>; 1174 def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>; 1175 def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>; 1176 1177 def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> { 1178 let Inst{7-6} = lane{1-0}; 1179 } 1180 def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> { 1181 let Inst{7} = lane{0}; 1182 } 1183 1184 def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>; 1185 def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>; 1186 1187 // VLD4LN : Vector Load (single 4-element structure to one lane) 1188 class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt> 1189 : NLdStLn<1, 0b10, op11_8, op7_4, 1190 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4), 1191 (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, 1192 nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt, 1193 "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn", 1194 "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []> { 1195 let Rm = 0b1111; 1196 let Inst{4} = Rn{4}; 1197 let DecoderMethod = "DecodeVLD4LN"; 1198 
} 1199 1200 def VLD4LNd8 : VLD4LN<0b0011, {?,?,?,?}, "8"> { 1201 let Inst{7-5} = lane{2-0}; 1202 } 1203 def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> { 1204 let Inst{7-6} = lane{1-0}; 1205 } 1206 def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> { 1207 let Inst{7} = lane{0}; 1208 let Inst{5} = Rn{5}; 1209 } 1210 1211 def VLD4LNd8Pseudo : VLDQQLNPseudo<IIC_VLD4ln>; 1212 def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>; 1213 def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>; 1214 1215 // ...with double-spaced registers: 1216 def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> { 1217 let Inst{7-6} = lane{1-0}; 1218 } 1219 def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> { 1220 let Inst{7} = lane{0}; 1221 let Inst{5} = Rn{5}; 1222 } 1223 1224 def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>; 1225 def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>; 1226 1227 // ...with address register writeback: 1228 class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 1229 : NLdStLn<1, 0b10, op11_8, op7_4, 1230 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), 1231 (ins addrmode6:$Rn, am6offset:$Rm, 1232 DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), 1233 IIC_VLD4lnu, "vld4", Dt, 1234 "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm", 1235 "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb", 1236 []> { 1237 let Inst{4} = Rn{4}; 1238 let DecoderMethod = "DecodeVLD4LN" ; 1239 } 1240 1241 def VLD4LNd8_UPD : VLD4LNWB<0b0011, {?,?,?,?}, "8"> { 1242 let Inst{7-5} = lane{2-0}; 1243 } 1244 def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> { 1245 let Inst{7-6} = lane{1-0}; 1246 } 1247 def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> { 1248 let Inst{7} = lane{0}; 1249 let Inst{5} = Rn{5}; 1250 } 1251 1252 def VLD4LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>; 1253 def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>; 1254 def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>; 1255 1256 def VLD4LNq16_UPD : VLD4LNWB<0b0111, 
{?,?,1,?}, "16"> { 1257 let Inst{7-6} = lane{1-0}; 1258 } 1259 def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> { 1260 let Inst{7} = lane{0}; 1261 let Inst{5} = Rn{5}; 1262 } 1263 1264 def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>; 1265 def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>; 1266 1267 } // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 1268 1269 // VLD1DUP : Vector Load (single element to all lanes) 1270 class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp> 1271 : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd), 1272 (ins addrmode6dup:$Rn), 1273 IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "", 1274 [(set VecListOneDAllLanes:$Vd, 1275 (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> { 1276 let Rm = 0b1111; 1277 let Inst{4} = Rn{4}; 1278 let DecoderMethod = "DecodeVLD1DupInstruction"; 1279 } 1280 def VLD1DUPd8 : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8>; 1281 def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16>; 1282 def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load>; 1283 1284 def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))), 1285 (VLD1DUPd32 addrmode6:$addr)>; 1286 1287 class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp> 1288 : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd), 1289 (ins addrmode6dup:$Rn), IIC_VLD1dup, 1290 "vld1", Dt, "$Vd, $Rn", "", 1291 [(set VecListDPairAllLanes:$Vd, 1292 (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> { 1293 let Rm = 0b1111; 1294 let Inst{4} = Rn{4}; 1295 let DecoderMethod = "DecodeVLD1DupInstruction"; 1296 } 1297 1298 def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8>; 1299 def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16>; 1300 def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load>; 1301 1302 def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))), 1303 (VLD1DUPq32 addrmode6:$addr)>; 1304 1305 let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { 
1306 // ...with address register writeback: 1307 multiclass VLD1DUPWB<bits<4> op7_4, string Dt> { 1308 def _fixed : NLdSt<1, 0b10, 0b1100, op7_4, 1309 (outs VecListOneDAllLanes:$Vd, GPR:$wb), 1310 (ins addrmode6dup:$Rn), IIC_VLD1dupu, 1311 "vld1", Dt, "$Vd, $Rn!", 1312 "$Rn.addr = $wb", []> { 1313 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 1314 let Inst{4} = Rn{4}; 1315 let DecoderMethod = "DecodeVLD1DupInstruction"; 1316 let AsmMatchConverter = "cvtVLDwbFixed"; 1317 } 1318 def _register : NLdSt<1, 0b10, 0b1100, op7_4, 1319 (outs VecListOneDAllLanes:$Vd, GPR:$wb), 1320 (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu, 1321 "vld1", Dt, "$Vd, $Rn, $Rm", 1322 "$Rn.addr = $wb", []> { 1323 let Inst{4} = Rn{4}; 1324 let DecoderMethod = "DecodeVLD1DupInstruction"; 1325 let AsmMatchConverter = "cvtVLDwbRegister"; 1326 } 1327 } 1328 multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> { 1329 def _fixed : NLdSt<1, 0b10, 0b1100, op7_4, 1330 (outs VecListDPairAllLanes:$Vd, GPR:$wb), 1331 (ins addrmode6dup:$Rn), IIC_VLD1dupu, 1332 "vld1", Dt, "$Vd, $Rn!", 1333 "$Rn.addr = $wb", []> { 1334 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
1335 let Inst{4} = Rn{4}; 1336 let DecoderMethod = "DecodeVLD1DupInstruction"; 1337 let AsmMatchConverter = "cvtVLDwbFixed"; 1338 } 1339 def _register : NLdSt<1, 0b10, 0b1100, op7_4, 1340 (outs VecListDPairAllLanes:$Vd, GPR:$wb), 1341 (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu, 1342 "vld1", Dt, "$Vd, $Rn, $Rm", 1343 "$Rn.addr = $wb", []> { 1344 let Inst{4} = Rn{4}; 1345 let DecoderMethod = "DecodeVLD1DupInstruction"; 1346 let AsmMatchConverter = "cvtVLDwbRegister"; 1347 } 1348 } 1349 1350 defm VLD1DUPd8wb : VLD1DUPWB<{0,0,0,0}, "8">; 1351 defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16">; 1352 defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32">; 1353 1354 defm VLD1DUPq8wb : VLD1QDUPWB<{0,0,1,0}, "8">; 1355 defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16">; 1356 defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32">; 1357 1358 // VLD2DUP : Vector Load (single 2-element structure to all lanes) 1359 class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy> 1360 : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd), 1361 (ins addrmode6dup:$Rn), IIC_VLD2dup, 1362 "vld2", Dt, "$Vd, $Rn", "", []> { 1363 let Rm = 0b1111; 1364 let Inst{4} = Rn{4}; 1365 let DecoderMethod = "DecodeVLD2DupInstruction"; 1366 } 1367 1368 def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8", VecListDPairAllLanes>; 1369 def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes>; 1370 def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes>; 1371 1372 // ...with double-spaced registers 1373 def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8", VecListDPairSpacedAllLanes>; 1374 def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes>; 1375 def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes>; 1376 1377 // ...with address register writeback: 1378 multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> { 1379 def _fixed : NLdSt<1, 0b10, 0b1101, op7_4, 1380 (outs VdTy:$Vd, GPR:$wb), 1381 (ins addrmode6dup:$Rn), IIC_VLD2dupu, 1382 "vld2", Dt, "$Vd, $Rn!", 1383 "$Rn.addr = $wb", []> { 
1384 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 1385 let Inst{4} = Rn{4}; 1386 let DecoderMethod = "DecodeVLD2DupInstruction"; 1387 let AsmMatchConverter = "cvtVLDwbFixed"; 1388 } 1389 def _register : NLdSt<1, 0b10, 0b1101, op7_4, 1390 (outs VdTy:$Vd, GPR:$wb), 1391 (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD2dupu, 1392 "vld2", Dt, "$Vd, $Rn, $Rm", 1393 "$Rn.addr = $wb", []> { 1394 let Inst{4} = Rn{4}; 1395 let DecoderMethod = "DecodeVLD2DupInstruction"; 1396 let AsmMatchConverter = "cvtVLDwbRegister"; 1397 } 1398 } 1399 1400 defm VLD2DUPd8wb : VLD2DUPWB<{0,0,0,0}, "8", VecListDPairAllLanes>; 1401 defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes>; 1402 defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes>; 1403 1404 defm VLD2DUPd8x2wb : VLD2DUPWB<{0,0,1,0}, "8", VecListDPairSpacedAllLanes>; 1405 defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes>; 1406 defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes>; 1407 1408 // VLD3DUP : Vector Load (single 3-element structure to all lanes) 1409 class VLD3DUP<bits<4> op7_4, string Dt> 1410 : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), 1411 (ins addrmode6dup:$Rn), IIC_VLD3dup, 1412 "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []> { 1413 let Rm = 0b1111; 1414 let Inst{4} = 0; 1415 let DecoderMethod = "DecodeVLD3DupInstruction"; 1416 } 1417 1418 def VLD3DUPd8 : VLD3DUP<{0,0,0,?}, "8">; 1419 def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">; 1420 def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">; 1421 1422 def VLD3DUPd8Pseudo : VLDQQPseudo<IIC_VLD3dup>; 1423 def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>; 1424 def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>; 1425 1426 // ...with double-spaced registers (not used for codegen): 1427 def VLD3DUPq8 : VLD3DUP<{0,0,1,?}, "8">; 1428 def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">; 1429 def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">; 1430 1431 // ...with address register 
writeback: 1432 class VLD3DUPWB<bits<4> op7_4, string Dt> 1433 : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), 1434 (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD3dupu, 1435 "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm", 1436 "$Rn.addr = $wb", []> { 1437 let Inst{4} = 0; 1438 let DecoderMethod = "DecodeVLD3DupInstruction"; 1439 } 1440 1441 def VLD3DUPd8_UPD : VLD3DUPWB<{0,0,0,0}, "8">; 1442 def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16">; 1443 def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32">; 1444 1445 def VLD3DUPq8_UPD : VLD3DUPWB<{0,0,1,0}, "8">; 1446 def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16">; 1447 def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32">; 1448 1449 def VLD3DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>; 1450 def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>; 1451 def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>; 1452 1453 // VLD4DUP : Vector Load (single 4-element structure to all lanes) 1454 class VLD4DUP<bits<4> op7_4, string Dt> 1455 : NLdSt<1, 0b10, 0b1111, op7_4, 1456 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4), 1457 (ins addrmode6dup:$Rn), IIC_VLD4dup, 1458 "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> { 1459 let Rm = 0b1111; 1460 let Inst{4} = Rn{4}; 1461 let DecoderMethod = "DecodeVLD4DupInstruction"; 1462 } 1463 1464 def VLD4DUPd8 : VLD4DUP<{0,0,0,?}, "8">; 1465 def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">; 1466 def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; } 1467 1468 def VLD4DUPd8Pseudo : VLDQQPseudo<IIC_VLD4dup>; 1469 def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>; 1470 def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>; 1471 1472 // ...with double-spaced registers (not used for codegen): 1473 def VLD4DUPq8 : VLD4DUP<{0,0,1,?}, "8">; 1474 def VLD4DUPq16 : VLD4DUP<{0,1,1,?}, "16">; 1475 def VLD4DUPq32 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; } 1476 1477 // ...with address register writeback: 1478 class VLD4DUPWB<bits<4> op7_4, string Dt> 1479 : 
NLdSt<1, 0b10, 0b1111, op7_4, 1480 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), 1481 (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu, 1482 "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm", 1483 "$Rn.addr = $wb", []> { 1484 let Inst{4} = Rn{4}; 1485 let DecoderMethod = "DecodeVLD4DupInstruction"; 1486 } 1487 1488 def VLD4DUPd8_UPD : VLD4DUPWB<{0,0,0,0}, "8">; 1489 def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">; 1490 def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; } 1491 1492 def VLD4DUPq8_UPD : VLD4DUPWB<{0,0,1,0}, "8">; 1493 def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">; 1494 def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; } 1495 1496 def VLD4DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>; 1497 def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>; 1498 def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>; 1499 1500 } // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 1501 1502 let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { 1503 1504 // Classes for VST* pseudo-instructions with multi-register operands. 1505 // These are expanded to real instructions after register allocation. 
// Pseudo stores of one Q register (a D-register pair). Expanded to real
// VST instructions after register allocation.
1506 class VSTQPseudo<InstrItinClass itin>
1507   : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), itin, "">;
// ...with base-register write-back via an am6offset operand.
1508 class VSTQWBPseudo<InstrItinClass itin>
1509   : PseudoNLdSt<(outs GPR:$wb),
1510                 (ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin,
1511                 "$addr.addr = $wb">;
// ...write-back form with no offset operand (the "fixed" post-increment
// variant, mirroring the _fixed/_register split of the real instructions).
1512 class VSTQWBfixedPseudo<InstrItinClass itin>
1513   : PseudoNLdSt<(outs GPR:$wb),
1514                 (ins addrmode6:$addr, QPR:$src), itin,
1515                 "$addr.addr = $wb">;
// ...write-back form taking an explicit register offset.
1516 class VSTQWBregisterPseudo<InstrItinClass itin>
1517   : PseudoNLdSt<(outs GPR:$wb),
1518                 (ins addrmode6:$addr, rGPR:$offset, QPR:$src), itin,
1519                 "$addr.addr = $wb">;
// Same four shapes for a QQ register (four consecutive D registers).
1520 class VSTQQPseudo<InstrItinClass itin>
1521   : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">;
1522 class VSTQQWBPseudo<InstrItinClass itin>
1523   : PseudoNLdSt<(outs GPR:$wb),
1524                 (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin,
1525                 "$addr.addr = $wb">;
1526 class VSTQQWBfixedPseudo<InstrItinClass itin>
1527   : PseudoNLdSt<(outs GPR:$wb),
1528                 (ins addrmode6:$addr, QQPR:$src), itin,
1529                 "$addr.addr = $wb">;
1530 class VSTQQWBregisterPseudo<InstrItinClass itin>
1531   : PseudoNLdSt<(outs GPR:$wb),
1532                 (ins addrmode6:$addr, rGPR:$offset, QQPR:$src), itin,
1533                 "$addr.addr = $wb">;
1534
// And for a QQQQ register (eight consecutive D registers).
1535 class VSTQQQQPseudo<InstrItinClass itin>
1536   : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">;
1537 class VSTQQQQWBPseudo<InstrItinClass itin>
1538   : PseudoNLdSt<(outs GPR:$wb),
1539                 (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
1540                 "$addr.addr = $wb">;
1541
1542 // VST1 : Vector Store (multiple single elements)
// One D register to memory, no write-back.
1543 class VST1D<bits<4> op7_4, string Dt>
1544   : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$Rn, VecListOneD:$Vd),
1545           IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []> {
  // Rm == 0b1111 selects the no-write-back encoding (as in the other
  // non-WB NEON ld/st classes in this file).
1546   let Rm = 0b1111;
1547   let Inst{4} = Rn{4};
1548   let DecoderMethod = "DecodeVSTInstruction";
1549 }
// A D-register pair to memory, no write-back.
1550 class VST1Q<bits<4> op7_4, string Dt>
1551   : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$Rn, VecListDPair:$Vd),
1552
IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVSTInstruction";
}

def VST1d8  : VST1D<{0,0,0,?}, "8">;
def VST1d16 : VST1D<{0,1,0,?}, "16">;
def VST1d32 : VST1D<{1,0,0,?}, "32">;
def VST1d64 : VST1D<{1,1,0,?}, "64">;

def VST1q8  : VST1Q<{0,0,?,?}, "8">;
def VST1q16 : VST1Q<{0,1,?,?}, "16">;
def VST1q32 : VST1Q<{1,0,?,?}, "32">;
def VST1q64 : VST1Q<{1,1,?,?}, "64">;

// ...with address register writeback:
// FIX: these are stores, so use the IIC_VST*u (store, with update)
// itineraries rather than the IIC_VLD*u load itineraries the originals
// used — consistent with the non-WB classes above (IIC_VST1, IIC_VST1x2)
// and with the VSTQQWBPseudo defs below (IIC_VST1x3u / IIC_VST1x4u).
multiclass VST1DWB<bits<4> op7_4, string Dt> {
  // Fixed-increment form: no Rm operand; base advances by transfer size.
  def _fixed : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb),
                     (ins addrmode6:$Rn, VecListOneD:$Vd), IIC_VST1u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVSTInstruction";
    let AsmMatchConverter = "cvtVSTwbFixed";
  }
  // Register-offset form: base advances by $Rm.
  def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm, VecListOneD:$Vd),
                        IIC_VST1u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVSTInstruction";
    let AsmMatchConverter = "cvtVSTwbRegister";
  }
}
multiclass VST1QWB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
                     (ins addrmode6:$Rn, VecListDPair:$Vd), IIC_VST1x2u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVSTInstruction";
    let AsmMatchConverter = "cvtVSTwbFixed";
  }
  def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm, VecListDPair:$Vd),
                        IIC_VST1x2u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVSTInstruction";
    let AsmMatchConverter = "cvtVSTwbRegister";
  }
}

defm VST1d8wb  : VST1DWB<{0,0,0,?}, "8">;
defm VST1d16wb : VST1DWB<{0,1,0,?}, "16">;
defm VST1d32wb : VST1DWB<{1,0,0,?}, "32">;
defm VST1d64wb : VST1DWB<{1,1,0,?}, "64">;

defm VST1q8wb  : VST1QWB<{0,0,?,?}, "8">;
defm VST1q16wb : VST1QWB<{0,1,?,?}, "16">;
defm VST1q32wb : VST1QWB<{1,0,?,?}, "32">;
defm VST1q64wb : VST1QWB<{1,1,?,?}, "64">;

// ...with 3 registers
class VST1D3<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0110, op7_4, (outs),
          (ins addrmode6:$Rn, VecListThreeD:$Vd),
          IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVSTInstruction";
}
multiclass VST1D3WB<bits<4> op7_4, string Dt> {
  // Itinerary fixed: IIC_VST1x3u (was IIC_VLD1x3u).
  def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
                     (ins addrmode6:$Rn, VecListThreeD:$Vd), IIC_VST1x3u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVSTInstruction";
    let AsmMatchConverter = "cvtVSTwbFixed";
  }
  def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm, VecListThreeD:$Vd),
                        IIC_VST1x3u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVSTInstruction";
    let AsmMatchConverter = "cvtVSTwbRegister";
  }
}

def VST1d8T  : VST1D3<{0,0,0,?}, "8">;
def VST1d16T : VST1D3<{0,1,0,?}, "16">;
def VST1d32T : VST1D3<{1,0,0,?}, "32">;
def VST1d64T : VST1D3<{1,1,0,?}, "64">;

defm VST1d8Twb  : VST1D3WB<{0,0,0,?}, "8">;
defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16">;
defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32">;
defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64">;

def VST1d64TPseudo            : VSTQQPseudo<IIC_VST1x3>;
def VST1d64TPseudoWB_fixed    : VSTQQWBPseudo<IIC_VST1x3u>;
def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>;

// ...with 4 registers
class VST1D4<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0010, op7_4, (outs),
          (ins addrmode6:$Rn, VecListFourD:$Vd),
          IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "",
          []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVSTInstruction";
}
multiclass VST1D4WB<bits<4> op7_4, string Dt> {
  // Itinerary fixed: IIC_VST1x4u (was IIC_VLD1x4u).
  def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
                     (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VST1x4u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVSTInstruction";
    let AsmMatchConverter = "cvtVSTwbFixed";
  }
  def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd),
                        IIC_VST1x4u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVSTInstruction";
    let AsmMatchConverter = "cvtVSTwbRegister";
  }
}

def VST1d8Q  : VST1D4<{0,0,?,?}, "8">;
def VST1d16Q : VST1D4<{0,1,?,?}, "16">;
def VST1d32Q : VST1D4<{1,0,?,?}, "32">;
def VST1d64Q : VST1D4<{1,1,?,?}, "64">;

defm VST1d8Qwb  : VST1D4WB<{0,0,?,?}, "8">;
defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16">;
defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32">;
defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64">;

def VST1d64QPseudo            : VSTQQPseudo<IIC_VST1x4>;
def VST1d64QPseudoWB_fixed    : VSTQQWBPseudo<IIC_VST1x4u>;
def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>;

// VST2 : Vector Store (multiple 2-element structures)
class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
           InstrItinClass itin>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$Rn, VdTy:$Vd),
          itin, "vst2", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVSTInstruction";
}

def VST2d8  : VST2<0b1000, {0,0,?,?}, "8",  VecListDPair, IIC_VST2>;
def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2>;
def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2>;

def VST2q8  : VST2<0b0011, {0,0,?,?}, "8",  VecListFourD, IIC_VST2x2>;
def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2>;
def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2>;

def VST2q8Pseudo  : VSTQQPseudo<IIC_VST2x2>;
def VST2q16Pseudo :
VSTQQPseudo<IIC_VST2x2>;
def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>;

// ...with address register writeback:
// Note: these are stores, so they use VST itineraries (IIC_VST2u /
// IIC_VST2x2u), not the VLD load itineraries originally copy-pasted here.
multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
                   RegisterOperand VdTy> {
  def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
                     (ins addrmode6:$Rn, VdTy:$Vd), IIC_VST2u,
                     "vst2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVSTInstruction";
    let AsmMatchConverter = "cvtVSTwbFixed";
  }
  def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VST2u,
                        "vst2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVSTInstruction";
    let AsmMatchConverter = "cvtVSTwbRegister";
  }
}
multiclass VST2QWB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
                     (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VST2x2u,
                     "vst2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVSTInstruction";
    let AsmMatchConverter = "cvtVSTwbFixed";
  }
  def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd),
                        IIC_VST2x2u,
                        "vst2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVSTInstruction";
    let AsmMatchConverter = "cvtVSTwbRegister";
  }
}

defm VST2d8wb  : VST2DWB<0b1000, {0,0,?,?}, "8",  VecListDPair>;
defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair>;
defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair>;

defm VST2q8wb  : VST2QWB<{0,0,?,?}, "8">;
defm VST2q16wb : VST2QWB<{0,1,?,?}, "16">;
defm VST2q32wb : VST2QWB<{1,0,?,?}, "32">;

def VST2q8PseudoWB_fixed     : VSTQQWBfixedPseudo<IIC_VST2x2u>;
def VST2q16PseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST2x2u>;
def VST2q32PseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST2x2u>;
def VST2q8PseudoWB_register  : VSTQQWBregisterPseudo<IIC_VST2x2u>;
def VST2q16PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;
def VST2q32PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;

// ...with double-spaced registers
def VST2b8  : VST2<0b1001, {0,0,?,?}, "8",  VecListDPairSpaced, IIC_VST2>;
def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2>;
def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2>;
defm VST2b8wb  : VST2DWB<0b1001, {0,0,?,?}, "8",  VecListDPairSpaced>;
defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced>;
defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced>;

// VST3 : Vector Store (multiple 3-element structures)
class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3,
1800 "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> { 1801 let Rm = 0b1111; 1802 let Inst{4} = Rn{4}; 1803 let DecoderMethod = "DecodeVSTInstruction"; 1804 } 1805 1806 def VST3d8 : VST3D<0b0100, {0,0,0,?}, "8">; 1807 def VST3d16 : VST3D<0b0100, {0,1,0,?}, "16">; 1808 def VST3d32 : VST3D<0b0100, {1,0,0,?}, "32">; 1809 1810 def VST3d8Pseudo : VSTQQPseudo<IIC_VST3>; 1811 def VST3d16Pseudo : VSTQQPseudo<IIC_VST3>; 1812 def VST3d32Pseudo : VSTQQPseudo<IIC_VST3>; 1813 1814 // ...with address register writeback: 1815 class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt> 1816 : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), 1817 (ins addrmode6:$Rn, am6offset:$Rm, 1818 DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u, 1819 "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm", 1820 "$Rn.addr = $wb", []> { 1821 let Inst{4} = Rn{4}; 1822 let DecoderMethod = "DecodeVSTInstruction"; 1823 } 1824 1825 def VST3d8_UPD : VST3DWB<0b0100, {0,0,0,?}, "8">; 1826 def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">; 1827 def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">; 1828 1829 def VST3d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>; 1830 def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>; 1831 def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>; 1832 1833 // ...with double-spaced registers: 1834 def VST3q8 : VST3D<0b0101, {0,0,0,?}, "8">; 1835 def VST3q16 : VST3D<0b0101, {0,1,0,?}, "16">; 1836 def VST3q32 : VST3D<0b0101, {1,0,0,?}, "32">; 1837 def VST3q8_UPD : VST3DWB<0b0101, {0,0,0,?}, "8">; 1838 def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">; 1839 def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">; 1840 1841 def VST3q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>; 1842 def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>; 1843 def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>; 1844 1845 // ...alternate versions to be allocated odd register numbers: 1846 def VST3q8oddPseudo : VSTQQQQPseudo<IIC_VST3>; 1847 def VST3q16oddPseudo : VSTQQQQPseudo<IIC_VST3>; 1848 def VST3q32oddPseudo : VSTQQQQPseudo<IIC_VST3>; 
1849 1850 def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>; 1851 def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>; 1852 def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>; 1853 1854 // VST4 : Vector Store (multiple 4-element structures) 1855 class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt> 1856 : NLdSt<0, 0b00, op11_8, op7_4, (outs), 1857 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), 1858 IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn", 1859 "", []> { 1860 let Rm = 0b1111; 1861 let Inst{5-4} = Rn{5-4}; 1862 let DecoderMethod = "DecodeVSTInstruction"; 1863 } 1864 1865 def VST4d8 : VST4D<0b0000, {0,0,?,?}, "8">; 1866 def VST4d16 : VST4D<0b0000, {0,1,?,?}, "16">; 1867 def VST4d32 : VST4D<0b0000, {1,0,?,?}, "32">; 1868 1869 def VST4d8Pseudo : VSTQQPseudo<IIC_VST4>; 1870 def VST4d16Pseudo : VSTQQPseudo<IIC_VST4>; 1871 def VST4d32Pseudo : VSTQQPseudo<IIC_VST4>; 1872 1873 // ...with address register writeback: 1874 class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt> 1875 : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), 1876 (ins addrmode6:$Rn, am6offset:$Rm, 1877 DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u, 1878 "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm", 1879 "$Rn.addr = $wb", []> { 1880 let Inst{5-4} = Rn{5-4}; 1881 let DecoderMethod = "DecodeVSTInstruction"; 1882 } 1883 1884 def VST4d8_UPD : VST4DWB<0b0000, {0,0,?,?}, "8">; 1885 def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">; 1886 def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">; 1887 1888 def VST4d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>; 1889 def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>; 1890 def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>; 1891 1892 // ...with double-spaced registers: 1893 def VST4q8 : VST4D<0b0001, {0,0,?,?}, "8">; 1894 def VST4q16 : VST4D<0b0001, {0,1,?,?}, "16">; 1895 def VST4q32 : VST4D<0b0001, {1,0,?,?}, "32">; 1896 def VST4q8_UPD : VST4DWB<0b0001, {0,0,?,?}, "8">; 1897 def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, 
"16">; 1898 def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">; 1899 1900 def VST4q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>; 1901 def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>; 1902 def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>; 1903 1904 // ...alternate versions to be allocated odd register numbers: 1905 def VST4q8oddPseudo : VSTQQQQPseudo<IIC_VST4>; 1906 def VST4q16oddPseudo : VSTQQQQPseudo<IIC_VST4>; 1907 def VST4q32oddPseudo : VSTQQQQPseudo<IIC_VST4>; 1908 1909 def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>; 1910 def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>; 1911 def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>; 1912 1913 } // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 1914 1915 // Classes for VST*LN pseudo-instructions with multi-register operands. 1916 // These are expanded to real instructions after register allocation. 1917 class VSTQLNPseudo<InstrItinClass itin> 1918 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane), 1919 itin, "">; 1920 class VSTQLNWBPseudo<InstrItinClass itin> 1921 : PseudoNLdSt<(outs GPR:$wb), 1922 (ins addrmode6:$addr, am6offset:$offset, QPR:$src, 1923 nohash_imm:$lane), itin, "$addr.addr = $wb">; 1924 class VSTQQLNPseudo<InstrItinClass itin> 1925 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane), 1926 itin, "">; 1927 class VSTQQLNWBPseudo<InstrItinClass itin> 1928 : PseudoNLdSt<(outs GPR:$wb), 1929 (ins addrmode6:$addr, am6offset:$offset, QQPR:$src, 1930 nohash_imm:$lane), itin, "$addr.addr = $wb">; 1931 class VSTQQQQLNPseudo<InstrItinClass itin> 1932 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane), 1933 itin, "">; 1934 class VSTQQQQLNWBPseudo<InstrItinClass itin> 1935 : PseudoNLdSt<(outs GPR:$wb), 1936 (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src, 1937 nohash_imm:$lane), itin, "$addr.addr = $wb">; 1938 1939 // VST1LN : Vector Store (single element from one lane) 1940 class VST1LN<bits<4> op11_8, bits<4> 
op7_4, string Dt, ValueType Ty, 1941 PatFrag StoreOp, SDNode ExtractOp, Operand AddrMode> 1942 : NLdStLn<1, 0b00, op11_8, op7_4, (outs), 1943 (ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane), 1944 IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "", 1945 [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]> { 1946 let Rm = 0b1111; 1947 let DecoderMethod = "DecodeVST1LN"; 1948 } 1949 class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp> 1950 : VSTQLNPseudo<IIC_VST1ln> { 1951 let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane), 1952 addrmode6:$addr)]; 1953 } 1954 1955 def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8, 1956 NEONvgetlaneu, addrmode6> { 1957 let Inst{7-5} = lane{2-0}; 1958 } 1959 def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16, 1960 NEONvgetlaneu, addrmode6> { 1961 let Inst{7-6} = lane{1-0}; 1962 let Inst{4} = Rn{5}; 1963 } 1964 1965 def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt, 1966 addrmode6oneL32> { 1967 let Inst{7} = lane{0}; 1968 let Inst{5-4} = Rn{5-4}; 1969 } 1970 1971 def VST1LNq8Pseudo : VST1QLNPseudo<v16i8, truncstorei8, NEONvgetlaneu>; 1972 def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, NEONvgetlaneu>; 1973 def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>; 1974 1975 def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr), 1976 (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>; 1977 def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr), 1978 (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>; 1979 1980 // ...with address register writeback: 1981 class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, 1982 PatFrag StoreOp, SDNode ExtractOp, Operand AdrMode> 1983 : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), 1984 (ins AdrMode:$Rn, am6offset:$Rm, 1985 DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt, 1986 "\\{$Vd[$lane]\\}, $Rn$Rm", 1987 "$Rn.addr = $wb", 1988 
[(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), 1989 AdrMode:$Rn, am6offset:$Rm))]> { 1990 let DecoderMethod = "DecodeVST1LN"; 1991 } 1992 class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp> 1993 : VSTQLNWBPseudo<IIC_VST1lnu> { 1994 let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane), 1995 addrmode6:$addr, am6offset:$offset))]; 1996 } 1997 1998 def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8, 1999 NEONvgetlaneu, addrmode6> { 2000 let Inst{7-5} = lane{2-0}; 2001 } 2002 def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16, 2003 NEONvgetlaneu, addrmode6> { 2004 let Inst{7-6} = lane{1-0}; 2005 let Inst{4} = Rn{5}; 2006 } 2007 def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store, 2008 extractelt, addrmode6oneL32> { 2009 let Inst{7} = lane{0}; 2010 let Inst{5-4} = Rn{5-4}; 2011 } 2012 2013 def VST1LNq8Pseudo_UPD : VST1QLNWBPseudo<v16i8, post_truncsti8, NEONvgetlaneu>; 2014 def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16,NEONvgetlaneu>; 2015 def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>; 2016 2017 let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { 2018 2019 // VST2LN : Vector Store (single 2-element structure from one lane) 2020 class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt> 2021 : NLdStLn<1, 0b00, op11_8, op7_4, (outs), 2022 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane), 2023 IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn", 2024 "", []> { 2025 let Rm = 0b1111; 2026 let Inst{4} = Rn{4}; 2027 let DecoderMethod = "DecodeVST2LN"; 2028 } 2029 2030 def VST2LNd8 : VST2LN<0b0001, {?,?,?,?}, "8"> { 2031 let Inst{7-5} = lane{2-0}; 2032 } 2033 def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> { 2034 let Inst{7-6} = lane{1-0}; 2035 } 2036 def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> { 2037 let Inst{7} = lane{0}; 2038 } 2039 2040 def VST2LNd8Pseudo : 
VSTQLNPseudo<IIC_VST2ln>; 2041 def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>; 2042 def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>; 2043 2044 // ...with double-spaced registers: 2045 def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> { 2046 let Inst{7-6} = lane{1-0}; 2047 let Inst{4} = Rn{4}; 2048 } 2049 def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> { 2050 let Inst{7} = lane{0}; 2051 let Inst{4} = Rn{4}; 2052 } 2053 2054 def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>; 2055 def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>; 2056 2057 // ...with address register writeback: 2058 class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 2059 : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), 2060 (ins addrmode6:$Rn, am6offset:$Rm, 2061 DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt, 2062 "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm", 2063 "$Rn.addr = $wb", []> { 2064 let Inst{4} = Rn{4}; 2065 let DecoderMethod = "DecodeVST2LN"; 2066 } 2067 2068 def VST2LNd8_UPD : VST2LNWB<0b0001, {?,?,?,?}, "8"> { 2069 let Inst{7-5} = lane{2-0}; 2070 } 2071 def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> { 2072 let Inst{7-6} = lane{1-0}; 2073 } 2074 def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> { 2075 let Inst{7} = lane{0}; 2076 } 2077 2078 def VST2LNd8Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>; 2079 def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>; 2080 def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>; 2081 2082 def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> { 2083 let Inst{7-6} = lane{1-0}; 2084 } 2085 def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> { 2086 let Inst{7} = lane{0}; 2087 } 2088 2089 def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>; 2090 def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>; 2091 2092 // VST3LN : Vector Store (single 3-element structure from one lane) 2093 class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt> 2094 : NLdStLn<1, 0b00, op11_8, op7_4, (outs), 2095 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, 
DPR:$src3, 2096 nohash_imm:$lane), IIC_VST3ln, "vst3", Dt, 2097 "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []> { 2098 let Rm = 0b1111; 2099 let DecoderMethod = "DecodeVST3LN"; 2100 } 2101 2102 def VST3LNd8 : VST3LN<0b0010, {?,?,?,0}, "8"> { 2103 let Inst{7-5} = lane{2-0}; 2104 } 2105 def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16"> { 2106 let Inst{7-6} = lane{1-0}; 2107 } 2108 def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> { 2109 let Inst{7} = lane{0}; 2110 } 2111 2112 def VST3LNd8Pseudo : VSTQQLNPseudo<IIC_VST3ln>; 2113 def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>; 2114 def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>; 2115 2116 // ...with double-spaced registers: 2117 def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> { 2118 let Inst{7-6} = lane{1-0}; 2119 } 2120 def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> { 2121 let Inst{7} = lane{0}; 2122 } 2123 2124 def VST3LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>; 2125 def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>; 2126 2127 // ...with address register writeback: 2128 class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 2129 : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), 2130 (ins addrmode6:$Rn, am6offset:$Rm, 2131 DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane), 2132 IIC_VST3lnu, "vst3", Dt, 2133 "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm", 2134 "$Rn.addr = $wb", []> { 2135 let DecoderMethod = "DecodeVST3LN"; 2136 } 2137 2138 def VST3LNd8_UPD : VST3LNWB<0b0010, {?,?,?,0}, "8"> { 2139 let Inst{7-5} = lane{2-0}; 2140 } 2141 def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> { 2142 let Inst{7-6} = lane{1-0}; 2143 } 2144 def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> { 2145 let Inst{7} = lane{0}; 2146 } 2147 2148 def VST3LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>; 2149 def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>; 2150 def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>; 2151 2152 def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> { 2153 let Inst{7-6} = 
lane{1-0}; 2154 } 2155 def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> { 2156 let Inst{7} = lane{0}; 2157 } 2158 2159 def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>; 2160 def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>; 2161 2162 // VST4LN : Vector Store (single 4-element structure from one lane) 2163 class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt> 2164 : NLdStLn<1, 0b00, op11_8, op7_4, (outs), 2165 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, 2166 nohash_imm:$lane), IIC_VST4ln, "vst4", Dt, 2167 "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn", 2168 "", []> { 2169 let Rm = 0b1111; 2170 let Inst{4} = Rn{4}; 2171 let DecoderMethod = "DecodeVST4LN"; 2172 } 2173 2174 def VST4LNd8 : VST4LN<0b0011, {?,?,?,?}, "8"> { 2175 let Inst{7-5} = lane{2-0}; 2176 } 2177 def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> { 2178 let Inst{7-6} = lane{1-0}; 2179 } 2180 def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> { 2181 let Inst{7} = lane{0}; 2182 let Inst{5} = Rn{5}; 2183 } 2184 2185 def VST4LNd8Pseudo : VSTQQLNPseudo<IIC_VST4ln>; 2186 def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>; 2187 def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>; 2188 2189 // ...with double-spaced registers: 2190 def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> { 2191 let Inst{7-6} = lane{1-0}; 2192 } 2193 def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> { 2194 let Inst{7} = lane{0}; 2195 let Inst{5} = Rn{5}; 2196 } 2197 2198 def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>; 2199 def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>; 2200 2201 // ...with address register writeback: 2202 class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 2203 : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), 2204 (ins addrmode6:$Rn, am6offset:$Rm, 2205 DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), 2206 IIC_VST4lnu, "vst4", Dt, 2207 "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm", 2208 "$Rn.addr = $wb", []> { 2209 let Inst{4} = 
Rn{4}; 2210 let DecoderMethod = "DecodeVST4LN"; 2211 } 2212 2213 def VST4LNd8_UPD : VST4LNWB<0b0011, {?,?,?,?}, "8"> { 2214 let Inst{7-5} = lane{2-0}; 2215 } 2216 def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> { 2217 let Inst{7-6} = lane{1-0}; 2218 } 2219 def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> { 2220 let Inst{7} = lane{0}; 2221 let Inst{5} = Rn{5}; 2222 } 2223 2224 def VST4LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>; 2225 def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>; 2226 def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>; 2227 2228 def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> { 2229 let Inst{7-6} = lane{1-0}; 2230 } 2231 def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> { 2232 let Inst{7} = lane{0}; 2233 let Inst{5} = Rn{5}; 2234 } 2235 2236 def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>; 2237 def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>; 2238 2239 } // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 2240 2241 2242 //===----------------------------------------------------------------------===// 2243 // NEON pattern fragments 2244 //===----------------------------------------------------------------------===// 2245 2246 // Extract D sub-registers of Q registers. 
def DSubReg_i8_reg  : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/8, MVT::i32);
}]>;
def DSubReg_i16_reg : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/4, MVT::i32);
}]>;
def DSubReg_i32_reg : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/2, MVT::i32);
}]>;
def DSubReg_f64_reg : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue(), MVT::i32);
}]>;

// Extract S sub-registers of Q/D registers.
def SSubReg_f32_reg : SDNodeXForm<imm, [{
  assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue(), MVT::i32);
}]>;

// Translate lane numbers from Q registers to D subregs.
def SubReg_i8_lane  : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 7, MVT::i32);
}]>;
def SubReg_i16_lane : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 3, MVT::i32);
}]>;
def SubReg_i32_lane : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 1, MVT::i32);
}]>;

//===----------------------------------------------------------------------===//
// Instruction Classes
//===----------------------------------------------------------------------===//

// Basic 2-register operations: double- and quad-register.
class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
           string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VUNAD, OpcodeStr, Dt,"$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm))))]>;
class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
           string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VUNAQ, OpcodeStr, Dt,"$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm))))]>;

// Basic 2-register intrinsics, both double- and quad-register.
class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
        (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
        (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Narrow 2-register operations.
class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyD, ValueType TyQ, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
        (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (TyD (OpNode (TyQ QPR:$Vm))))]>;

// Narrow 2-register intrinsics.
class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyD, ValueType TyQ, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
        (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vm))))]>;

// Long 2-register operations (currently only used for VMOVL).
class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
        (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vm))))]>;

// Long 2-register intrinsics.
class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
        (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vm))))]>;

// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$Vd, DPR:$Vm),
        (ins DPR:$src1, DPR:$src2), IIC_VPERMD,
        OpcodeStr, Dt, "$Vd, $Vm",
        "$src1 = $Vd, $src2 = $Vm", []>;
class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
                  InstrItinClass itin, string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$Vd, QPR:$Vm),
        (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$Vd, $Vm",
        "$src1 = $Vd, $src2 = $Vm", []>;

// Basic 3-register operations: double- and quad-register.
class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  let isCommutable = Commutable;
}
// Same as N3VD but no data type.
class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr,
            ValueType ResTy, ValueType OpTy,
            SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 0, op4,
         (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
         OpcodeStr, "$Vd, $Vn, $Vm", "",
         [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>{
  let isCommutable = Commutable;
}

class N3VDSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType Ty, SDNode ShOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$Vn),
                              (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> {
  // Lane-indexed form is never commutable: the lane binds to $Vm only.
  let isCommutable = 0;
}
class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
               string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane","",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$Vn),
                              (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
  let isCommutable = 0;
}

class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  let isCommutable = Commutable;
}
class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr,
            ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 1, op4,
         (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
         OpcodeStr, "$Vd, $Vn, $Vm", "",
         [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>{
  let isCommutable = Commutable;
}
class N3VQSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$Vn),
                                 (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                      imm:$lane)))))]> {
  let isCommutable = 0;
}
class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$Vn),
                                 (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                      imm:$lane)))))]> {
  let isCommutable = 0;
}

// Basic 3-register intrinsics, both double- and quad-register.
class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  let isCommutable = Commutable;
}
class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (Ty DPR:$Vd),
              (Ty (IntOp (Ty DPR:$Vn),
                         (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
                                           imm:$lane)))))]> {
  let isCommutable = 0;
}
class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (Ty DPR:$Vd),
              (Ty (IntOp (Ty DPR:$Vn),
                         (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
  let isCommutable = 0;
}
// Same as N3VDInt, but the operand order in the ins list and the assembly
// string is swapped: "$Vd, $Vm, $Vn".
class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                Format f, InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vm, DPR:$Vn), f, itin,
        OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (OpTy DPR:$Vn))))]> {
  let isCommutable = 0;
}

class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  let isCommutable = Commutable;
}
class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (ResTy QPR:$Vn),
                            (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                 imm:$lane)))))]> {
  let isCommutable = 0;
}
class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (ResTy QPR:$Vn),
                            (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                 imm:$lane)))))]> {
  let isCommutable = 0;
}
// Same as N3VQInt, but with swapped "$Vd, $Vm, $Vn" operand order.
class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                Format f, InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vm, QPR:$Vn), f, itin,
        OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (OpTy QPR:$Vn))))]> {
  let isCommutable = 0;
}

// Multiply-Add/Sub operations: double- and quad-register.
// Multiply-accumulate pattern: $Vd = OpNode($src1, MulOp($Vn, $Vm)), with
// the accumulator tied to the destination ("$src1 = $Vd").
class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDPatternOperator MulOp, SDPatternOperator OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
                             (Ty (MulOp DPR:$Vn, DPR:$Vm)))))]>;

class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd),
        (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set (Ty DPR:$Vd),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (MulOp DPR:$Vn,
                                   (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
                                                     imm:$lane)))))))]>;
class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType Ty, SDNode MulOp, SDNode ShOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd),
        (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set (Ty DPR:$Vd),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (MulOp DPR:$Vn,
                                   (Ty (NEONvduplane (Ty DPR_8:$Vm),
                                                     imm:$lane)))))))]>;

class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty,
                SDPatternOperator MulOp, SDPatternOperator OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
                             (Ty (MulOp QPR:$Vn, QPR:$Vm)))))]>;
class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
                  SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set (ResTy QPR:$Vd),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (MulOp QPR:$Vn,
                                         (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                              imm:$lane)))))))]>;
class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType ResTy, ValueType OpTy,
                    SDNode MulOp, SDNode ShOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set (ResTy QPR:$Vd),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (MulOp QPR:$Vn,
                                         (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                              imm:$lane)))))))]>;

// Neon Intrinsic-Op instructions (VABA): double- and quad-register.
// Intrinsic-then-op pattern: $Vd = OpNode($src1, IntOp($Vn, $Vm)), with the
// accumulator tied to the destination ("$src1 = $Vd").
class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, Intrinsic IntOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
                             (Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>;
class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, Intrinsic IntOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
                             (Ty (IntOp (Ty QPR:$Vn), (Ty QPR:$Vm))))))]>;

// Neon 3-argument intrinsics, both double- and quad-register.
// The destination register is also used as the first source operand register.
class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$src1),
                                     (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src1),
                                     (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;

// Long Multiply-Add/Sub operations.
// Two D-register sources multiplied into a Q-register accumulator.
class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
                               (TyQ (MulOp (TyD DPR:$Vn),
                                           (TyD DPR:$Vm)))))]>;
class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set QPR:$Vd,
          (OpNode (TyQ QPR:$src1),
                  (TyQ (MulOp (TyD DPR:$Vn),
                              (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),
                                                 imm:$lane))))))]>;
class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set QPR:$Vd,
          (OpNode (TyQ QPR:$src1),
                  (TyQ (MulOp (TyD DPR:$Vn),
                              (TyD (NEONvduplane (TyD DPR_8:$Vm),
                                                 imm:$lane))))))]>;

// Long Intrinsic-Op vector operations with explicit extend (VABAL).
class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp,
                   SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
                               (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                                       (TyD DPR:$Vm)))))))]>;

// Neon Long 3-argument intrinsic.  The destination register is
// a quad-register and is also used as the first source operand register.
class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
          (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$Vn), (TyD DPR:$Vm))))]>;
class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (ResTy QPR:$src1),
                            (OpTy DPR:$Vn),
                            (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                imm:$lane)))))]>;
class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (ResTy QPR:$src1),
                            (OpTy DPR:$Vn),
                            (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                imm:$lane)))))]>;

// Narrowing 3-register intrinsics.
// Narrowing: two Q-register sources produce a D-register result.
class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
              Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINi4D,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vn), (TyQ QPR:$Vm))))]> {
  let isCommutable = Commutable;
}

// Long 3-register operations.
// Two D-register sources produce a Q-register result.
class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  let isCommutable = Commutable;
}
class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set QPR:$Vd,
          (TyQ (OpNode (TyD DPR:$Vn),
                       (TyD (NEONvduplane (TyD DPR_VFP2:$Vm), imm:$lane)))))]>;
class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set QPR:$Vd,
          (TyQ (OpNode (TyD DPR:$Vn),
                       (TyD (NEONvduplane (TyD DPR_8:$Vm), imm:$lane)))))]>;

// Long 3-register operations with explicitly extended operands.
// Both D-register operands are widened with ExtOp before OpNode is applied.
class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDNode OpNode, SDNode ExtOp,
              bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (OpNode (TyQ (ExtOp (TyD DPR:$Vn))),
                               (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  let isCommutable = Commutable;
}

// Long 3-register intrinsics with explicit extend (VABDL).
// The D-sized intrinsic result is widened with ExtOp into the Q destination.
class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                 InstrItinClass itin, string OpcodeStr, string Dt,
                 ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp,
                 bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                               (TyD DPR:$Vm))))))]> {
  let isCommutable = Commutable;
}

// Long 3-register intrinsics.
class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  let isCommutable = Commutable;
}
class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (OpTy DPR:$Vn),
                            (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                imm:$lane)))))]>;
class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (OpTy DPR:$Vn),
                            (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                imm:$lane)))))]>;

// Wide 3-register operations.
// Wide: a Q-register first operand combined with a widened (ExtOp)
// D-register second operand.
class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
           SDNode OpNode, SDNode ExtOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VSUBiD,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$Vn),
                               (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  let isCommutable = Commutable;
}

// Pairwise long 2-register intrinsics, both double- and quad-register.
class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Pairwise long 2-register accumulate intrinsics,
// both double- and quad-register.
// The destination register is also used as the first source operand register.
class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vm), IIC_VPALiD,
        OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$Vm))))]>;
class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vm), IIC_VPALiQ,
        OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$Vm))))]>;

// Shift by immediate,
// both double- and quad-register.
class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin, Operand ImmTy,
             string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), f, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd, (Ty (OpNode (Ty DPR:$Vm), (i32 imm:$SIMM))))]>;
class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin, Operand ImmTy,
             string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), f, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd, (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>;

// Long shift by immediate.
// Long shift: D-register source, Q-register result.
class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, Operand ImmTy, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs QPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), N2RegVShLFrm,
           IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm),
                                         (i32 imm:$SIMM))))]>;

// Narrow shift by immediate.
// Q-register source, D-register result.
class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, Operand ImmTy, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs DPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm),
                                         (i32 imm:$SIMM))))]>;

// Shift right by immediate and accumulate,
// both double- and quad-register.
class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
           (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set DPR:$Vd, (Ty (add DPR:$src1,
                                   (Ty (ShOp DPR:$Vm, (i32 imm:$SIMM))))))]>;
class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
           (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set QPR:$Vd, (Ty (add QPR:$src1,
                                   (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>;

// Shift by immediate and insert,
// both double- and quad-register.
class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, Format f, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
           (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set DPR:$Vd, (Ty (ShOp DPR:$src1, DPR:$Vm, (i32 imm:$SIMM))))]>;
class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, Format f, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
           (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiQ,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set QPR:$Vd, (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>;

// Convert, with fractional bits immediate,
// both double- and quad-register.
class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
              Intrinsic IntOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd), (ins DPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
           IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>;
class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
              Intrinsic IntOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd), (ins QPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
           IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>;

//===----------------------------------------------------------------------===//
// Multiclasses
//===----------------------------------------------------------------------===//

// Abbreviations used in multiclass suffixes:
//   Q = quarter int (8 bit) elements
//   H = half int (16 bit) elements
//   S = single int (32 bit) elements
//   D = double int (64 bit) elements

// Neon 2-register vector operations and intrinsics.

// Neon 2-register comparisons.
//   source operand element sizes of 8, 16 and 32 bits:
multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op4, string opc, string Dt,
                       string asm, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "8"), asm, "",
                  [(set DPR:$Vd, (v8i8 (OpNode (v8i8 DPR:$Vm))))]>;
  def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "16"), asm, "",
                  [(set DPR:$Vd, (v4i16 (OpNode (v4i16 DPR:$Vm))))]>;
  def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "32"), asm, "",
                  [(set DPR:$Vd, (v2i32 (OpNode (v2i32 DPR:$Vm))))]>;
  def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, "f32", asm, "",
                  [(set DPR:$Vd, (v2i32 (OpNode (v2f32 DPR:$Vm))))]> {
    let Inst{10} = 1; // overwrite F = 1
  }

  // 128-bit vector types.
3000 def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4, 3001 (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary, 3002 opc, !strconcat(Dt, "8"), asm, "", 3003 [(set QPR:$Vd, (v16i8 (OpNode (v16i8 QPR:$Vm))))]>; 3004 def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4, 3005 (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary, 3006 opc, !strconcat(Dt, "16"), asm, "", 3007 [(set QPR:$Vd, (v8i16 (OpNode (v8i16 QPR:$Vm))))]>; 3008 def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4, 3009 (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary, 3010 opc, !strconcat(Dt, "32"), asm, "", 3011 [(set QPR:$Vd, (v4i32 (OpNode (v4i32 QPR:$Vm))))]>; 3012 def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4, 3013 (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary, 3014 opc, "f32", asm, "", 3015 [(set QPR:$Vd, (v4i32 (OpNode (v4f32 QPR:$Vm))))]> { 3016 let Inst{10} = 1; // overwrite F = 1 3017 } 3018 } 3019 3020 3021 // Neon 2-register vector intrinsics, 3022 // element sizes of 8, 16 and 32 bits: 3023 multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, 3024 bits<5> op11_7, bit op4, 3025 InstrItinClass itinD, InstrItinClass itinQ, 3026 string OpcodeStr, string Dt, Intrinsic IntOp> { 3027 // 64-bit vector types. 3028 def v8i8 : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4, 3029 itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>; 3030 def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4, 3031 itinD, OpcodeStr, !strconcat(Dt, "16"),v4i16,v4i16,IntOp>; 3032 def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4, 3033 itinD, OpcodeStr, !strconcat(Dt, "32"),v2i32,v2i32,IntOp>; 3034 3035 // 128-bit vector types. 
3036 def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4, 3037 itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8,v16i8,IntOp>; 3038 def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4, 3039 itinQ, OpcodeStr, !strconcat(Dt, "16"),v8i16,v8i16,IntOp>; 3040 def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4, 3041 itinQ, OpcodeStr, !strconcat(Dt, "32"),v4i32,v4i32,IntOp>; 3042 } 3043 3044 3045 // Neon Narrowing 2-register vector operations, 3046 // source operand element sizes of 16, 32 and 64 bits: 3047 multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, 3048 bits<5> op11_7, bit op6, bit op4, 3049 InstrItinClass itin, string OpcodeStr, string Dt, 3050 SDNode OpNode> { 3051 def v8i8 : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4, 3052 itin, OpcodeStr, !strconcat(Dt, "16"), 3053 v8i8, v8i16, OpNode>; 3054 def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4, 3055 itin, OpcodeStr, !strconcat(Dt, "32"), 3056 v4i16, v4i32, OpNode>; 3057 def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4, 3058 itin, OpcodeStr, !strconcat(Dt, "64"), 3059 v2i32, v2i64, OpNode>; 3060 } 3061 3062 // Neon Narrowing 2-register vector intrinsics, 3063 // source operand element sizes of 16, 32 and 64 bits: 3064 multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, 3065 bits<5> op11_7, bit op6, bit op4, 3066 InstrItinClass itin, string OpcodeStr, string Dt, 3067 Intrinsic IntOp> { 3068 def v8i8 : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4, 3069 itin, OpcodeStr, !strconcat(Dt, "16"), 3070 v8i8, v8i16, IntOp>; 3071 def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4, 3072 itin, OpcodeStr, !strconcat(Dt, "32"), 3073 v4i16, v4i32, IntOp>; 3074 def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4, 3075 itin, OpcodeStr, !strconcat(Dt, "64"), 3076 v2i32, v2i64, IntOp>; 3077 } 3078 3079 3080 // Neon Lengthening 2-register vector intrinsic 
// (currently specific to VMOVL).
// source operand element sizes of 16, 32 and 64 bits:
// Each def widens: the destination element is twice the source element width.
multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
                    string OpcodeStr, string Dt, SDNode OpNode> {
  def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>;
  def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
  def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
}


// Neon 3-register vector operations.

// First with only element sizes of 8, 16 and 32 bits:
// (The QHS/QHSD suffixes on these multiclasses name the element sizes
// covered: byte/Q, halfword/H, word/S, doubleword/D.)
multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                   InstrItinClass itinD16, InstrItinClass itinD32,
                   InstrItinClass itinQ16, InstrItinClass itinQ32,
                   string OpcodeStr, string Dt,
                   SDNode OpNode, bit Commutable = 0> {
  // 64-bit vector types.
  def v8i8  : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i8, v8i8, OpNode, Commutable>;
  def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i16, v4i16, OpNode, Commutable>;
  def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i32, v2i32, OpNode, Commutable>;

  // 128-bit vector types.
  def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v16i8, v16i8, OpNode, Commutable>;
  def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v8i16, v8i16, OpNode, Commutable>;
  def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v4i32, v4i32, OpNode, Commutable>;
}

// Same operations, built on the "SL" (by-lane) instruction classes.
multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> {
  def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, "i16", v4i16, ShOp>;
  def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, "i32", v2i32, ShOp>;
  def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, "i16", v8i16, v4i16, ShOp>;
  def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, "i32",
                     v4i32, v2i32, ShOp>;
}

// ....then also with element size 64 bits:
multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itinD, InstrItinClass itinQ,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0>
  : N3V_QHS<op24, op23, op11_8, op4, itinD, itinD, itinQ, itinQ,
            OpcodeStr, Dt, OpNode, Commutable> {
  def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD,
                   OpcodeStr, !strconcat(Dt, "64"),
                   v1i64, v1i64, OpNode, Commutable>;
  def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ,
                   OpcodeStr, !strconcat(Dt, "64"),
                   v2i64, v2i64, OpNode, Commutable>;
}


// Neon 3-register vector intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                     InstrItinClass itinD16, InstrItinClass itinD32,
                     InstrItinClass itinQ16, InstrItinClass itinQ32,
                     string OpcodeStr, string Dt,
                     Intrinsic IntOp, bit Commutable = 0> {
  // 64-bit vector types.
  def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i16, v4i16, IntOp, Commutable>;
  def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, f, itinD32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i32, v2i32, IntOp, Commutable>;

  // 128-bit vector types.
  def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, f, itinQ16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v8i16, v8i16, IntOp, Commutable>;
  def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, f, itinQ32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v4i32, v4i32, IntOp, Commutable>;
}

// As above, but built on the N3VDIntSh/N3VQIntSh classes (no Commutable bit).
multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       Intrinsic IntOp> {
  // 64-bit vector types.
  def v4i16 : N3VDIntSh<op24, op23, 0b01, op11_8, op4, f, itinD16,
                        OpcodeStr, !strconcat(Dt, "16"),
                        v4i16, v4i16, IntOp>;
  def v2i32 : N3VDIntSh<op24, op23, 0b10, op11_8, op4, f, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v8i16 : N3VQIntSh<op24, op23, 0b01, op11_8, op4, f, itinQ16,
                        OpcodeStr, !strconcat(Dt, "16"),
                        v8i16, v8i16, IntOp>;
  def v4i32 : N3VQIntSh<op24, op23, 0b10, op11_8, op4, f, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v4i32, v4i32, IntOp>;
}

// Intrinsic versions of the by-lane ("SL") forms, 16- and 32-bit elements.
multiclass N3VIntSL_HS<bits<4> op11_8,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt, Intrinsic IntOp> {
  def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
                          OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>;
  def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>;
  def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
                          OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>;
  def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                      InstrItinClass itinD16, InstrItinClass itinD32,
                      InstrItinClass itinQ16, InstrItinClass itinQ32,
                      string OpcodeStr, string Dt,
                      Intrinsic IntOp, bit Commutable = 0>
  : N3VInt_HS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
              OpcodeStr, Dt, IntOp, Commutable> {
  def v8i8  : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i8, v8i8, IntOp, Commutable>;
  def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, f, itinQ16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v16i8, v16i8, IntOp, Commutable>;
}

multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt,
                        Intrinsic IntOp>
  : N3VInt_HSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
                OpcodeStr, Dt, IntOp> {
  def v8i8  : N3VDIntSh<op24, op23, 0b00, op11_8, op4, f, itinD16,
                        OpcodeStr, !strconcat(Dt, "8"),
                        v8i8, v8i8, IntOp>;
  def v16i8 : N3VQIntSh<op24, op23, 0b00, op11_8, op4, f, itinQ16,
                        OpcodeStr, !strconcat(Dt, "8"),
                        v16i8, v16i8, IntOp>;
}


// ....then also with element size of 64 bits:
multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       Intrinsic IntOp, bit Commutable = 0>
  : N3VInt_QHS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
               OpcodeStr, Dt, IntOp, Commutable> {
  def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v1i64, v1i64, IntOp, Commutable>;
  def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, f, itinQ32,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v2i64, v2i64, IntOp, Commutable>;
}

multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt,
                         Intrinsic IntOp>
  : N3VInt_QHSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
                 OpcodeStr, Dt, IntOp> {
  def v1i64 : N3VDIntSh<op24, op23, 0b11, op11_8, op4, f, itinD32,
                        OpcodeStr, !strconcat(Dt, "64"),
                        v1i64, v1i64, IntOp>;
  def v2i64 : N3VQIntSh<op24, op23, 0b11, op11_8, op4, f, itinQ32,
                        OpcodeStr, !strconcat(Dt, "64"),
                        v2i64, v2i64, IntOp>;
}

// Neon Narrowing 3-register vector intrinsics,
// source operand element sizes of 16, 32 and 64 bits:
// Each def narrows: the destination element is half the source element width.
multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, string Dt,
                       Intrinsic IntOp, bit Commutable = 0> {
  def v8i8  : N3VNInt<op24, op23, 0b00, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v8i8, v8i16, IntOp, Commutable>;
  def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v4i16, v4i32, IntOp, Commutable>;
  def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v2i32, v2i64, IntOp, Commutable>;
}


// Neon Long 3-register vector operations.

multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itin16, InstrItinClass itin32,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0> {
  def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i16, v8i8, OpNode, Commutable>;
  def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i32, v4i16, OpNode, Commutable>;
  def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i64, v2i32, OpNode, Commutable>;
}

multiclass N3VLSL_HS<bit op24, bits<4> op11_8,
                     InstrItinClass itin, string OpcodeStr, string Dt,
                     SDNode OpNode> {
  def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin, OpcodeStr,
                       !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
  def v2i32 : N3VLSL<op24, 0b10, op11_8, itin, OpcodeStr,
                     !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
}

// Long 3-register operations with an explicit extend node (ExtOp) applied
// to the operands before OpNode.
multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
  def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i16, v8i8, OpNode, ExtOp, Commutable>;
  def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i32, v4i16, OpNode, ExtOp, Commutable>;
  def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i64, v2i32, OpNode, ExtOp, Commutable>;
}

// Neon Long 3-register vector intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itin16, InstrItinClass itin32,
                      string OpcodeStr, string Dt,
                      Intrinsic IntOp, bit Commutable = 0> {
  def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i32, v4i16, IntOp, Commutable>;
  def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i64, v2i32, IntOp, Commutable>;
}

multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
                        InstrItinClass itin, string OpcodeStr, string Dt,
                        Intrinsic IntOp> {
  def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
                          OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       Intrinsic IntOp, bit Commutable = 0>
  : N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt,
               IntOp, Commutable> {
  def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i16, v8i8, IntOp, Commutable>;
}

// ....with explicit extend (VABDL).
multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                          InstrItinClass itin, string OpcodeStr, string Dt,
                          Intrinsic IntOp, SDNode ExtOp, bit Commutable = 0> {
  def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "8"),
                         v8i16, v8i8, IntOp, ExtOp, Commutable>;
  def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "16"),
                         v4i32, v4i16, IntOp, ExtOp, Commutable>;
  def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "32"),
                         v2i64, v2i32, IntOp, ExtOp, Commutable>;
}


// Neon Wide 3-register vector intrinsics,
// source operand element sizes of 8, 16 and 32 bits:
multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
  def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i16, v8i8, OpNode, ExtOp, Commutable>;
  def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i32, v4i16, OpNode, ExtOp, Commutable>;
  def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i64, v2i32, OpNode, ExtOp, Commutable>;
}


// Neon Multiply-Op vector operations,
// element sizes of 8, 16 and 32 bits:
// (multiply with the fixed 'mul' node, then combine via OpNode)
multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, mul, OpNode>;
  def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, OpNode>;
  def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, mul, OpNode>;
  def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, mul, OpNode>;
  def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, mul, OpNode>;
}

multiclass N3VMulOpSL_HS<bits<4> op11_8,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt, SDNode ShOp> {
  def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
                            OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>;
  def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
                          OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>;
  def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
                            OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16,
                            mul, ShOp>;
  def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
                          OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32,
                          mul, ShOp>;
}

// Neon Intrinsic-Op vector operations,
// element sizes of 8, 16 and 32 bits:
// (apply intrinsic IntOp, then combine via OpNode)
multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD, InstrItinClass itinQ,
                        string OpcodeStr, string Dt, Intrinsic IntOp,
                        SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, IntOp, OpNode>;
  def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp, OpNode>;
  def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, IntOp, OpNode>;
  def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, IntOp, OpNode>;
  def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, IntOp, OpNode>;
}

// Neon 3-argument intrinsics,
// element sizes of 8, 16 and 32 bits:
multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itinD, InstrItinClass itinQ,
                       string OpcodeStr, string Dt, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD,
                       OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
  def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD,
                       OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
  def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD,
                       OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ,
                       OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
  def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ,
                       OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
  def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ,
                       OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
}


// Neon Long Multiply-Op vector operations,
// element sizes of 8, 16 and 32 bits:
multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                         InstrItinClass itin16, InstrItinClass itin32,
                         string OpcodeStr, string Dt, SDNode MulOp,
                         SDNode OpNode> {
  def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
                        !strconcat(Dt, "8"), v8i16, v8i8, MulOp, OpNode>;
  def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
                        !strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>;
  def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
                        !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
}

multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr,
                          string Dt, SDNode MulOp, SDNode OpNode> {
  def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D, OpcodeStr,
                            !strconcat(Dt,"16"), v4i32, v4i16, MulOp, OpNode>;
  def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D, OpcodeStr,
                          !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
}


// Neon Long 3-argument intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt, Intrinsic IntOp> {
  def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16,
                       OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32,
                       OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// By-lane ("SL") versions of the long 3-argument intrinsics.
multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
                         string OpcodeStr, string Dt, Intrinsic IntOp> {
  def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D,
                           OpcodeStr, !strconcat(Dt,"16"), v4i32, v4i16, IntOp>;
  def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D,
                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itin16, InstrItinClass itin32,
                        string OpcodeStr, string Dt, Intrinsic IntOp>
  : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, IntOp> {
  def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16,
                       OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
}

// ....with explicit extend (VABAL).
multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                            InstrItinClass itin, string OpcodeStr, string Dt,
                            Intrinsic IntOp, SDNode ExtOp, SDNode OpNode> {
  def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8,
                           IntOp, ExtOp, OpNode>;
  def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16,
                           IntOp, ExtOp, OpNode>;
  def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32,
                           IntOp, ExtOp, OpNode>;
}


// Neon Pairwise long 2-register intrinsics,
// element sizes of 8, 16 and 32 bits:
// Pairwise ops halve the element count while doubling the element width,
// hence the mismatched result/source types below.
multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                        bits<5> op11_7, bit op4,
                        string OpcodeStr, string Dt, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}


// Neon Pairwise long 2-register accumulate intrinsics,
// element sizes of 8, 16 and 32 bits:
multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                         bits<5> op11_7, bit op4,
                         string OpcodeStr, string Dt, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}


// Neon 2-register vector shift by immediate,
// with f of either N2RegVShLFrm or N2RegVShRFrm
// element sizes of 8, 16, 32 and 64 bits:
// The 'let Inst{...}' overrides pin the high bits of the imm6 field, which
// is how the hardware distinguishes the element size.
multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
                             // imm6 = xxxxxx
}
// Right-shift version: uses the range-checked shr_imm* operands and also
// emits two-operand assembly aliases (baseOpc names the defm prefix so the
// aliases can !cast to the instructions defined above them).
multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       string baseOpc, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
                     OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
                             // imm6 = xxxxxx

  // Aliases for two-operand forms (source and dest regs the same).
  def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "8 $Vdn, $imm"),
                      (!cast<Instruction>(!strconcat(baseOpc, "v8i8"))
                          DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>;
  def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "16 $Vdn, $imm"),
                      (!cast<Instruction>(!strconcat(baseOpc, "v4i16"))
                          DPR:$Vdn, DPR:$Vdn, shr_imm16:$imm, pred:$p)>;
  def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "32 $Vdn, $imm"),
                      (!cast<Instruction>(!strconcat(baseOpc, "v2i32"))
                          DPR:$Vdn, DPR:$Vdn, shr_imm32:$imm, pred:$p)>;
  def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "64 $Vdn, $imm"),
                      (!cast<Instruction>(!strconcat(baseOpc, "v1i64"))
                          DPR:$Vdn, DPR:$Vdn, shr_imm64:$imm, pred:$p)>;

  def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "8 $Vdn, $imm"),
                      (!cast<Instruction>(!strconcat(baseOpc, "v16i8"))
                          QPR:$Vdn, QPR:$Vdn, shr_imm8:$imm, pred:$p)>;
  def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "16 $Vdn, $imm"),
                      (!cast<Instruction>(!strconcat(baseOpc, "v8i16"))
                          QPR:$Vdn, QPR:$Vdn, shr_imm16:$imm, pred:$p)>;
  def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "32 $Vdn, $imm"),
                      (!cast<Instruction>(!strconcat(baseOpc, "v4i32"))
                          QPR:$Vdn, QPR:$Vdn, shr_imm32:$imm, pred:$p)>;
  def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "64 $Vdn, $imm"),
                      (!cast<Instruction>(!strconcat(baseOpc, "v2i64"))
                          QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>;
}

// Neon Shift-Accumulate vector operations,
// element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                         string OpcodeStr, string Dt, SDNode ShOp> {
  // 64-bit vector types.
  def v8i8  : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
                        OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
                        OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>;
                             // imm6 = xxxxxx
}

// Neon Shift-Insert vector operations,
// with f of either N2RegVShLFrm or N2RegVShRFrm
// element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShInsL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                          string OpcodeStr> {
  // 64-bit vector types.
  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsli> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsli> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsli> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsli>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsli> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsli> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsli> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsli>;
                             // imm6 = xxxxxx
}
multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                          string OpcodeStr> {
  // 64-bit vector types.
  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm8,
                        N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsri> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm16,
                        N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsri> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm32,
                        N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsri> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, shr_imm64,
                        N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsri>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm8,
                        N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsri> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm16,
                        N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsri> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm32,
                        N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsri> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, shr_imm64,
                        N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsri>;
                             // imm6 = xxxxxx
}

// Neon Shift Long operations,
// element sizes of 8, 16, 32 bits:
// (imm1_7/imm1_15/imm1_31 bound the shift amount to the source element width)
multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                      bit op4, string OpcodeStr, string Dt, SDNode OpNode> {
  def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
              OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, imm1_7, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
              OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, imm1_15, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
              OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, imm1_31, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
}

// Neon Shift Narrow operations,
// element sizes of 16, 32, 64 bits:
multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                      bit op4, InstrItinClass itin, string OpcodeStr, string Dt,
                      SDNode OpNode> {
  def v8i8  : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                     OpcodeStr, !strconcat(Dt, "16"),
                     v8i8, v8i16, shr_imm8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                     OpcodeStr, !strconcat(Dt, "32"),
                     v4i16, v4i32, shr_imm16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                     OpcodeStr, !strconcat(Dt, "64"),
                     v2i32, v2i64, shr_imm32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
}

//===----------------------------------------------------------------------===//
// Instruction Definitions.
//===----------------------------------------------------------------------===//

// Vector Add Operations.
3860 3861 // VADD : Vector Add (integer and floating-point) 3862 defm VADD : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i", 3863 add, 1>; 3864 def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32", 3865 v2f32, v2f32, fadd, 1>; 3866 def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32", 3867 v4f32, v4f32, fadd, 1>; 3868 // VADDL : Vector Add Long (Q = D + D) 3869 defm VADDLs : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD, 3870 "vaddl", "s", add, sext, 1>; 3871 defm VADDLu : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD, 3872 "vaddl", "u", add, zext, 1>; 3873 // VADDW : Vector Add Wide (Q = Q + D) 3874 defm VADDWs : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>; 3875 defm VADDWu : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zext, 0>; 3876 // VHADD : Vector Halving Add 3877 defm VHADDs : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm, 3878 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, 3879 "vhadd", "s", int_arm_neon_vhadds, 1>; 3880 defm VHADDu : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm, 3881 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, 3882 "vhadd", "u", int_arm_neon_vhaddu, 1>; 3883 // VRHADD : Vector Rounding Halving Add 3884 defm VRHADDs : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm, 3885 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, 3886 "vrhadd", "s", int_arm_neon_vrhadds, 1>; 3887 defm VRHADDu : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm, 3888 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, 3889 "vrhadd", "u", int_arm_neon_vrhaddu, 1>; 3890 // VQADD : Vector Saturating Add 3891 defm VQADDs : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm, 3892 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, 3893 "vqadd", "s", int_arm_neon_vqadds, 1>; 3894 defm VQADDu : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm, 3895 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, 3896 "vqadd", "u", int_arm_neon_vqaddu, 1>; 3897 // VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q) 3898 defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", 
3899 int_arm_neon_vaddhn, 1>; 3900 // VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q) 3901 defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i", 3902 int_arm_neon_vraddhn, 1>; 3903 3904 // Vector Multiply Operations. 3905 3906 // VMUL : Vector Multiply (integer, polynomial and floating-point) 3907 defm VMUL : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D, 3908 IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>; 3909 def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul", 3910 "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>; 3911 def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul", 3912 "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>; 3913 def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32", 3914 v2f32, v2f32, fmul, 1>; 3915 def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32", 3916 v4f32, v4f32, fmul, 1>; 3917 defm VMULsl : N3VSL_HS<0b1000, "vmul", mul>; 3918 def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>; 3919 def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32, 3920 v2f32, fmul>; 3921 3922 def : Pat<(v8i16 (mul (v8i16 QPR:$src1), 3923 (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))), 3924 (v8i16 (VMULslv8i16 (v8i16 QPR:$src1), 3925 (v4i16 (EXTRACT_SUBREG QPR:$src2, 3926 (DSubReg_i16_reg imm:$lane))), 3927 (SubReg_i16_lane imm:$lane)))>; 3928 def : Pat<(v4i32 (mul (v4i32 QPR:$src1), 3929 (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))), 3930 (v4i32 (VMULslv4i32 (v4i32 QPR:$src1), 3931 (v2i32 (EXTRACT_SUBREG QPR:$src2, 3932 (DSubReg_i32_reg imm:$lane))), 3933 (SubReg_i32_lane imm:$lane)))>; 3934 def : Pat<(v4f32 (fmul (v4f32 QPR:$src1), 3935 (v4f32 (NEONvduplane (v4f32 QPR:$src2), imm:$lane)))), 3936 (v4f32 (VMULslfq (v4f32 QPR:$src1), 3937 (v2f32 (EXTRACT_SUBREG QPR:$src2, 3938 (DSubReg_i32_reg imm:$lane))), 3939 (SubReg_i32_lane imm:$lane)))>; 3940 3941 // VQDMULH : Vector Saturating Doubling Multiply Returning High 
Half 3942 defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D, 3943 IIC_VMULi16Q, IIC_VMULi32Q, 3944 "vqdmulh", "s", int_arm_neon_vqdmulh, 1>; 3945 defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D, 3946 IIC_VMULi16Q, IIC_VMULi32Q, 3947 "vqdmulh", "s", int_arm_neon_vqdmulh>; 3948 def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1), 3949 (v8i16 (NEONvduplane (v8i16 QPR:$src2), 3950 imm:$lane)))), 3951 (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1), 3952 (v4i16 (EXTRACT_SUBREG QPR:$src2, 3953 (DSubReg_i16_reg imm:$lane))), 3954 (SubReg_i16_lane imm:$lane)))>; 3955 def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1), 3956 (v4i32 (NEONvduplane (v4i32 QPR:$src2), 3957 imm:$lane)))), 3958 (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1), 3959 (v2i32 (EXTRACT_SUBREG QPR:$src2, 3960 (DSubReg_i32_reg imm:$lane))), 3961 (SubReg_i32_lane imm:$lane)))>; 3962 3963 // VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half 3964 defm VQRDMULH : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm, 3965 IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q, 3966 "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>; 3967 defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D, 3968 IIC_VMULi16Q, IIC_VMULi32Q, 3969 "vqrdmulh", "s", int_arm_neon_vqrdmulh>; 3970 def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1), 3971 (v8i16 (NEONvduplane (v8i16 QPR:$src2), 3972 imm:$lane)))), 3973 (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1), 3974 (v4i16 (EXTRACT_SUBREG QPR:$src2, 3975 (DSubReg_i16_reg imm:$lane))), 3976 (SubReg_i16_lane imm:$lane)))>; 3977 def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1), 3978 (v4i32 (NEONvduplane (v4i32 QPR:$src2), 3979 imm:$lane)))), 3980 (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1), 3981 (v2i32 (EXTRACT_SUBREG QPR:$src2, 3982 (DSubReg_i32_reg imm:$lane))), 3983 (SubReg_i32_lane imm:$lane)))>; 3984 3985 // VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D) 3986 defm VMULLs : N3VL_QHS<0,1,0b1100,0, 
IIC_VMULi16D, IIC_VMULi32D, 3987 "vmull", "s", NEONvmulls, 1>; 3988 defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, 3989 "vmull", "u", NEONvmullu, 1>; 3990 def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8", 3991 v8i16, v8i8, int_arm_neon_vmullp, 1>; 3992 defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>; 3993 defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>; 3994 3995 // VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D) 3996 defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D, 3997 "vqdmull", "s", int_arm_neon_vqdmull, 1>; 3998 defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D, 3999 "vqdmull", "s", int_arm_neon_vqdmull>; 4000 4001 // Vector Multiply-Accumulate and Multiply-Subtract Operations. 4002 4003 // VMLA : Vector Multiply Accumulate (integer and floating-point) 4004 defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, 4005 IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>; 4006 def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32", 4007 v2f32, fmul_su, fadd_mlx>, 4008 Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; 4009 def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32", 4010 v4f32, fmul_su, fadd_mlx>, 4011 Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; 4012 defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D, 4013 IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>; 4014 def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32", 4015 v2f32, fmul_su, fadd_mlx>, 4016 Requires<[HasNEON, UseFPVMLx]>; 4017 def VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32", 4018 v4f32, v2f32, fmul_su, fadd_mlx>, 4019 Requires<[HasNEON, UseFPVMLx]>; 4020 4021 def : Pat<(v8i16 (add (v8i16 QPR:$src1), 4022 (mul (v8i16 QPR:$src2), 4023 (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))), 4024 (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2), 4025 (v4i16 (EXTRACT_SUBREG 
QPR:$src3, 4026 (DSubReg_i16_reg imm:$lane))), 4027 (SubReg_i16_lane imm:$lane)))>; 4028 4029 def : Pat<(v4i32 (add (v4i32 QPR:$src1), 4030 (mul (v4i32 QPR:$src2), 4031 (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))), 4032 (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2), 4033 (v2i32 (EXTRACT_SUBREG QPR:$src3, 4034 (DSubReg_i32_reg imm:$lane))), 4035 (SubReg_i32_lane imm:$lane)))>; 4036 4037 def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1), 4038 (fmul_su (v4f32 QPR:$src2), 4039 (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))), 4040 (v4f32 (VMLAslfq (v4f32 QPR:$src1), 4041 (v4f32 QPR:$src2), 4042 (v2f32 (EXTRACT_SUBREG QPR:$src3, 4043 (DSubReg_i32_reg imm:$lane))), 4044 (SubReg_i32_lane imm:$lane)))>, 4045 Requires<[HasNEON, UseFPVMLx]>; 4046 4047 // VMLAL : Vector Multiply Accumulate Long (Q += D * D) 4048 defm VMLALs : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D, 4049 "vmlal", "s", NEONvmulls, add>; 4050 defm VMLALu : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D, 4051 "vmlal", "u", NEONvmullu, add>; 4052 4053 defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>; 4054 defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>; 4055 4056 // VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D) 4057 defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, 4058 "vqdmlal", "s", int_arm_neon_vqdmlal>; 4059 defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", int_arm_neon_vqdmlal>; 4060 4061 // VMLS : Vector Multiply Subtract (integer and floating-point) 4062 defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, 4063 IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>; 4064 def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32", 4065 v2f32, fmul_su, fsub_mlx>, 4066 Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; 4067 def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32", 4068 v4f32, fmul_su, fsub_mlx>, 4069 
Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; 4070 defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D, 4071 IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>; 4072 def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32", 4073 v2f32, fmul_su, fsub_mlx>, 4074 Requires<[HasNEON, UseFPVMLx]>; 4075 def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32", 4076 v4f32, v2f32, fmul_su, fsub_mlx>, 4077 Requires<[HasNEON, UseFPVMLx]>; 4078 4079 def : Pat<(v8i16 (sub (v8i16 QPR:$src1), 4080 (mul (v8i16 QPR:$src2), 4081 (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))), 4082 (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2), 4083 (v4i16 (EXTRACT_SUBREG QPR:$src3, 4084 (DSubReg_i16_reg imm:$lane))), 4085 (SubReg_i16_lane imm:$lane)))>; 4086 4087 def : Pat<(v4i32 (sub (v4i32 QPR:$src1), 4088 (mul (v4i32 QPR:$src2), 4089 (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))), 4090 (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2), 4091 (v2i32 (EXTRACT_SUBREG QPR:$src3, 4092 (DSubReg_i32_reg imm:$lane))), 4093 (SubReg_i32_lane imm:$lane)))>; 4094 4095 def : Pat<(v4f32 (fsub_mlx (v4f32 QPR:$src1), 4096 (fmul_su (v4f32 QPR:$src2), 4097 (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))), 4098 (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2), 4099 (v2f32 (EXTRACT_SUBREG QPR:$src3, 4100 (DSubReg_i32_reg imm:$lane))), 4101 (SubReg_i32_lane imm:$lane)))>, 4102 Requires<[HasNEON, UseFPVMLx]>; 4103 4104 // VMLSL : Vector Multiply Subtract Long (Q -= D * D) 4105 defm VMLSLs : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D, 4106 "vmlsl", "s", NEONvmulls, sub>; 4107 defm VMLSLu : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D, 4108 "vmlsl", "u", NEONvmullu, sub>; 4109 4110 defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", NEONvmulls, sub>; 4111 defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>; 4112 4113 // VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D) 4114 defm 
VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D, 4115 "vqdmlsl", "s", int_arm_neon_vqdmlsl>; 4116 defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>; 4117 4118 // Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations. 4119 def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32", 4120 v2f32, fmul_su, fadd_mlx>, 4121 Requires<[HasVFP4,UseFusedMAC]>; 4122 4123 def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32", 4124 v4f32, fmul_su, fadd_mlx>, 4125 Requires<[HasVFP4,UseFusedMAC]>; 4126 4127 // Fused Vector Multiply Subtract (floating-point) 4128 def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32", 4129 v2f32, fmul_su, fsub_mlx>, 4130 Requires<[HasVFP4,UseFusedMAC]>; 4131 def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32", 4132 v4f32, fmul_su, fsub_mlx>, 4133 Requires<[HasVFP4,UseFusedMAC]>; 4134 4135 // Match @llvm.fma.* intrinsics 4136 def : Pat<(v2f32 (fma DPR:$src1, DPR:$Vn, DPR:$Vm)), 4137 (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>, 4138 Requires<[HasVFP4]>; 4139 def : Pat<(v4f32 (fma QPR:$src1, QPR:$Vn, QPR:$Vm)), 4140 (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>, 4141 Requires<[HasVFP4]>; 4142 def : Pat<(v2f32 (fma (fneg DPR:$src1), DPR:$Vn, DPR:$Vm)), 4143 (VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>, 4144 Requires<[HasVFP4]>; 4145 def : Pat<(v4f32 (fma (fneg QPR:$src1), QPR:$Vn, QPR:$Vm)), 4146 (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>, 4147 Requires<[HasVFP4]>; 4148 4149 // Vector Subtract Operations. 
// VSUB : Vector Subtract (integer and floating-point)
// NOTE(review): the trailing 0/1 template argument appears to be the
// "Commutable" flag (the add defs above pass 1, all subtract forms pass 0);
// confirm against the N3V* multiclass definitions earlier in this file.
defm VSUB    : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ,
                        "vsub", "i", sub, 0>;
def  VSUBfd  : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
                    v2f32, v2f32, fsub, 0>;
def  VSUBfq  : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
                    v4f32, v4f32, fsub, 0>;
// VSUBL : Vector Subtract Long (Q = D - D)
defm VSUBLs  : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
                           "vsubl", "s", sub, sext, 0>;
defm VSUBLu  : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
                           "vsubl", "u", sub, zext, 0>;
// VSUBW : Vector Subtract Wide (Q = Q - D)
defm VSUBWs  : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>;
defm VSUBWu  : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>;
// VHSUB : Vector Halving Subtract (result is (a - b) >> 1, no overflow)
defm VHSUBs  : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
                          IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                          "vhsub", "s", int_arm_neon_vhsubs, 0>;
defm VHSUBu  : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm,
                          IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                          "vhsub", "u", int_arm_neon_vhsubu, 0>;
// VQSUB : Vector Saturating Subtract
defm VQSUBs  : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vqsub", "s", int_arm_neon_vqsubs, 0>;
defm VQSUBu  : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vqsub", "u", int_arm_neon_vqsubu, 0>;
// VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q)
defm VSUBHN  : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i",
                           int_arm_neon_vsubhn, 0>;
// VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
                           int_arm_neon_vrsubhn, 0>;

// Vector Comparisons.
// VCEQ : Vector Compare Equal
// Comparisons produce all-ones (true) / all-zeros (false) lanes of the
// integer result type, so the f32 forms return v2i32/v4i32 masks.
defm VCEQ  : N3V_QHS<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                     IIC_VSUBi4Q, "vceq", "i", NEONvceq, 1>;
def  VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
                   NEONvceq, 1>;
def  VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
                   NEONvceq, 1>;

// Compare-against-zero form (second operand is the literal #0).
defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
                         "$Vd, $Vm, #0", NEONvceqz>;

// VCGE : Vector Compare Greater Than or Equal
defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                     IIC_VSUBi4Q, "vcge", "s", NEONvcge, 0>;
defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                     IIC_VSUBi4Q, "vcge", "u", NEONvcgeu, 0>;
def  VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
                   NEONvcge, 0>;
def  VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
                   NEONvcge, 0>;

defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
                         "$Vd, $Vm, #0", NEONvcgez>;
defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
                         "$Vd, $Vm, #0", NEONvclez>;

// VCGT : Vector Compare Greater Than
defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                     IIC_VSUBi4Q, "vcgt", "s", NEONvcgt, 0>;
defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                     IIC_VSUBi4Q, "vcgt", "u", NEONvcgtu, 0>;
def  VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
                   NEONvcgt, 0>;
def  VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
                   NEONvcgt, 0>;

defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
                         "$Vd, $Vm, #0", NEONvcgtz>;
defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
                         "$Vd, $Vm, #0", NEONvcltz>;

// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
                     "f32", v2i32, v2f32, int_arm_neon_vacged, 0>;
def VACGEq : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
                     "f32", v4i32, v4f32, int_arm_neon_vacgeq, 0>;
// VACGT : Vector Absolute Compare Greater Than (aka VCAGT)
def VACGTd : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
                     "f32", v2i32, v2f32, int_arm_neon_vacgtd, 0>;
def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
                     "f32", v4i32, v4f32, int_arm_neon_vacgtq, 0>;
// VTST : Vector Test Bits (empty data-type suffix: operates bitwise)
defm VTST  : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                     IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;

// Vector Bitwise Operations.

// Bitwise NOT expressed as xor with an all-ones vector, for use in the
// VBIC/VORN/VMVN/VBSL selection patterns below.
def vnotd : PatFrag<(ops node:$in),
                    (xor node:$in, (bitconvert (v8i8  NEONimmAllOnesV)))>;
def vnotq : PatFrag<(ops node:$in),
                    (xor node:$in, (bitconvert (v16i8 NEONimmAllOnesV)))>;


// VAND : Vector Bitwise AND
def  VANDd : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand",
                   v2i32, v2i32, and, 1>;
def  VANDq : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand",
                   v4i32, v4i32, and, 1>;

// VEOR : Vector Bitwise Exclusive OR
def  VEORd : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor",
                   v2i32, v2i32, xor, 1>;
def  VEORq : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor",
                   v4i32, v4i32, xor, 1>;

// VORR : Vector Bitwise OR
def  VORRd : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr",
                   v2i32, v2i32, or, 1>;
def  VORRq : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
                   v4i32, v4i32, or, 1>;

// VORR (immediate) : OR a splatted modified-immediate into each lane.
// The '?' cmode bits are filled in from the immediate operand below.
def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
                          (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v4i16 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
  // cmode bit 1 comes from the encoded immediate.
  let Inst{9} = SIMM{9};
}

// VORR (immediate), i32 D-register form; see VORRiv4i16 above.
def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1,
                          (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v2i32 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
  // Two cmode bits come from the encoded immediate.
  let Inst{10-9} = SIMM{10-9};
}

def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
                          (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v8i16 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1,
                          (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v4i32 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}


// VBIC : Vector Bitwise Bit Clear (AND NOT)
// Selected from (and a, (not b)) via the vnotd/vnotq PatFrags.
def VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
                 (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
                 "vbic", "$Vd, $Vn, $Vm", "",
                 [(set DPR:$Vd, (v2i32 (and DPR:$Vn,
                                            (vnotd DPR:$Vm))))]>;
def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
                 (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
                 "vbic", "$Vd, $Vn, $Vm", "",
                 [(set QPR:$Vd, (v4i32 (and QPR:$Vn,
                                            (vnotq QPR:$Vm))))]>;

// VBIC (immediate) : clear the bits of a splatted modified-immediate.
def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
                          (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v4i16 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1,
                          (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v2i32 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1,
                          (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v8i16 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1,
                          (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v4i32 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

// VORN : Vector Bitwise OR NOT
def VORNd : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$Vd),
                 (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
                 "vorn", "$Vd, $Vn, $Vm", "",
                 [(set DPR:$Vd, (v2i32 (or DPR:$Vn,
                                           (vnotd DPR:$Vm))))]>;
def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$Vd),
                 (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
                 "vorn", "$Vd, $Vn, $Vm", "",
                 [(set QPR:$Vd, (v4i32 (or QPR:$Vn,
                                           (vnotq QPR:$Vm))))]>;

// VMVN : Vector Bitwise NOT (Immediate)

// Immediate moves have no register inputs, so they can be rematerialized
// instead of spilled.
let isReMaterializable = 1 in {

def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmvn", "i16", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v4i16 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmvn", "i16", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v8i16 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

// The i32 forms accept the full VMOV-style modified immediate, so all four
// cmode bits come from the operand.
def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmvn", "i32", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v2i32 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmvn", "i32", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v4i32 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}
}

// VMVN : Vector Bitwise NOT
def VMVNd : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
                 (outs DPR:$Vd), (ins DPR:$Vm), IIC_VSUBiD,
                 "vmvn", "$Vd, $Vm", "",
                 [(set DPR:$Vd, (v2i32 (vnotd DPR:$Vm)))]>;
def VMVNq : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
                 (outs QPR:$Vd), (ins QPR:$Vm), IIC_VSUBiD,
                 "vmvn", "$Vd, $Vm", "",
                 [(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>;
def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>;
def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>;

// VBSL : Vector Bitwise Select: (Vd & Vn) | (~Vd & Vm); Vd is also an input.
def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
                 (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                 N3RegFrm, IIC_VCNTiD,
                 "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                 [(set DPR:$Vd,
                       (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;

// Also match the expanded or/and/not form of the select.
def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
                     (and DPR:$Vm, (vnotd DPR:$Vd)))),
          (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;

def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
                 (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                 N3RegFrm, IIC_VCNTiQ,
                 "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                 [(set QPR:$Vd,
                       (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;

def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
                     (and QPR:$Vm, (vnotq QPR:$Vd)))),
          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;

// VBIF : Vector Bitwise Insert if False
// like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
// FIXME: This instruction's encoding MAY NOT BE correct.
// Assembler/disassembler-only defs: no selection patterns (empty pattern
// list), see the note after VBITq below.
def VBIFd : N3VX<1, 0, 0b11, 0b0001, 0, 1,
                 (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                 N3RegFrm, IIC_VBINiD,
                 "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                 []>;
def VBIFq : N3VX<1, 0, 0b11, 0b0001, 1, 1,
                 (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                 N3RegFrm, IIC_VBINiQ,
                 "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                 []>;

// VBIT : Vector Bitwise Insert if True
// like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
// FIXME: This instruction's encoding MAY NOT BE correct.
def VBITd : N3VX<1, 0, 0b10, 0b0001, 0, 1,
                 (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                 N3RegFrm, IIC_VBINiD,
                 "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                 []>;
def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1,
                 (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                 N3RegFrm, IIC_VBINiQ,
                 "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                 []>;

// VBIT/VBIF are not yet implemented.  The TwoAddress pass will not go looking
// for equivalent operations with different register constraints; it just
// inserts copies.

// Vector Absolute Differences.
// VABD : Vector Absolute Difference
defm VABDs  : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
                         IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                         "vabd", "s", int_arm_neon_vabds, 1>;
defm VABDu  : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
                         IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                         "vabd", "u", int_arm_neon_vabdu, 1>;
def  VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
                      "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
def  VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
                      "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;

// VABDL : Vector Absolute Difference Long (Q = | D - D |)
// NOTE(review): both the signed and unsigned forms widen with zext —
// presumably because |a - b| is always non-negative; confirm against the
// isel lowering for these intrinsics.
defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
                             "vabdl", "s", int_arm_neon_vabds, zext, 1>;
defm VABDLu : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
                             "vabdl", "u", int_arm_neon_vabdu, zext, 1>;

// VABA : Vector Absolute Difference and Accumulate
defm VABAs  : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
                           "vaba", "s", int_arm_neon_vabds, add>;
defm VABAu  : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
                           "vaba", "u", int_arm_neon_vabdu, add>;

// VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
defm VABALs : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD,
                               "vabal", "s", int_arm_neon_vabds, zext, add>;
defm VABALu : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,
                               "vabal", "u", int_arm_neon_vabdu, zext, add>;

// Vector Maximum and Minimum.
// VMAX : Vector Maximum
defm VMAXs  : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
                         IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                         "vmax", "s", int_arm_neon_vmaxs, 1>;
defm VMAXu  : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
                         IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                         "vmax", "u", int_arm_neon_vmaxu, 1>;
def  VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
                      "vmax", "f32",
                      v2f32, v2f32, int_arm_neon_vmaxs, 1>;
def  VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                      "vmax", "f32",
                      v4f32, v4f32, int_arm_neon_vmaxs, 1>;

// VMIN : Vector Minimum
// Same encoding as VMAX except op4 = 1 (integer) / size = 0b10 (f32).
defm VMINs  : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
                         IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                         "vmin", "s", int_arm_neon_vmins, 1>;
defm VMINu  : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
                         IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                         "vmin", "u", int_arm_neon_vminu, 1>;
def  VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
                      "vmin", "f32",
                      v2f32, v2f32, int_arm_neon_vmins, 1>;
def  VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                      "vmin", "f32",
                      v4f32, v4f32, int_arm_neon_vmins, 1>;

// Vector Pairwise Operations.
// VPADD : Vector Pairwise Add (D-register only; pairwise ops have no Q form)
def VPADDi8  : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                       "vpadd", "i8",
                       v8i8, v8i8, int_arm_neon_vpadd, 0>;
def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                       "vpadd", "i16",
                       v4i16, v4i16, int_arm_neon_vpadd, 0>;
def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                       "vpadd", "i32",
                       v2i32, v2i32, int_arm_neon_vpadd, 0>;
def VPADDf   : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm,
                       IIC_VPBIND, "vpadd", "f32",
                       v2f32, v2f32, int_arm_neon_vpadd, 0>;

// VPADDL : Vector Pairwise Add Long
defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
                            int_arm_neon_vpaddls>;
defm VPADDLu : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u",
                            int_arm_neon_vpaddlu>;

// VPADAL : Vector Pairwise Add and Accumulate Long
defm VPADALs : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s",
                             int_arm_neon_vpadals>;
defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u",
                             int_arm_neon_vpadalu>;

// VPMAX : Vector Pairwise Maximum
def VPMAXs8  : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                       "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                       "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                       "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
def VPMAXu8  : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                       "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                       "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                       "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
def VPMAXf   : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
                       "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;

// VPMIN : Vector Pairwise Minimum (op4 = 1; f32 form uses size = 0b10)
def VPMINs8  : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                       "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>;
def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                       "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>;
def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                       "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>;
def VPMINu8  : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                       "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>;
def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                       "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                       "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
def VPMINf   : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
                       "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;

// Vector Reciprocal and Reciprocal Square Root Estimate and Step.
4590 4591 // VRECPE : Vector Reciprocal Estimate 4592 def VRECPEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, 4593 IIC_VUNAD, "vrecpe", "u32", 4594 v2i32, v2i32, int_arm_neon_vrecpe>; 4595 def VRECPEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, 4596 IIC_VUNAQ, "vrecpe", "u32", 4597 v4i32, v4i32, int_arm_neon_vrecpe>; 4598 def VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, 4599 IIC_VUNAD, "vrecpe", "f32", 4600 v2f32, v2f32, int_arm_neon_vrecpe>; 4601 def VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, 4602 IIC_VUNAQ, "vrecpe", "f32", 4603 v4f32, v4f32, int_arm_neon_vrecpe>; 4604 4605 // VRECPS : Vector Reciprocal Step 4606 def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm, 4607 IIC_VRECSD, "vrecps", "f32", 4608 v2f32, v2f32, int_arm_neon_vrecps, 1>; 4609 def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm, 4610 IIC_VRECSQ, "vrecps", "f32", 4611 v4f32, v4f32, int_arm_neon_vrecps, 1>; 4612 4613 // VRSQRTE : Vector Reciprocal Square Root Estimate 4614 def VRSQRTEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, 4615 IIC_VUNAD, "vrsqrte", "u32", 4616 v2i32, v2i32, int_arm_neon_vrsqrte>; 4617 def VRSQRTEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, 4618 IIC_VUNAQ, "vrsqrte", "u32", 4619 v4i32, v4i32, int_arm_neon_vrsqrte>; 4620 def VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, 4621 IIC_VUNAD, "vrsqrte", "f32", 4622 v2f32, v2f32, int_arm_neon_vrsqrte>; 4623 def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, 4624 IIC_VUNAQ, "vrsqrte", "f32", 4625 v4f32, v4f32, int_arm_neon_vrsqrte>; 4626 4627 // VRSQRTS : Vector Reciprocal Square Root Step 4628 def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm, 4629 IIC_VRECSD, "vrsqrts", "f32", 4630 v2f32, v2f32, int_arm_neon_vrsqrts, 1>; 4631 def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm, 4632 IIC_VRECSQ, "vrsqrts", "f32", 4633 v4f32, v4f32, int_arm_neon_vrsqrts, 1>; 4634 4635 // Vector Shifts. 
// VSHL : Vector Shift
defm VSHLs    : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm,
                            IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                            "vshl", "s", int_arm_neon_vshifts>;
defm VSHLu    : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
                            IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                            "vshl", "u", int_arm_neon_vshiftu>;

// VSHL : Vector Shift Left (Immediate)
defm VSHLi    : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>;

// VSHR : Vector Shift Right (Immediate)
defm VSHRs    : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", "VSHRs",
                            NEONvshrs>;
defm VSHRu    : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu",
                            NEONvshru>;

// VSHLL : Vector Shift Left Long
defm VSHLLs   : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", NEONvshlls>;
defm VSHLLu   : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u", NEONvshllu>;

// VSHLL : Vector Shift Left Long (with maximum shift count)
class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
                bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy,
                ValueType OpTy, Operand ImmTy, SDNode OpNode>
  : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
           ResTy, OpTy, ImmTy, OpNode> {
  let Inst{21-16} = op21_16;
  let DecoderMethod = "DecodeVSHLMaxInstruction";
}
def  VSHLLi8  : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
                          v8i16, v8i8, imm8, NEONvshlli>;
def  VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
                          v4i32, v4i16, imm16, NEONvshlli>;
def  VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
                          v2i64, v2i32, imm32, NEONvshlli>;

// VSHRN : Vector Shift Right and Narrow
defm VSHRN    : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
                           NEONvshrn>;

// VRSHL : Vector Rounding Shift
defm VRSHLs   : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vrshl", "s", int_arm_neon_vrshifts>;
defm VRSHLu   : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vrshl", "u", int_arm_neon_vrshiftu>;
// VRSHR : Vector Rounding Shift Right
defm VRSHRs   : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", "VRSHRs",
                            NEONvrshrs>;
defm VRSHRu   : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", "VRSHRu",
                            NEONvrshru>;

// VRSHRN : Vector Rounding Shift Right and Narrow
defm VRSHRN   : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
                           NEONvrshrn>;

// VQSHL : Vector Saturating Shift
defm VQSHLs   : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqshl", "s", int_arm_neon_vqshifts>;
defm VQSHLu   : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqshl", "u", int_arm_neon_vqshiftu>;
// VQSHL : Vector Saturating Shift Left (Immediate)
defm VQSHLsi  : N2VShL_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshls>;
defm VQSHLui  : N2VShL_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshlu>;

// VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned)
defm VQSHLsu  : N2VShL_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsu>;

// VQSHRN : Vector Saturating Shift Right and Narrow
defm VQSHRNs  : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
                           NEONvqshrns>;
defm VQSHRNu  : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u",
                           NEONvqshrnu>;

// VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned)
defm VQSHRUN  : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
                           NEONvqshrnsu>;

// VQRSHL : Vector Saturating Rounding Shift
defm VQRSHLs  : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqrshl", "s", int_arm_neon_vqrshifts>;
defm VQRSHLu  : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqrshl", "u", int_arm_neon_vqrshiftu>;

// VQRSHRN : Vector Saturating Rounding Shift Right and Narrow
defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
                           NEONvqrshrns>;
defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u",
                           NEONvqrshrnu>;

// VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
                           NEONvqrshrnsu>;

// VSRA : Vector Shift Right and Accumulate
defm VSRAs    : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrs>;
defm VSRAu    : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", NEONvshru>;
// VRSRA : Vector Rounding Shift Right and Accumulate
defm VRSRAs   : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>;
defm VRSRAu   : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>;

// VSLI : Vector Shift Left and Insert
defm VSLI     : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">;

// VSRI : Vector Shift Right and Insert
defm VSRI     : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">;

// Vector Absolute and Saturating Absolute.
// VABS : Vector Absolute Value
defm VABS     : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
                           IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s",
                           int_arm_neon_vabs>;
def  VABSfd   : N2VDInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
                        IIC_VUNAD, "vabs", "f32",
                        v2f32, v2f32, int_arm_neon_vabs>;
def  VABSfq   : N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
                        IIC_VUNAQ, "vabs", "f32",
                        v4f32, v4f32, int_arm_neon_vabs>;

// VQABS : Vector Saturating Absolute Value
defm VQABS    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
                           IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s",
                           int_arm_neon_vqabs>;

// Vector Negate.

def vnegd  : PatFrag<(ops node:$in),
                     (sub (bitconvert (v2i32 NEONimmAllZerosV)), node:$in)>;
def vnegq  : PatFrag<(ops node:$in),
                     (sub (bitconvert (v4i32 NEONimmAllZerosV)), node:$in)>;

class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$Vd), (ins DPR:$Vm),
        IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (vnegd DPR:$Vm)))]>;
class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$Vd), (ins QPR:$Vm),
        IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (vnegq QPR:$Vm)))]>;

// VNEG : Vector Negate (integer)
def  VNEGs8d  : VNEGD<0b00, "vneg", "s8", v8i8>;
def  VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
def  VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
def  VNEGs8q  : VNEGQ<0b00, "vneg", "s8", v16i8>;
def  VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
def  VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;

// VNEG : Vector Negate (floating-point)
def  VNEGfd   : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
                    (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
                    "vneg", "f32", "$Vd, $Vm", "",
                    [(set DPR:$Vd, (v2f32 (fneg DPR:$Vm)))]>;
def  VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
                    (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
                    "vneg", "f32", "$Vd, $Vm", "",
                    [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>;

def : Pat<(v8i8  (vnegd DPR:$src)), (VNEGs8d DPR:$src)>;
def : Pat<(v4i16 (vnegd DPR:$src)), (VNEGs16d DPR:$src)>;
def : Pat<(v2i32 (vnegd DPR:$src)), (VNEGs32d DPR:$src)>;
def : Pat<(v16i8 (vnegq QPR:$src)), (VNEGs8q QPR:$src)>;
def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>;
def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>;

// VQNEG : Vector Saturating Negate
defm VQNEG    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
                           IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s",
                           int_arm_neon_vqneg>;

// Vector Bit Counting Operations.

// VCLS : Vector Count Leading Sign Bits
defm VCLS     : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
                           IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s",
                           int_arm_neon_vcls>;
// VCLZ : Vector Count Leading Zeros
defm VCLZ     : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
                           IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
                           int_arm_neon_vclz>;
// VCNT : Vector Count One Bits
def  VCNTd    : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                        IIC_VCNTiD, "vcnt", "8",
                        v8i8, v8i8, int_arm_neon_vcnt>;
def  VCNTq    : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                        IIC_VCNTiQ, "vcnt", "8",
                        v16i8, v16i8, int_arm_neon_vcnt>;

// Vector Swap
def  VSWPd    : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
                     (outs DPR:$Vd, DPR:$Vm), (ins DPR:$in1, DPR:$in2),
                     NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
                     []>;
def  VSWPq    : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
                     (outs QPR:$Vd, QPR:$Vm), (ins QPR:$in1, QPR:$in2),
                     NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
                     []>;

// Vector Move Operations.
// VMOV : Vector Move (Register)
def : InstAlias<"vmov${p} $Vd, $Vm",
                (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
def : InstAlias<"vmov${p} $Vd, $Vm",
                (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;

// VMOV : Vector Move (Immediate)

let isReMaterializable = 1 in {
def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd),
                        (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
                        "vmov", "i8", "$Vd, $SIMM", "",
                        [(set DPR:$Vd, (v8i8 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
                         "vmov", "i8", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v16i8 (NEONvmovImm timm:$SIMM)))]>;

def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmov", "i16", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v4i16 (NEONvmovImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmov", "i16", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v8i16 (NEONvmovImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmov", "i32", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v2i32 (NEONvmovImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmov", "i32", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v4i32 (NEONvmovImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd),
                         (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
                         "vmov", "i64", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v1i64 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd),
                         (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
                         "vmov", "i64", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v2i64 (NEONvmovImm timm:$SIMM)))]>;

def VMOVv2f32 : N1ModImm<1, 0b000, 0b1111, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
                         "vmov", "f32", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v2f32 (NEONvmovFPImm timm:$SIMM)))]>;
def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
                         "vmov", "f32", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v4f32 (NEONvmovFPImm timm:$SIMM)))]>;
} // isReMaterializable

// VMOV : Vector Get Lane (move scalar to ARM core register)

def VGETLNs8 : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
                         (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
                         IIC_VMOVSI, "vmov", "s8", "$R, $V$lane",
                         [(set GPR:$R, (NEONvgetlanes (v8i8 DPR:$V),
                                                      imm:$lane))]> {
  let Inst{21}  = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
                          (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
                          IIC_VMOVSI, "vmov", "s16", "$R, $V$lane",
                          [(set GPR:$R, (NEONvgetlanes (v4i16 DPR:$V),
                                                       imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6}  = lane{0};
}
def VGETLNu8 : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
                         (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
                         IIC_VMOVSI, "vmov", "u8", "$R, $V$lane",
                         [(set GPR:$R, (NEONvgetlaneu (v8i8 DPR:$V),
                                                      imm:$lane))]> {
  let Inst{21}  = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
                          (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
                          IIC_VMOVSI, "vmov", "u16", "$R, $V$lane",
                          [(set GPR:$R, (NEONvgetlaneu (v4i16 DPR:$V),
                                                       imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6}  = lane{0};
}
def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
                          (outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane),
                          IIC_VMOVSI, "vmov", "32", "$R, $V$lane",
                          [(set GPR:$R, (extractelt (v2i32 DPR:$V),
                                                    imm:$lane))]> {
  let Inst{21} = lane{0};
}
// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
def : Pat<(NEONvgetlanes (v16i8 QPR:$src), imm:$lane),
          (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src,
                                          (DSubReg_i8_reg imm:$lane))),
                    (SubReg_i8_lane imm:$lane))>;
def : Pat<(NEONvgetlanes (v8i16 QPR:$src), imm:$lane),
          (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                                            (DSubReg_i16_reg imm:$lane))),
                     (SubReg_i16_lane imm:$lane))>;
def : Pat<(NEONvgetlaneu (v16i8 QPR:$src), imm:$lane),
          (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src,
                                          (DSubReg_i8_reg imm:$lane))),
                    (SubReg_i8_lane imm:$lane))>;
def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane),
          (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                                            (DSubReg_i16_reg imm:$lane))),
                     (SubReg_i16_lane imm:$lane))>;
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
          (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
                                            (DSubReg_i32_reg imm:$lane))),
                     (SubReg_i32_lane imm:$lane))>;
def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
          (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)),
                          (SSubReg_f32_reg imm:$src2))>;
def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1),QPR_VFP2)),
                          (SSubReg_f32_reg imm:$src2))>;
//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
//          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;


// VMOV : Vector Set Lane (move ARM core register to scalar)

let Constraints = "$src1 = $V" in {
def VSETLNi8 : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V),
                         (ins DPR:$src1, GPR:$R, VectorIndex8:$lane),
                         IIC_VMOVISL, "vmov", "8", "$V$lane, $R",
                         [(set DPR:$V, (vector_insert (v8i8 DPR:$src1),
                                                      GPR:$R, imm:$lane))]> {
  let Inst{21}  = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V),
                          (ins DPR:$src1, GPR:$R, VectorIndex16:$lane),
                          IIC_VMOVISL, "vmov", "16", "$V$lane, $R",
                          [(set DPR:$V, (vector_insert (v4i16 DPR:$src1),
                                                       GPR:$R, imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6}  = lane{0};
}
def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
                          (ins DPR:$src1, GPR:$R, VectorIndex32:$lane),
                          IIC_VMOVISL, "vmov", "32", "$V$lane, $R",
                          [(set DPR:$V, (insertelt (v2i32 DPR:$src1),
                                                   GPR:$R, imm:$lane))]> {
  let Inst{21} = lane{0};
}
}
def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
          (v16i8 (INSERT_SUBREG QPR:$src1,
                  (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
                                                        (DSubReg_i8_reg imm:$lane))),
                                  GPR:$src2, (SubReg_i8_lane imm:$lane))),
                  (DSubReg_i8_reg imm:$lane)))>;
def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
          (v8i16 (INSERT_SUBREG QPR:$src1,
                  (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
                                                           (DSubReg_i16_reg imm:$lane))),
                                    GPR:$src2, (SubReg_i16_lane imm:$lane))),
                  (DSubReg_i16_reg imm:$lane)))>;
def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
          (v4i32 (INSERT_SUBREG QPR:$src1,
                  (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
                                                           (DSubReg_i32_reg imm:$lane))),
                                    GPR:$src2, (SubReg_i32_lane imm:$lane))),
                  (DSubReg_i32_reg imm:$lane)))>;

def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
                         SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)),
                         SPR:$src2, (SSubReg_f32_reg imm:$src3))>;

//def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
//          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;

def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;

def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
          (VSETLNi8  (v8i8  (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v4i16 (scalar_to_vector GPR:$src)),
          (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
          (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;

def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                         (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;
def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
                         (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;
def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                         (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;

// VDUP : Vector Duplicate (from ARM core register to all elements)

class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$V), (ins GPR:$R),
          IIC_VMOVIS, "vdup", Dt, "$V, $R",
          [(set DPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;
class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$V), (ins GPR:$R),
          IIC_VMOVIS, "vdup", Dt, "$V, $R",
          [(set QPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;

def  VDUP8d   : VDUPD<0b11101100, 0b00, "8", v8i8>;
def  VDUP16d  : VDUPD<0b11101000, 0b01, "16", v4i16>;
def  VDUP32d  : VDUPD<0b11101000, 0b00, "32", v2i32>;
def  VDUP8q   : VDUPQ<0b11101110, 0b00, "8", v16i8>;
def  VDUP16q  : VDUPQ<0b11101010, 0b01, "16", v8i16>;
def  VDUP32q  : VDUPQ<0b11101010, 0b00, "32", v4i32>;

def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>;
def : Pat<(v4f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>;

// VDUP : Vector Duplicate Lane (from scalar to all elements)

class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
              ValueType Ty, Operand IdxTy>
  : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
              IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm$lane",
              [(set DPR:$Vd, (Ty (NEONvduplane (Ty DPR:$Vm), imm:$lane)))]>;

class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Operand IdxTy>
  : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
              IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm$lane",
              [(set QPR:$Vd, (ResTy (NEONvduplane (OpTy DPR:$Vm),
                                                  VectorIndex32:$lane)))]>;

// Inst{19-16} is partially specified depending on the element size.
def VDUPLN8d  : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8, VectorIndex8> {
  bits<3> lane;
  let Inst{19-17} = lane{2-0};
}
def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16, VectorIndex16> {
  bits<2> lane;
  let Inst{19-18} = lane{1-0};
}
def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32, VectorIndex32> {
  bits<1> lane;
  let Inst{19} = lane{0};
}
def VDUPLN8q  : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8, VectorIndex8> {
  bits<3> lane;
  let Inst{19-17} = lane{2-0};
}
def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16, VectorIndex16> {
  bits<2> lane;
  let Inst{19-18} = lane{1-0};
}
def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32, VectorIndex32> {
  bits<1> lane;
  let Inst{19} = lane{0};
}

def : Pat<(v2f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
          (VDUPLN32d DPR:$Vm, imm:$lane)>;

def : Pat<(v4f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
          (VDUPLN32q DPR:$Vm, imm:$lane)>;

def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)),
          (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
                                                 (DSubReg_i8_reg imm:$lane))),
                           (SubReg_i8_lane imm:$lane)))>;
def : Pat<(v8i16 (NEONvduplane (v8i16 QPR:$src), imm:$lane)),
          (v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src,
                                                   (DSubReg_i16_reg imm:$lane))),
                            (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)),
          (v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src,
                                                   (DSubReg_i32_reg imm:$lane))),
                            (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)),
          (v4f32 (VDUPLN32q (v2f32 (EXTRACT_SUBREG QPR:$src,
                                                   (DSubReg_i32_reg imm:$lane))),
                            (SubReg_i32_lane imm:$lane)))>;

def  VDUPfdf : PseudoNeonI<(outs DPR:$dst), (ins SPR:$src), IIC_VMOVD, "",
                           [(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]>;
def  VDUPfqf : PseudoNeonI<(outs QPR:$dst), (ins SPR:$src), IIC_VMOVD, "",
                           [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>;

// VMOVN : Vector Narrowing Move
defm VMOVN    : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN,
                         "vmovn", "i", trunc>;
// VQMOVN : Vector Saturating Narrowing Move
defm VQMOVNs  : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
                            "vqmovn", "s", int_arm_neon_vqmovns>;
defm VQMOVNu  : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD,
                            "vqmovn", "u", int_arm_neon_vqmovnu>;
defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
                            "vqmovun", "s", int_arm_neon_vqmovnsu>;
// VMOVL : Vector Lengthening Move
defm VMOVLs   : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
defm VMOVLu   : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;
def : Pat<(v8i16 (anyext (v8i8 DPR:$Vm))), (VMOVLuv8i16 DPR:$Vm)>;
def : Pat<(v4i32 (anyext (v4i16 DPR:$Vm))), (VMOVLuv4i32 DPR:$Vm)>;
def : Pat<(v2i64 (anyext (v2i32 DPR:$Vm))), (VMOVLuv2i64 DPR:$Vm)>;

// Vector Conversions.
5177 5178 // VCVT : Vector Convert Between Floating-Point and Integers 5179 def VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32", 5180 v2i32, v2f32, fp_to_sint>; 5181 def VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32", 5182 v2i32, v2f32, fp_to_uint>; 5183 def VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32", 5184 v2f32, v2i32, sint_to_fp>; 5185 def VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32", 5186 v2f32, v2i32, uint_to_fp>; 5187 5188 def VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32", 5189 v4i32, v4f32, fp_to_sint>; 5190 def VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32", 5191 v4i32, v4f32, fp_to_uint>; 5192 def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32", 5193 v4f32, v4i32, sint_to_fp>; 5194 def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32", 5195 v4f32, v4i32, uint_to_fp>; 5196 5197 // VCVT : Vector Convert Between Floating-Point and Fixed-Point. 
5198 let DecoderMethod = "DecodeVCVTD" in { 5199 def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32", 5200 v2i32, v2f32, int_arm_neon_vcvtfp2fxs>; 5201 def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32", 5202 v2i32, v2f32, int_arm_neon_vcvtfp2fxu>; 5203 def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32", 5204 v2f32, v2i32, int_arm_neon_vcvtfxs2fp>; 5205 def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32", 5206 v2f32, v2i32, int_arm_neon_vcvtfxu2fp>; 5207 } 5208 5209 let DecoderMethod = "DecodeVCVTQ" in { 5210 def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32", 5211 v4i32, v4f32, int_arm_neon_vcvtfp2fxs>; 5212 def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32", 5213 v4i32, v4f32, int_arm_neon_vcvtfp2fxu>; 5214 def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32", 5215 v4f32, v4i32, int_arm_neon_vcvtfxs2fp>; 5216 def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32", 5217 v4f32, v4i32, int_arm_neon_vcvtfxu2fp>; 5218 } 5219 5220 // VCVT : Vector Convert Between Half-Precision and Single-Precision. 5221 def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0, 5222 IIC_VUNAQ, "vcvt", "f16.f32", 5223 v4i16, v4f32, int_arm_neon_vcvtfp2hf>, 5224 Requires<[HasNEON, HasFP16]>; 5225 def VCVTh2f : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0, 5226 IIC_VUNAQ, "vcvt", "f32.f16", 5227 v4f32, v4i16, int_arm_neon_vcvthf2fp>, 5228 Requires<[HasNEON, HasFP16]>; 5229 5230 // Vector Reverse. 
// VREV64 : Vector Reverse elements within 64-bit doublewords

class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VMOVD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (NEONvrev64 (Ty DPR:$Vm))))]>;
class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VMOVQ,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (NEONvrev64 (Ty QPR:$Vm))))]>;

def VREV64d8  : VREV64D<0b00, "vrev64", "8", v8i8>;
def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
def : Pat<(v2f32 (NEONvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>;

def VREV64q8  : VREV64Q<0b00, "vrev64", "8", v16i8>;
def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
def : Pat<(v4f32 (NEONvrev64 (v4f32 QPR:$Vm))), (VREV64q32 QPR:$Vm)>;

// VREV32 : Vector Reverse elements within 32-bit words

class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VMOVD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (NEONvrev32 (Ty DPR:$Vm))))]>;
class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VMOVQ,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (NEONvrev32 (Ty QPR:$Vm))))]>;

def VREV32d8  : VREV32D<0b00, "vrev32", "8", v8i8>;
def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;

def VREV32q8  : VREV32Q<0b00, "vrev32", "8", v16i8>;
def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;

// VREV16 : Vector Reverse elements within 16-bit halfwords

class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VMOVD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (NEONvrev16 (Ty DPR:$Vm))))]>;
class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VMOVQ,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (NEONvrev16 (Ty QPR:$Vm))))]>;

def VREV16d8  : VREV16D<0b00, "vrev16", "8", v8i8>;
def VREV16q8  : VREV16Q<0b00, "vrev16", "8", v16i8>;

// Other Vector Shuffles.

// Aligned extractions: really just dropping registers

class AlignedVEXTq<ValueType DestTy, ValueType SrcTy, SDNodeXForm LaneCVT>
  : Pat<(DestTy (vector_extract_subvec (SrcTy QPR:$src), (i32 imm:$start))),
        (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>;

def : AlignedVEXTq<v8i8, v16i8, DSubReg_i8_reg>;

def : AlignedVEXTq<v4i16, v8i16, DSubReg_i16_reg>;

def : AlignedVEXTq<v2i32, v4i32, DSubReg_i32_reg>;

def : AlignedVEXTq<v1i64, v2i64, DSubReg_f64_reg>;

def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;


// VEXT : Vector Extract

class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
  : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm,
        IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
        [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
                                     (Ty DPR:$Vm), imm:$index)))]> {
  bits<4> index;
  let Inst{11-8} = index{3-0};
}

class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
  : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd),
        (ins QPR:$Vn, QPR:$Vm, imm0_15:$index), NVExtFrm,
        IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
        [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
                                     (Ty QPR:$Vm), imm:$index)))]> {
  bits<4> index;
  let Inst{11-8} = index{3-0};
}

def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> {
  let Inst{11-8} = index{3-0};
}
def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> {
  let Inst{11-9} = index{2-0};
  let Inst{8}    = 0b0;
}
def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
  let Inst{11-10} = index{1-0};
  let Inst{9-8}   = 0b00;
}
def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn),
                           (v2f32 DPR:$Vm),
                           (i32 imm:$index))),
          (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;

def VEXTq8 : VEXTq<"vext", "8", v16i8, imm0_15> {
  let Inst{11-8} = index{3-0};
}
def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> {
  let Inst{11-9} = index{2-0};
  let Inst{8}    = 0b0;
}
def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> {
  let Inst{11-10} = index{1-0};
  let Inst{9-8}   = 0b00;
}
def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> {
  let Inst{11}   = index{0};
  let Inst{10-8} = 0b000;
}
def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn),
                           (v4f32 QPR:$Vm),
                           (i32 imm:$index))),
          (VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>;

// VTRN : Vector Transpose

def VTRNd8  : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
def VTRNd16 : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
def VTRNd32 : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;

def VTRNq8  : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
def VTRNq16 : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;

// VUZP : Vector Unzip (Deinterleave)

def VUZPd8  : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
// vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
def : NEONInstAlias<"vuzp${p}.32 $Dd, $Dm",
                    (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;

def VUZPq8  : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;

// VZIP : Vector Zip (Interleave)

def VZIPd8  : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
// vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
def : NEONInstAlias<"vzip${p}.32 $Dd, $Dm",
                    (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;

def VZIPq8  : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;

// Vector Table Lookup and Table Extension.

// VTBL : Vector Table Lookup
// All forms share a custom decoder because the register-list operand
// cannot be decoded generically.
let DecoderMethod = "DecodeTBLInstruction" in {
def VTBL1
  : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd),
        (ins VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
        "vtbl", "8", "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbl1 VecListOneD:$Vn, DPR:$Vm)))]>;
// 2-, 3- and 4-register table forms; the multi-register list constrains
// register allocation, hence hasExtraSrcRegAllocReq.
let hasExtraSrcRegAllocReq = 1 in {
def VTBL2
  : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
        (ins VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def VTBL3
  : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd),
        (ins VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB3,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def VTBL4
  : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd),
        (ins VecListFourD:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTB4,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
} // hasExtraSrcRegAllocReq = 1

// Pseudo forms that take the table as a single QQPR super-register.
def VTBL3Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>;
def VTBL4Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB4, "", []>;

// VTBX : Vector Table Extension
// Like VTBL but merges into the original destination value ($orig is
// tied to $Vd).
def VTBX1
  : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1,
        "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd",
        [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1
                               DPR:$orig, VecListOneD:$Vn, DPR:$Vm)))]>;
let hasExtraSrcRegAllocReq = 1 in {
def VTBX2
  : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
        "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>;
def VTBX3
  : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListThreeD:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTBX3,
        "vtbx", "8", "$Vd, $Vn, $Vm",
        "$orig = $Vd", []>;
def VTBX4
  : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
        "vtbx", "8", "$Vd, $Vn, $Vm",
        "$orig = $Vd", []>;
} // hasExtraSrcRegAllocReq = 1

def VTBX3Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
                IIC_VTBX3, "$orig = $dst", []>;
def VTBX4Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
                IIC_VTBX4, "$orig = $dst", []>;
} // DecoderMethod = "DecodeTBLInstruction"

//===----------------------------------------------------------------------===//
// NEON instructions for single-precision FP math
//===----------------------------------------------------------------------===//

// Scalar f32 ops implemented on the NEON pipeline: the scalar is inserted
// into the low lane (ssub_0) of an undefined d-register, the d-register
// NEON instruction runs, and the low lane is extracted back out.
class N2VSPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$a)),
              (EXTRACT_SUBREG
               (v2f32 (COPY_TO_REGCLASS (Inst
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$a, ssub_0)), DPR_VFP2)), ssub_0)>;

class N3VSPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
              (EXTRACT_SUBREG
               (v2f32 (COPY_TO_REGCLASS (Inst
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$a, ssub_0),
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;

class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
              (EXTRACT_SUBREG
               (v2f32 (COPY_TO_REGCLASS (Inst
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$acc, ssub_0),
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$a, ssub_0),
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;

def : N3VSPat<fadd, VADDfd>;
def : N3VSPat<fsub, VSUBfd>;
def : N3VSPat<fmul, VMULfd>;
// Multiply-accumulate: plain VMLA/VMLS when fused MAC is not in use,
// VFMA/VFMS when it is (the two predicate sets are mutually exclusive).
def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
      Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>;
def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
      Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>;
def : N3VSMulOpPat<fmul, fadd, VFMAfd>,
      Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
def : N3VSMulOpPat<fmul, fsub, VFMSfd>,
      Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
def : N2VSPat<fabs, VABSfd>;
def : N2VSPat<fneg, VNEGfd>;
def : N3VSPat<NEONfmax, VMAXfd>;
def : N3VSPat<NEONfmin, VMINfd>;
def : N2VSPat<arm_ftosi, VCVTf2sd>;
def : N2VSPat<arm_ftoui, VCVTf2ud>;
def : N2VSPat<arm_sitof, VCVTs2fd>;
def : N2VSPat<arm_uitof, VCVTu2fd>;

//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//

// bit_convert
// All bitcasts between 64-bit (d-register) vector types are no-ops.
def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v8i8  DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (f64   DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v8i8  DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (f64   DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v8i8  DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (f64   DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v1i64 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v2i32 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v4i16 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (f64   DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v2f32 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(f64   (bitconvert (v1i64 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v2i32 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v4i16 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v8i8  DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v2f32 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(v2f32 (bitconvert (f64   DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v8i8  DPR:$src))), (v2f32 DPR:$src)>;

// Likewise for 128-bit (q-register) vector types.
def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;

// Vector lengthening move with load, matching extending loads.

// extload, zextload and sextload for a standard lengthening load. Example:
// Lengthen_Single<"8", "i16", "i8"> = Pat<(v8i16 (extloadvi8 addrmode5:$addr))
//                                         (VMOVLuv8i16 (VLDRD addrmode5:$addr))>;
multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> {
  // extload is handled as zero-extension (VMOVLu).
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)),
                 (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
                   (VLDRD addrmode5:$addr))>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)),
               (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
                 (VLDRD addrmode5:$addr))>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)),
               (!cast<Instruction>("VMOVLsv" # DestLanes # DestTy)
                 (VLDRD addrmode5:$addr))>;
}

// extload, zextload and sextload for a lengthening load which only uses
// half the lanes available.
// Example:
// Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16"> =
//     Pat<(v4i16 (extloadvi8 addrmode5:$addr))
//         (EXTRACT_SUBREG (VMOVLuv8i16 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
//                                                     (VLDRS addrmode5:$addr),
//                                                     ssub_0)),
//                         dsub_0)>;
multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy,
                               string InsnLanes, string InsnTy> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)),
                 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
                   (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
                   dsub_0)>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)),
               (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
                 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
                 dsub_0)>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)),
               (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
                 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
                 dsub_0)>;
}

// extload, zextload and sextload for a lengthening load followed by another
// lengthening load, to quadruple the initial length.
//
// Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32"> =
//     Pat<(v4i32 (extloadvi8 addrmode5:$addr))
//         (VMOVLuv4i32
//           (EXTRACT_SUBREG (VMOVLuv8i16 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
//                                                       (VLDRS addrmode5:$addr),
//                                                       ssub_0)),
//                           dsub_0))>;
multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
                           string Insn2Ty> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)),
                 (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
                   (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
                     (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
                      ssub_0)), dsub_0))>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)),
               (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
                 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
                   (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
                    ssub_0)), dsub_0))>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)),
               (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
                 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
                   (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
                    ssub_0)), dsub_0))>;
}

// extload, zextload and sextload for a lengthening load followed by another
// lengthening load, to quadruple the initial length, but which ends up only
// requiring half the available lanes (a 64-bit outcome instead of a 128-bit).
//
// Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32"> =
//     Pat<(v2i32 (extloadvi8 addrmode5:$addr))
//         (EXTRACT_SUBREG (VMOVLuv4i32
//           (EXTRACT_SUBREG (VMOVLuv8i16 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
//                                                       (VLDRS addrmode5:$addr),
//                                                       ssub_0)),
//                           dsub_0)),
//                         dsub_0)>;
multiclass Lengthen_HalfDouble<string DestLanes, string DestTy, string SrcTy,
                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
                           string Insn2Ty> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)),
                 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
                   (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
                     (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
                      ssub_0)), dsub_0)),
                  dsub_0)>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)),
               (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
                 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
                   (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
                    ssub_0)), dsub_0)),
                dsub_0)>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)),
               (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
                 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
                   (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
                    ssub_0)), dsub_0)),
                dsub_0)>;
}

defm : Lengthen_Single<"8", "i16", "i8">; // v8i8 -> v8i16
defm : Lengthen_Single<"4", "i32", "i16">; // v4i16 -> v4i32
defm : Lengthen_Single<"2", "i64", "i32">; // v2i32 -> v2i64

defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16
defm : Lengthen_HalfSingle<"2", "i16", "i8", "8", "i16">; // v2i8 -> v2i16
defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32

// Double lengthening - v4i8 -> v4i16 -> v4i32
defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">;
// v2i8 -> v2i16 -> v2i32
defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">;
// v2i16 -> v2i32 -> v2i64
defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">;

// Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64
def : Pat<(v2i64 (extloadvi8 addrmode5:$addr)),
      (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
        (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
        dsub_0)), dsub_0))>;
def : Pat<(v2i64 (zextloadvi8 addrmode5:$addr)),
      (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
        (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
        dsub_0)), dsub_0))>;
def : Pat<(v2i64 (sextloadvi8 addrmode5:$addr)),
      (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
        (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
        dsub_0)), dsub_0))>;

//===----------------------------------------------------------------------===//
// Assembler aliases
//

// Legacy VFP mnemonics for moving a GPR into the high/low half of a
// d-register.
def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn",
                    (VSETLNi32 DPR:$Dd, GPR:$Rn, 1, pred:$p)>;
def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn",
                    (VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>;


// VADD two-operand aliases.
// Two-operand form: the destination register doubles as the first source.
def : NEONInstAlias<"vadd${p}.i8 $Vdn, $Vm",
                    (VADDv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vadd${p}.i16 $Vdn, $Vm",
                    (VADDv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vadd${p}.i32 $Vdn, $Vm",
                    (VADDv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vadd${p}.i64 $Vdn, $Vm",
                    (VADDv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;

def : NEONInstAlias<"vadd${p}.i8 $Vdn, $Vm",
                    (VADDv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vadd${p}.i16 $Vdn, $Vm",
                    (VADDv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vadd${p}.i32 $Vdn, $Vm",
                    (VADDv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vadd${p}.i64 $Vdn, $Vm",
                    (VADDv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;

def : NEONInstAlias<"vadd${p}.f32 $Vdn, $Vm",
                    (VADDfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vadd${p}.f32 $Vdn, $Vm",
                    (VADDfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;

// VSUB two-operand aliases.
// Two-operand form: the destination register doubles as the first source.
def : NEONInstAlias<"vsub${p}.i8 $Vdn, $Vm",
                    (VSUBv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vsub${p}.i16 $Vdn, $Vm",
                    (VSUBv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vsub${p}.i32 $Vdn, $Vm",
                    (VSUBv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vsub${p}.i64 $Vdn, $Vm",
                    (VSUBv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;

def : NEONInstAlias<"vsub${p}.i8 $Vdn, $Vm",
                    (VSUBv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vsub${p}.i16 $Vdn, $Vm",
                    (VSUBv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vsub${p}.i32 $Vdn, $Vm",
                    (VSUBv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vsub${p}.i64 $Vdn, $Vm",
                    (VSUBv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;

def : NEONInstAlias<"vsub${p}.f32 $Vdn, $Vm",
                    (VSUBfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vsub${p}.f32 $Vdn, $Vm",
                    (VSUBfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;

// VADDW two-operand aliases.
// Widening add: Q destination/first source, D second source.
def : NEONInstAlias<"vaddw${p}.s8 $Vdn, $Vm",
                    (VADDWsv8i16 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vaddw${p}.s16 $Vdn, $Vm",
                    (VADDWsv4i32 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vaddw${p}.s32 $Vdn, $Vm",
                    (VADDWsv2i64 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vaddw${p}.u8 $Vdn, $Vm",
                    (VADDWuv8i16 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vaddw${p}.u16 $Vdn, $Vm",
                    (VADDWuv4i32 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vaddw${p}.u32 $Vdn, $Vm",
                    (VADDWuv2i64 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;

// VAND/VBIC/VEOR/VORR accept but do not require a type suffix.
// Bitwise ops with an optional (ignored) data-type suffix, 3-operand form.
defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
                          (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
                          (VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
                          (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
                          (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
                          (VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
                          (VEORq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
                          (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
                          (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
// ... two-operand aliases
def : NEONInstAlias<"vand${p} $Vdn, $Vm",
                    (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vand${p} $Vdn, $Vm",
                    (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vbic${p} $Vdn, $Vm",
                    (VBICd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vbic${p} $Vdn, $Vm",
                    (VBICq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"veor${p} $Vdn, $Vm",
                    (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"veor${p} $Vdn, $Vm",
                    (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vorr${p} $Vdn, $Vm",
                    (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vorr${p} $Vdn, $Vm",
                    (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;

// Two-operand forms with an optional data-type suffix.
// NOTE(review): vbic has no dt-any two-operand alias here — looks like an
// omission relative to vand/veor/vorr; confirm before relying on it.
defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
                          (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
                          (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
                          (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
                          (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
                          (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
                          (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;

// VMUL two-operand aliases.
def : NEONInstAlias<"vmul${p}.p8 $Qdn, $Qm",
                    (VMULpq QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vmul${p}.i8 $Qdn, $Qm",
                    (VMULv16i8 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vmul${p}.i16 $Qdn, $Qm",
                    (VMULv8i16 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vmul${p}.i32 $Qdn, $Qm",
                    (VMULv4i32 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;

def : NEONInstAlias<"vmul${p}.p8 $Ddn, $Dm",
                    (VMULpd DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vmul${p}.i8 $Ddn, $Dm",
                    (VMULv8i8 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vmul${p}.i16 $Ddn, $Dm",
                    (VMULv4i16 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vmul${p}.i32 $Ddn, $Dm",
                    (VMULv2i32 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;

def : NEONInstAlias<"vmul${p}.f32 $Qdn, $Qm",
                    (VMULfq QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vmul${p}.f32 $Ddn, $Dm",
                    (VMULfd DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;

// By-lane forms; the lane source is restricted to the low registers
// (DPR_8 for 16-bit lanes, DPR_VFP2 for 32-bit lanes).
def : NEONInstAlias<"vmul${p}.i16 $Ddn, $Dm$lane",
                    (VMULslv4i16 DPR:$Ddn, DPR:$Ddn, DPR_8:$Dm,
                                 VectorIndex16:$lane, pred:$p)>;
def : NEONInstAlias<"vmul${p}.i16 $Qdn, $Dm$lane",
                    (VMULslv8i16 QPR:$Qdn, QPR:$Qdn, DPR_8:$Dm,
                                 VectorIndex16:$lane, pred:$p)>;

def : NEONInstAlias<"vmul${p}.i32 $Ddn, $Dm$lane",
                    (VMULslv2i32 DPR:$Ddn, DPR:$Ddn, DPR_VFP2:$Dm,
                                 VectorIndex32:$lane, pred:$p)>;
def : NEONInstAlias<"vmul${p}.i32 $Qdn, $Dm$lane",
                    (VMULslv4i32 QPR:$Qdn, QPR:$Qdn, DPR_VFP2:$Dm,
                                 VectorIndex32:$lane, pred:$p)>;

def : NEONInstAlias<"vmul${p}.f32 $Ddn, $Dm$lane",
                    (VMULslfd DPR:$Ddn, DPR:$Ddn, DPR_VFP2:$Dm,
                              VectorIndex32:$lane, pred:$p)>;
def : NEONInstAlias<"vmul${p}.f32 $Qdn, $Dm$lane",
                    (VMULslfq QPR:$Qdn, QPR:$Qdn, DPR_VFP2:$Dm,
                              VectorIndex32:$lane, pred:$p)>;

// VQADD (register) two-operand aliases.
def : NEONInstAlias<"vqadd${p}.s8 $Vdn, $Vm",
                    (VQADDsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vqadd${p}.s16 $Vdn, $Vm",
                    (VQADDsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vqadd${p}.s32 $Vdn, $Vm",
                    (VQADDsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vqadd${p}.s64 $Vdn, $Vm",
                    (VQADDsv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vqadd${p}.u8 $Vdn, $Vm",
                    (VQADDuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vqadd${p}.u16 $Vdn, $Vm",
                    (VQADDuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vqadd${p}.u32 $Vdn, $Vm",
                    (VQADDuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vqadd${p}.u64 $Vdn, $Vm",
                    (VQADDuv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;

def : NEONInstAlias<"vqadd${p}.s8 $Vdn, $Vm",
                    (VQADDsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vqadd${p}.s16 $Vdn, $Vm",
                    (VQADDsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vqadd${p}.s32 $Vdn, $Vm",
                    (VQADDsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vqadd${p}.s64 $Vdn, $Vm",
                    (VQADDsv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vqadd${p}.u8 $Vdn, $Vm",
                    (VQADDuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vqadd${p}.u16 $Vdn, $Vm",
                    (VQADDuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vqadd${p}.u32 $Vdn, $Vm",
                    (VQADDuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vqadd${p}.u64 $Vdn, $Vm",
                    (VQADDuv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;

// VSHL (immediate) two-operand aliases.
// Left-shift immediates range over 0..(elt_size - 1).
def : NEONInstAlias<"vshl${p}.i8 $Vdn, $imm",
                    (VSHLiv8i8 DPR:$Vdn, DPR:$Vdn, imm0_7:$imm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.i16 $Vdn, $imm",
                    (VSHLiv4i16 DPR:$Vdn, DPR:$Vdn, imm0_15:$imm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.i32 $Vdn, $imm",
                    (VSHLiv2i32 DPR:$Vdn, DPR:$Vdn, imm0_31:$imm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.i64 $Vdn, $imm",
                    (VSHLiv1i64 DPR:$Vdn, DPR:$Vdn, imm0_63:$imm, pred:$p)>;

def : NEONInstAlias<"vshl${p}.i8 $Vdn, $imm",
                    (VSHLiv16i8 QPR:$Vdn, QPR:$Vdn, imm0_7:$imm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.i16 $Vdn, $imm",
                    (VSHLiv8i16 QPR:$Vdn, QPR:$Vdn, imm0_15:$imm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.i32 $Vdn, $imm",
                    (VSHLiv4i32 QPR:$Vdn, QPR:$Vdn, imm0_31:$imm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.i64 $Vdn, $imm",
                    (VSHLiv2i64 QPR:$Vdn, QPR:$Vdn, imm0_63:$imm, pred:$p)>;

// VSHL (register) two-operand aliases.
// Two-operand form: the destination register doubles as the first source.
def : NEONInstAlias<"vshl${p}.s8 $Vdn, $Vm",
                    (VSHLsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.s16 $Vdn, $Vm",
                    (VSHLsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.s32 $Vdn, $Vm",
                    (VSHLsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.s64 $Vdn, $Vm",
                    (VSHLsv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.u8 $Vdn, $Vm",
                    (VSHLuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.u16 $Vdn, $Vm",
                    (VSHLuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.u32 $Vdn, $Vm",
                    (VSHLuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.u64 $Vdn, $Vm",
                    (VSHLuv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;

def : NEONInstAlias<"vshl${p}.s8 $Vdn, $Vm",
                    (VSHLsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.s16 $Vdn, $Vm",
                    (VSHLsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.s32 $Vdn, $Vm",
                    (VSHLsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.s64 $Vdn, $Vm",
                    (VSHLsv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.u8 $Vdn, $Vm",
                    (VSHLuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.u16 $Vdn, $Vm",
                    (VSHLuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.u32 $Vdn, $Vm",
                    (VSHLuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.u64 $Vdn, $Vm",
                    (VSHLuv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;

// VSHR (immediate) two-operand aliases.
// Two-operand form; right-shift immediates range over 1..elt_size
// (shr_imm* operands).
def : NEONInstAlias<"vshr${p}.s8 $Vdn, $imm",
                    (VSHRsv8i8 DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>;
def : NEONInstAlias<"vshr${p}.s16 $Vdn, $imm",
                    (VSHRsv4i16 DPR:$Vdn, DPR:$Vdn, shr_imm16:$imm, pred:$p)>;
def : NEONInstAlias<"vshr${p}.s32 $Vdn, $imm",
                    (VSHRsv2i32 DPR:$Vdn, DPR:$Vdn, shr_imm32:$imm, pred:$p)>;
def : NEONInstAlias<"vshr${p}.s64 $Vdn, $imm",
                    (VSHRsv1i64 DPR:$Vdn, DPR:$Vdn, shr_imm64:$imm, pred:$p)>;

def : NEONInstAlias<"vshr${p}.s8 $Vdn, $imm",
                    (VSHRsv16i8 QPR:$Vdn, QPR:$Vdn, shr_imm8:$imm, pred:$p)>;
def : NEONInstAlias<"vshr${p}.s16 $Vdn, $imm",
                    (VSHRsv8i16 QPR:$Vdn, QPR:$Vdn, shr_imm16:$imm, pred:$p)>;
def : NEONInstAlias<"vshr${p}.s32 $Vdn, $imm",
                    (VSHRsv4i32 QPR:$Vdn, QPR:$Vdn, shr_imm32:$imm, pred:$p)>;
def : NEONInstAlias<"vshr${p}.s64 $Vdn, $imm",
                    (VSHRsv2i64 QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>;

def : NEONInstAlias<"vshr${p}.u8 $Vdn, $imm",
                    (VSHRuv8i8 DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>;
def : NEONInstAlias<"vshr${p}.u16 $Vdn, $imm",
                    (VSHRuv4i16 DPR:$Vdn, DPR:$Vdn, shr_imm16:$imm, pred:$p)>;
def : NEONInstAlias<"vshr${p}.u32 $Vdn, $imm",
                    (VSHRuv2i32 DPR:$Vdn, DPR:$Vdn, shr_imm32:$imm, pred:$p)>;
def : NEONInstAlias<"vshr${p}.u64 $Vdn, $imm",
                    (VSHRuv1i64 DPR:$Vdn, DPR:$Vdn, shr_imm64:$imm, pred:$p)>;

def : NEONInstAlias<"vshr${p}.u8 $Vdn, $imm",
                    (VSHRuv16i8 QPR:$Vdn, QPR:$Vdn, shr_imm8:$imm, pred:$p)>;
def : NEONInstAlias<"vshr${p}.u16 $Vdn, $imm",
                    (VSHRuv8i16 QPR:$Vdn, QPR:$Vdn, shr_imm16:$imm, pred:$p)>;
def : NEONInstAlias<"vshr${p}.u32 $Vdn, $imm",
                    (VSHRuv4i32 QPR:$Vdn, QPR:$Vdn, shr_imm32:$imm, pred:$p)>;
def : NEONInstAlias<"vshr${p}.u64 $Vdn, $imm",
                    (VSHRuv2i64 QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>;

// VRSHL two-operand aliases.
6028 def : NEONInstAlias<"vrshl${p}.s8 $Vdn, $Vm", 6029 (VRSHLsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; 6030 def : NEONInstAlias<"vrshl${p}.s16 $Vdn, $Vm", 6031 (VRSHLsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; 6032 def : NEONInstAlias<"vrshl${p}.s32 $Vdn, $Vm", 6033 (VRSHLsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; 6034 def : NEONInstAlias<"vrshl${p}.s64 $Vdn, $Vm", 6035 (VRSHLsv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; 6036 def : NEONInstAlias<"vrshl${p}.u8 $Vdn, $Vm", 6037 (VRSHLuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; 6038 def : NEONInstAlias<"vrshl${p}.u16 $Vdn, $Vm", 6039 (VRSHLuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; 6040 def : NEONInstAlias<"vrshl${p}.u32 $Vdn, $Vm", 6041 (VRSHLuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; 6042 def : NEONInstAlias<"vrshl${p}.u64 $Vdn, $Vm", 6043 (VRSHLuv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; 6044 6045 def : NEONInstAlias<"vrshl${p}.s8 $Vdn, $Vm", 6046 (VRSHLsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; 6047 def : NEONInstAlias<"vrshl${p}.s16 $Vdn, $Vm", 6048 (VRSHLsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; 6049 def : NEONInstAlias<"vrshl${p}.s32 $Vdn, $Vm", 6050 (VRSHLsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; 6051 def : NEONInstAlias<"vrshl${p}.s64 $Vdn, $Vm", 6052 (VRSHLsv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; 6053 def : NEONInstAlias<"vrshl${p}.u8 $Vdn, $Vm", 6054 (VRSHLuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; 6055 def : NEONInstAlias<"vrshl${p}.u16 $Vdn, $Vm", 6056 (VRSHLuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; 6057 def : NEONInstAlias<"vrshl${p}.u32 $Vdn, $Vm", 6058 (VRSHLuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; 6059 def : NEONInstAlias<"vrshl${p}.u64 $Vdn, $Vm", 6060 (VRSHLuv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; 6061 6062 // VLD1 single-lane pseudo-instructions. These need special handling for 6063 // the lane index that an InstAlias can't handle, so we use these instead. 
6064 def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr", 6065 (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; 6066 def VLD1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr", 6067 (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6068 def VLD1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr", 6069 (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6070 6071 def VLD1LNdWB_fixed_Asm_8 : 6072 NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr!", 6073 (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; 6074 def VLD1LNdWB_fixed_Asm_16 : 6075 NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr!", 6076 (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6077 def VLD1LNdWB_fixed_Asm_32 : 6078 NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr!", 6079 (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6080 def VLD1LNdWB_register_Asm_8 : 6081 NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr, $Rm", 6082 (ins VecListOneDByteIndexed:$list, addrmode6:$addr, 6083 rGPR:$Rm, pred:$p)>; 6084 def VLD1LNdWB_register_Asm_16 : 6085 NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr, $Rm", 6086 (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, 6087 rGPR:$Rm, pred:$p)>; 6088 def VLD1LNdWB_register_Asm_32 : 6089 NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr, $Rm", 6090 (ins VecListOneDWordIndexed:$list, addrmode6:$addr, 6091 rGPR:$Rm, pred:$p)>; 6092 6093 6094 // VST1 single-lane pseudo-instructions. These need special handling for 6095 // the lane index that an InstAlias can't handle, so we use these instead. 
6096 def VST1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr", 6097 (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; 6098 def VST1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr", 6099 (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6100 def VST1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr", 6101 (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6102 6103 def VST1LNdWB_fixed_Asm_8 : 6104 NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr!", 6105 (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; 6106 def VST1LNdWB_fixed_Asm_16 : 6107 NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr!", 6108 (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6109 def VST1LNdWB_fixed_Asm_32 : 6110 NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr!", 6111 (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6112 def VST1LNdWB_register_Asm_8 : 6113 NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr, $Rm", 6114 (ins VecListOneDByteIndexed:$list, addrmode6:$addr, 6115 rGPR:$Rm, pred:$p)>; 6116 def VST1LNdWB_register_Asm_16 : 6117 NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr, $Rm", 6118 (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, 6119 rGPR:$Rm, pred:$p)>; 6120 def VST1LNdWB_register_Asm_32 : 6121 NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr, $Rm", 6122 (ins VecListOneDWordIndexed:$list, addrmode6:$addr, 6123 rGPR:$Rm, pred:$p)>; 6124 6125 // VLD2 single-lane pseudo-instructions. These need special handling for 6126 // the lane index that an InstAlias can't handle, so we use these instead. 
6127 def VLD2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr", 6128 (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>; 6129 def VLD2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr", 6130 (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6131 def VLD2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr", 6132 (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6133 def VLD2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr", 6134 (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6135 def VLD2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr", 6136 (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6137 6138 def VLD2LNdWB_fixed_Asm_8 : 6139 NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr!", 6140 (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>; 6141 def VLD2LNdWB_fixed_Asm_16 : 6142 NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!", 6143 (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6144 def VLD2LNdWB_fixed_Asm_32 : 6145 NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!", 6146 (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6147 def VLD2LNqWB_fixed_Asm_16 : 6148 NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!", 6149 (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6150 def VLD2LNqWB_fixed_Asm_32 : 6151 NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!", 6152 (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6153 def VLD2LNdWB_register_Asm_8 : 6154 NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr, $Rm", 6155 (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, 6156 rGPR:$Rm, pred:$p)>; 6157 def VLD2LNdWB_register_Asm_16 : 6158 NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm", 6159 (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, 6160 rGPR:$Rm, 
pred:$p)>; 6161 def VLD2LNdWB_register_Asm_32 : 6162 NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm", 6163 (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, 6164 rGPR:$Rm, pred:$p)>; 6165 def VLD2LNqWB_register_Asm_16 : 6166 NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm", 6167 (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, 6168 rGPR:$Rm, pred:$p)>; 6169 def VLD2LNqWB_register_Asm_32 : 6170 NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm", 6171 (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, 6172 rGPR:$Rm, pred:$p)>; 6173 6174 6175 // VST2 single-lane pseudo-instructions. These need special handling for 6176 // the lane index that an InstAlias can't handle, so we use these instead. 6177 def VST2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr", 6178 (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>; 6179 def VST2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr", 6180 (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6181 def VST2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr", 6182 (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6183 def VST2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr", 6184 (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6185 def VST2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr", 6186 (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6187 6188 def VST2LNdWB_fixed_Asm_8 : 6189 NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr!", 6190 (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>; 6191 def VST2LNdWB_fixed_Asm_16 : 6192 NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!", 6193 (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6194 def VST2LNdWB_fixed_Asm_32 : 6195 NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!", 6196 (ins 
VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6197 def VST2LNqWB_fixed_Asm_16 : 6198 NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!", 6199 (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6200 def VST2LNqWB_fixed_Asm_32 : 6201 NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!", 6202 (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6203 def VST2LNdWB_register_Asm_8 : 6204 NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr, $Rm", 6205 (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, 6206 rGPR:$Rm, pred:$p)>; 6207 def VST2LNdWB_register_Asm_16 : 6208 NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm", 6209 (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, 6210 rGPR:$Rm, pred:$p)>; 6211 def VST2LNdWB_register_Asm_32 : 6212 NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm", 6213 (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, 6214 rGPR:$Rm, pred:$p)>; 6215 def VST2LNqWB_register_Asm_16 : 6216 NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm", 6217 (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, 6218 rGPR:$Rm, pred:$p)>; 6219 def VST2LNqWB_register_Asm_32 : 6220 NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm", 6221 (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, 6222 rGPR:$Rm, pred:$p)>; 6223 6224 // VLD3 all-lanes pseudo-instructions. These need special handling for 6225 // the lane index that an InstAlias can't handle, so we use these instead. 
6226 def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", 6227 (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; 6228 def VLD3DUPdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", 6229 (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; 6230 def VLD3DUPdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", 6231 (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; 6232 def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", 6233 (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; 6234 def VLD3DUPqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", 6235 (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; 6236 def VLD3DUPqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", 6237 (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; 6238 6239 def VLD3DUPdWB_fixed_Asm_8 : 6240 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", 6241 (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; 6242 def VLD3DUPdWB_fixed_Asm_16 : 6243 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", 6244 (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; 6245 def VLD3DUPdWB_fixed_Asm_32 : 6246 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", 6247 (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; 6248 def VLD3DUPqWB_fixed_Asm_8 : 6249 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", 6250 (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; 6251 def VLD3DUPqWB_fixed_Asm_16 : 6252 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", 6253 (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; 6254 def VLD3DUPqWB_fixed_Asm_32 : 6255 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", 6256 (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; 6257 def VLD3DUPdWB_register_Asm_8 : 6258 
NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", 6259 (ins VecListThreeDAllLanes:$list, addrmode6:$addr, 6260 rGPR:$Rm, pred:$p)>; 6261 def VLD3DUPdWB_register_Asm_16 : 6262 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", 6263 (ins VecListThreeDAllLanes:$list, addrmode6:$addr, 6264 rGPR:$Rm, pred:$p)>; 6265 def VLD3DUPdWB_register_Asm_32 : 6266 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", 6267 (ins VecListThreeDAllLanes:$list, addrmode6:$addr, 6268 rGPR:$Rm, pred:$p)>; 6269 def VLD3DUPqWB_register_Asm_8 : 6270 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", 6271 (ins VecListThreeQAllLanes:$list, addrmode6:$addr, 6272 rGPR:$Rm, pred:$p)>; 6273 def VLD3DUPqWB_register_Asm_16 : 6274 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", 6275 (ins VecListThreeQAllLanes:$list, addrmode6:$addr, 6276 rGPR:$Rm, pred:$p)>; 6277 def VLD3DUPqWB_register_Asm_32 : 6278 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", 6279 (ins VecListThreeQAllLanes:$list, addrmode6:$addr, 6280 rGPR:$Rm, pred:$p)>; 6281 6282 6283 // VLD3 single-lane pseudo-instructions. These need special handling for 6284 // the lane index that an InstAlias can't handle, so we use these instead. 
6285 def VLD3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", 6286 (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>; 6287 def VLD3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", 6288 (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6289 def VLD3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", 6290 (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6291 def VLD3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", 6292 (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6293 def VLD3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", 6294 (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6295 6296 def VLD3LNdWB_fixed_Asm_8 : 6297 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", 6298 (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>; 6299 def VLD3LNdWB_fixed_Asm_16 : 6300 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", 6301 (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6302 def VLD3LNdWB_fixed_Asm_32 : 6303 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", 6304 (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6305 def VLD3LNqWB_fixed_Asm_16 : 6306 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", 6307 (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6308 def VLD3LNqWB_fixed_Asm_32 : 6309 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", 6310 (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6311 def VLD3LNdWB_register_Asm_8 : 6312 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", 6313 (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, 6314 rGPR:$Rm, pred:$p)>; 6315 def VLD3LNdWB_register_Asm_16 : 6316 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", 6317 (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, 
6318 rGPR:$Rm, pred:$p)>; 6319 def VLD3LNdWB_register_Asm_32 : 6320 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", 6321 (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, 6322 rGPR:$Rm, pred:$p)>; 6323 def VLD3LNqWB_register_Asm_16 : 6324 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", 6325 (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, 6326 rGPR:$Rm, pred:$p)>; 6327 def VLD3LNqWB_register_Asm_32 : 6328 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", 6329 (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, 6330 rGPR:$Rm, pred:$p)>; 6331 6332 // VLD3 multiple structure pseudo-instructions. These need special handling for 6333 // the vector operands that the normal instructions don't yet model. 6334 // FIXME: Remove these when the register classes and instructions are updated. 6335 def VLD3dAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", 6336 (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; 6337 def VLD3dAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", 6338 (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; 6339 def VLD3dAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", 6340 (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; 6341 def VLD3qAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", 6342 (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; 6343 def VLD3qAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", 6344 (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; 6345 def VLD3qAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", 6346 (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; 6347 6348 def VLD3dWB_fixed_Asm_8 : 6349 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", 6350 (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; 6351 def VLD3dWB_fixed_Asm_16 : 6352 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", 6353 (ins VecListThreeD:$list, 
addrmode6:$addr, pred:$p)>; 6354 def VLD3dWB_fixed_Asm_32 : 6355 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", 6356 (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; 6357 def VLD3qWB_fixed_Asm_8 : 6358 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", 6359 (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; 6360 def VLD3qWB_fixed_Asm_16 : 6361 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", 6362 (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; 6363 def VLD3qWB_fixed_Asm_32 : 6364 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", 6365 (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; 6366 def VLD3dWB_register_Asm_8 : 6367 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", 6368 (ins VecListThreeD:$list, addrmode6:$addr, 6369 rGPR:$Rm, pred:$p)>; 6370 def VLD3dWB_register_Asm_16 : 6371 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", 6372 (ins VecListThreeD:$list, addrmode6:$addr, 6373 rGPR:$Rm, pred:$p)>; 6374 def VLD3dWB_register_Asm_32 : 6375 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", 6376 (ins VecListThreeD:$list, addrmode6:$addr, 6377 rGPR:$Rm, pred:$p)>; 6378 def VLD3qWB_register_Asm_8 : 6379 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", 6380 (ins VecListThreeQ:$list, addrmode6:$addr, 6381 rGPR:$Rm, pred:$p)>; 6382 def VLD3qWB_register_Asm_16 : 6383 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", 6384 (ins VecListThreeQ:$list, addrmode6:$addr, 6385 rGPR:$Rm, pred:$p)>; 6386 def VLD3qWB_register_Asm_32 : 6387 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", 6388 (ins VecListThreeQ:$list, addrmode6:$addr, 6389 rGPR:$Rm, pred:$p)>; 6390 6391 // VST3 single-lane pseudo-instructions. These need special handling for 6392 // the lane index that an InstAlias can't handle, so we use these instead. 
6393 def VST3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr", 6394 (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>; 6395 def VST3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", 6396 (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6397 def VST3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", 6398 (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6399 def VST3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", 6400 (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6401 def VST3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", 6402 (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6403 6404 def VST3LNdWB_fixed_Asm_8 : 6405 NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!", 6406 (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>; 6407 def VST3LNdWB_fixed_Asm_16 : 6408 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", 6409 (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6410 def VST3LNdWB_fixed_Asm_32 : 6411 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", 6412 (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6413 def VST3LNqWB_fixed_Asm_16 : 6414 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", 6415 (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6416 def VST3LNqWB_fixed_Asm_32 : 6417 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", 6418 (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6419 def VST3LNdWB_register_Asm_8 : 6420 NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm", 6421 (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, 6422 rGPR:$Rm, pred:$p)>; 6423 def VST3LNdWB_register_Asm_16 : 6424 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", 6425 (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, 
6426 rGPR:$Rm, pred:$p)>; 6427 def VST3LNdWB_register_Asm_32 : 6428 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", 6429 (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, 6430 rGPR:$Rm, pred:$p)>; 6431 def VST3LNqWB_register_Asm_16 : 6432 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", 6433 (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, 6434 rGPR:$Rm, pred:$p)>; 6435 def VST3LNqWB_register_Asm_32 : 6436 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", 6437 (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, 6438 rGPR:$Rm, pred:$p)>; 6439 6440 6441 // VST3 multiple structure pseudo-instructions. These need special handling for 6442 // the vector operands that the normal instructions don't yet model. 6443 // FIXME: Remove these when the register classes and instructions are updated. 6444 def VST3dAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr", 6445 (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; 6446 def VST3dAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", 6447 (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; 6448 def VST3dAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", 6449 (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; 6450 def VST3qAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr", 6451 (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; 6452 def VST3qAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", 6453 (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; 6454 def VST3qAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", 6455 (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; 6456 6457 def VST3dWB_fixed_Asm_8 : 6458 NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!", 6459 (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; 6460 def VST3dWB_fixed_Asm_16 : 6461 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", 6462 (ins 
VecListThreeD:$list, addrmode6:$addr, pred:$p)>; 6463 def VST3dWB_fixed_Asm_32 : 6464 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", 6465 (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; 6466 def VST3qWB_fixed_Asm_8 : 6467 NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!", 6468 (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; 6469 def VST3qWB_fixed_Asm_16 : 6470 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", 6471 (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; 6472 def VST3qWB_fixed_Asm_32 : 6473 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", 6474 (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; 6475 def VST3dWB_register_Asm_8 : 6476 NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm", 6477 (ins VecListThreeD:$list, addrmode6:$addr, 6478 rGPR:$Rm, pred:$p)>; 6479 def VST3dWB_register_Asm_16 : 6480 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", 6481 (ins VecListThreeD:$list, addrmode6:$addr, 6482 rGPR:$Rm, pred:$p)>; 6483 def VST3dWB_register_Asm_32 : 6484 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", 6485 (ins VecListThreeD:$list, addrmode6:$addr, 6486 rGPR:$Rm, pred:$p)>; 6487 def VST3qWB_register_Asm_8 : 6488 NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm", 6489 (ins VecListThreeQ:$list, addrmode6:$addr, 6490 rGPR:$Rm, pred:$p)>; 6491 def VST3qWB_register_Asm_16 : 6492 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", 6493 (ins VecListThreeQ:$list, addrmode6:$addr, 6494 rGPR:$Rm, pred:$p)>; 6495 def VST3qWB_register_Asm_32 : 6496 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", 6497 (ins VecListThreeQ:$list, addrmode6:$addr, 6498 rGPR:$Rm, pred:$p)>; 6499 6500 // VLD4 all-lanes pseudo-instructions. These need special handling for 6501 // the lane index that an InstAlias can't handle, so we use these instead. 
6502 def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", 6503 (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; 6504 def VLD4DUPdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", 6505 (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; 6506 def VLD4DUPdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", 6507 (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; 6508 def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", 6509 (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; 6510 def VLD4DUPqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", 6511 (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; 6512 def VLD4DUPqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", 6513 (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; 6514 6515 def VLD4DUPdWB_fixed_Asm_8 : 6516 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", 6517 (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; 6518 def VLD4DUPdWB_fixed_Asm_16 : 6519 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", 6520 (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; 6521 def VLD4DUPdWB_fixed_Asm_32 : 6522 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", 6523 (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; 6524 def VLD4DUPqWB_fixed_Asm_8 : 6525 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", 6526 (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; 6527 def VLD4DUPqWB_fixed_Asm_16 : 6528 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", 6529 (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; 6530 def VLD4DUPqWB_fixed_Asm_32 : 6531 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", 6532 (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; 6533 def VLD4DUPdWB_register_Asm_8 : 6534 
NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", 6535 (ins VecListFourDAllLanes:$list, addrmode6:$addr, 6536 rGPR:$Rm, pred:$p)>; 6537 def VLD4DUPdWB_register_Asm_16 : 6538 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", 6539 (ins VecListFourDAllLanes:$list, addrmode6:$addr, 6540 rGPR:$Rm, pred:$p)>; 6541 def VLD4DUPdWB_register_Asm_32 : 6542 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", 6543 (ins VecListFourDAllLanes:$list, addrmode6:$addr, 6544 rGPR:$Rm, pred:$p)>; 6545 def VLD4DUPqWB_register_Asm_8 : 6546 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", 6547 (ins VecListFourQAllLanes:$list, addrmode6:$addr, 6548 rGPR:$Rm, pred:$p)>; 6549 def VLD4DUPqWB_register_Asm_16 : 6550 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", 6551 (ins VecListFourQAllLanes:$list, addrmode6:$addr, 6552 rGPR:$Rm, pred:$p)>; 6553 def VLD4DUPqWB_register_Asm_32 : 6554 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", 6555 (ins VecListFourQAllLanes:$list, addrmode6:$addr, 6556 rGPR:$Rm, pred:$p)>; 6557 6558 6559 // VLD4 single-lane pseudo-instructions. These need special handling for 6560 // the lane index that an InstAlias can't handle, so we use these instead. 
// Plain (no-writeback) addressing forms.
def VLD4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
                  (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
                  (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
                  (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
                  (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
                  (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;

// Post-increment writeback forms ("$addr!").
def VLD4LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
                  (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD4LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
                  (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD4LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
                  (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD4LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
                  (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD4LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
                  (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Register-increment writeback forms ("$addr, $Rm").
def VLD4LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListFourDByteIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD4LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListFourDHWordIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD4LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListFourDWordIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD4LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListFourQHWordIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD4LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListFourQWordIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;


// VLD4 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
def VLD4dAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VLD4dAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VLD4dAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VLD4qAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
def VLD4qAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
def VLD4qAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;

// Post-increment writeback forms ("$addr!").
def VLD4dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VLD4dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VLD4dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VLD4qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
def VLD4qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
def VLD4qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
// Register-increment writeback forms ("$addr, $Rm").
def VLD4dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourD:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourD:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourD:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourQ:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourQ:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourQ:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;

// VST4 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// Plain (no-writeback) addressing forms.
def VST4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
                  (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
                  (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
                  (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
                  (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
                  (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;

// Post-increment writeback forms ("$addr!").
def VST4LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
                  (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST4LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
                  (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST4LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
                  (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST4LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
                  (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST4LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
                  (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Register-increment writeback forms ("$addr, $Rm").
def VST4LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListFourDByteIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST4LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListFourDHWordIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST4LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListFourDWordIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST4LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListFourQHWordIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST4LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListFourQWordIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;


// VST4 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
def VST4dAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
               (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VST4dAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VST4dAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VST4qAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
def VST4qAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
def VST4qAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;

// Post-increment writeback forms ("$addr!").
def VST4dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VST4dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VST4dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VST4qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
def VST4qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
def VST4qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
// Register-increment writeback forms ("$addr, $Rm").
def VST4dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourD:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourD:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourD:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourQ:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourQ:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourQ:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;

// VMOV takes an optional datatype suffix. A register-to-register vmov is
// encoded as a VORR with identical source operands.
defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
                          (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
                          (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;

// VCLE (register) is an assembler alias for VCGE w/ the operands reversed.
// D-register versions.
def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm",
                    (VCGEsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s16 $Dd, $Dn, $Dm",
                    (VCGEsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s32 $Dd, $Dn, $Dm",
                    (VCGEsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u8 $Dd, $Dn, $Dm",
                    (VCGEuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u16 $Dd, $Dn, $Dm",
                    (VCGEuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm",
                    (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm",
                    (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// Q-register versions.
def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm",
                    (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s16 $Qd, $Qn, $Qm",
                    (VCGEsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s32 $Qd, $Qn, $Qm",
                    (VCGEsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u8 $Qd, $Qn, $Qm",
                    (VCGEuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u16 $Qd, $Qn, $Qm",
                    (VCGEuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm",
                    (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm",
                    (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;

// VCLT (register) is an assembler alias for VCGT w/ the operands reversed.
// D-register versions.
def : NEONInstAlias<"vclt${p}.s8 $Dd, $Dn, $Dm",
                    (VCGTsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s16 $Dd, $Dn, $Dm",
                    (VCGTsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s32 $Dd, $Dn, $Dm",
                    (VCGTsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u8 $Dd, $Dn, $Dm",
                    (VCGTuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u16 $Dd, $Dn, $Dm",
                    (VCGTuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm",
                    (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm",
                    (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// Q-register versions.
def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm",
                    (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s16 $Qd, $Qn, $Qm",
                    (VCGTsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s32 $Qd, $Qn, $Qm",
                    (VCGTsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u8 $Qd, $Qn, $Qm",
                    (VCGTuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u16 $Qd, $Qn, $Qm",
                    (VCGTuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm",
                    (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm",
                    (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;

// Two-operand variants for VEXT ($Vdn is both destination and first source).
// The immediate range shrinks as the element size grows.
def : NEONInstAlias<"vext${p}.8 $Vdn, $Vm, $imm",
                    (VEXTd8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_7:$imm, pred:$p)>;
def : NEONInstAlias<"vext${p}.16 $Vdn, $Vm, $imm",
                    (VEXTd16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_3:$imm, pred:$p)>;
def : NEONInstAlias<"vext${p}.32 $Vdn, $Vm, $imm",
                    (VEXTd32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_1:$imm, pred:$p)>;

def : NEONInstAlias<"vext${p}.8 $Vdn, $Vm, $imm",
                    (VEXTq8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_15:$imm, pred:$p)>;
def : NEONInstAlias<"vext${p}.16 $Vdn, $Vm, $imm",
                    (VEXTq16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_7:$imm, pred:$p)>;
def : NEONInstAlias<"vext${p}.32 $Vdn, $Vm, $imm",
                    (VEXTq32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_3:$imm, pred:$p)>;
def : NEONInstAlias<"vext${p}.64 $Vdn, $Vm, $imm",
                    (VEXTq64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_1:$imm, pred:$p)>;

// Two-operand variants for VQDMULH
def : NEONInstAlias<"vqdmulh${p}.s16 $Vdn, $Vm",
                    (VQDMULHv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vqdmulh${p}.s32 $Vdn, $Vm",
                    (VQDMULHv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;

def : NEONInstAlias<"vqdmulh${p}.s16 $Vdn, $Vm",
                    (VQDMULHv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vqdmulh${p}.s32 $Vdn, $Vm",
                    (VQDMULHv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;

// Two-operand variants for VMAX.
// $Vdn is both destination and first source.
def : NEONInstAlias<"vmax${p}.s8 $Vdn, $Vm",
                    (VMAXsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vmax${p}.s16 $Vdn, $Vm",
                    (VMAXsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vmax${p}.s32 $Vdn, $Vm",
                    (VMAXsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vmax${p}.u8 $Vdn, $Vm",
                    (VMAXuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vmax${p}.u16 $Vdn, $Vm",
                    (VMAXuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vmax${p}.u32 $Vdn, $Vm",
                    (VMAXuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vmax${p}.f32 $Vdn, $Vm",
                    (VMAXfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;

def : NEONInstAlias<"vmax${p}.s8 $Vdn, $Vm",
                    (VMAXsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vmax${p}.s16 $Vdn, $Vm",
                    (VMAXsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vmax${p}.s32 $Vdn, $Vm",
                    (VMAXsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vmax${p}.u8 $Vdn, $Vm",
                    (VMAXuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vmax${p}.u16 $Vdn, $Vm",
                    (VMAXuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vmax${p}.u32 $Vdn, $Vm",
                    (VMAXuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vmax${p}.f32 $Vdn, $Vm",
                    (VMAXfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;

// Two-operand variants for VMIN.
def : NEONInstAlias<"vmin${p}.s8 $Vdn, $Vm",
                    (VMINsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vmin${p}.s16 $Vdn, $Vm",
                    (VMINsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vmin${p}.s32 $Vdn, $Vm",
                    (VMINsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vmin${p}.u8 $Vdn, $Vm",
                    (VMINuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vmin${p}.u16 $Vdn, $Vm",
                    (VMINuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vmin${p}.u32 $Vdn, $Vm",
                    (VMINuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vmin${p}.f32 $Vdn, $Vm",
                    (VMINfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;

def : NEONInstAlias<"vmin${p}.s8 $Vdn, $Vm",
                    (VMINsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vmin${p}.s16 $Vdn, $Vm",
                    (VMINsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vmin${p}.s32 $Vdn, $Vm",
                    (VMINsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vmin${p}.u8 $Vdn, $Vm",
                    (VMINuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vmin${p}.u16 $Vdn, $Vm",
                    (VMINuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vmin${p}.u32 $Vdn, $Vm",
                    (VMINuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vmin${p}.f32 $Vdn, $Vm",
                    (VMINfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;

// Two-operand variants for VPADD.
def : NEONInstAlias<"vpadd${p}.i8 $Vdn, $Vm",
                    (VPADDi8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vpadd${p}.i16 $Vdn, $Vm",
                    (VPADDi16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vpadd${p}.i32 $Vdn, $Vm",
                    (VPADDi32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vpadd${p}.f32 $Vdn, $Vm",
                    (VPADDf DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;

// Two-operand variants for VSRA ($Vdm is both destination and shifted source).
// Signed.
def : NEONInstAlias<"vsra${p}.s8 $Vdm, $imm",
                    (VSRAsv8i8 DPR:$Vdm, DPR:$Vdm, shr_imm8:$imm, pred:$p)>;
def : NEONInstAlias<"vsra${p}.s16 $Vdm, $imm",
                    (VSRAsv4i16 DPR:$Vdm, DPR:$Vdm, shr_imm16:$imm, pred:$p)>;
def : NEONInstAlias<"vsra${p}.s32 $Vdm, $imm",
                    (VSRAsv2i32 DPR:$Vdm, DPR:$Vdm, shr_imm32:$imm, pred:$p)>;
def : NEONInstAlias<"vsra${p}.s64 $Vdm, $imm",
                    (VSRAsv1i64 DPR:$Vdm, DPR:$Vdm, shr_imm64:$imm, pred:$p)>;

def : NEONInstAlias<"vsra${p}.s8 $Vdm, $imm",
                    (VSRAsv16i8 QPR:$Vdm, QPR:$Vdm, shr_imm8:$imm, pred:$p)>;
def : NEONInstAlias<"vsra${p}.s16 $Vdm, $imm",
                    (VSRAsv8i16 QPR:$Vdm, QPR:$Vdm, shr_imm16:$imm, pred:$p)>;
def : NEONInstAlias<"vsra${p}.s32 $Vdm, $imm",
                    (VSRAsv4i32 QPR:$Vdm, QPR:$Vdm, shr_imm32:$imm, pred:$p)>;
def : NEONInstAlias<"vsra${p}.s64 $Vdm, $imm",
                    (VSRAsv2i64 QPR:$Vdm, QPR:$Vdm, shr_imm64:$imm, pred:$p)>;

// Unsigned.
def : NEONInstAlias<"vsra${p}.u8 $Vdm, $imm",
                    (VSRAuv8i8 DPR:$Vdm, DPR:$Vdm, shr_imm8:$imm, pred:$p)>;
def : NEONInstAlias<"vsra${p}.u16 $Vdm, $imm",
                    (VSRAuv4i16 DPR:$Vdm, DPR:$Vdm, shr_imm16:$imm, pred:$p)>;
def : NEONInstAlias<"vsra${p}.u32 $Vdm, $imm",
                    (VSRAuv2i32 DPR:$Vdm, DPR:$Vdm, shr_imm32:$imm, pred:$p)>;
def : NEONInstAlias<"vsra${p}.u64 $Vdm, $imm",
                    (VSRAuv1i64 DPR:$Vdm, DPR:$Vdm, shr_imm64:$imm, pred:$p)>;

def : NEONInstAlias<"vsra${p}.u8 $Vdm, $imm",
                    (VSRAuv16i8 QPR:$Vdm, QPR:$Vdm, shr_imm8:$imm, pred:$p)>;
def : NEONInstAlias<"vsra${p}.u16 $Vdm, $imm",
                    (VSRAuv8i16 QPR:$Vdm, QPR:$Vdm, shr_imm16:$imm, pred:$p)>;
def : NEONInstAlias<"vsra${p}.u32 $Vdm, $imm",
                    (VSRAuv4i32 QPR:$Vdm, QPR:$Vdm, shr_imm32:$imm, pred:$p)>;
def : NEONInstAlias<"vsra${p}.u64 $Vdm, $imm",
                    (VSRAuv2i64 QPR:$Vdm, QPR:$Vdm, shr_imm64:$imm, pred:$p)>;

// Two-operand variants for VSRI.
def : NEONInstAlias<"vsri${p}.8 $Vdm, $imm",
                    (VSRIv8i8 DPR:$Vdm, DPR:$Vdm, shr_imm8:$imm, pred:$p)>;
def : NEONInstAlias<"vsri${p}.16 $Vdm, $imm",
                    (VSRIv4i16 DPR:$Vdm, DPR:$Vdm, shr_imm16:$imm, pred:$p)>;
def : NEONInstAlias<"vsri${p}.32 $Vdm, $imm",
                    (VSRIv2i32 DPR:$Vdm, DPR:$Vdm, shr_imm32:$imm, pred:$p)>;
def : NEONInstAlias<"vsri${p}.64 $Vdm, $imm",
                    (VSRIv1i64 DPR:$Vdm, DPR:$Vdm, shr_imm64:$imm, pred:$p)>;

def : NEONInstAlias<"vsri${p}.8 $Vdm, $imm",
                    (VSRIv16i8 QPR:$Vdm, QPR:$Vdm, shr_imm8:$imm, pred:$p)>;
def : NEONInstAlias<"vsri${p}.16 $Vdm, $imm",
                    (VSRIv8i16 QPR:$Vdm, QPR:$Vdm, shr_imm16:$imm, pred:$p)>;
def : NEONInstAlias<"vsri${p}.32 $Vdm, $imm",
                    (VSRIv4i32 QPR:$Vdm, QPR:$Vdm, shr_imm32:$imm, pred:$p)>;
def : NEONInstAlias<"vsri${p}.64 $Vdm, $imm",
                    (VSRIv2i64 QPR:$Vdm, QPR:$Vdm, shr_imm64:$imm, pred:$p)>;

// Two-operand variants for VSLI.
def : NEONInstAlias<"vsli${p}.8 $Vdm, $imm",
                    (VSLIv8i8 DPR:$Vdm, DPR:$Vdm, shr_imm8:$imm, pred:$p)>;
def : NEONInstAlias<"vsli${p}.16 $Vdm, $imm",
                    (VSLIv4i16 DPR:$Vdm, DPR:$Vdm, shr_imm16:$imm, pred:$p)>;
def : NEONInstAlias<"vsli${p}.32 $Vdm, $imm",
                    (VSLIv2i32 DPR:$Vdm, DPR:$Vdm, shr_imm32:$imm, pred:$p)>;
def : NEONInstAlias<"vsli${p}.64 $Vdm, $imm",
                    (VSLIv1i64 DPR:$Vdm, DPR:$Vdm, shr_imm64:$imm, pred:$p)>;

def : NEONInstAlias<"vsli${p}.8 $Vdm, $imm",
                    (VSLIv16i8 QPR:$Vdm, QPR:$Vdm, shr_imm8:$imm, pred:$p)>;
def : NEONInstAlias<"vsli${p}.16 $Vdm, $imm",
                    (VSLIv8i16 QPR:$Vdm, QPR:$Vdm, shr_imm16:$imm, pred:$p)>;
def : NEONInstAlias<"vsli${p}.32 $Vdm, $imm",
                    (VSLIv4i32 QPR:$Vdm, QPR:$Vdm, shr_imm32:$imm, pred:$p)>;
def : NEONInstAlias<"vsli${p}.64 $Vdm, $imm",
                    (VSLIv2i64 QPR:$Vdm, QPR:$Vdm, shr_imm64:$imm, pred:$p)>;

// Two-operand variants for VHSUB.
// Signed.
def : NEONInstAlias<"vhsub${p}.s8 $Vdn, $Vm",
                    (VHSUBsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vhsub${p}.s16 $Vdn, $Vm",
                    (VHSUBsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vhsub${p}.s32 $Vdn, $Vm",
                    (VHSUBsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;

def : NEONInstAlias<"vhsub${p}.s8 $Vdn, $Vm",
                    (VHSUBsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vhsub${p}.s16 $Vdn, $Vm",
                    (VHSUBsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vhsub${p}.s32 $Vdn, $Vm",
                    (VHSUBsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;

// Unsigned.
def : NEONInstAlias<"vhsub${p}.u8 $Vdn, $Vm",
                    (VHSUBuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vhsub${p}.u16 $Vdn, $Vm",
                    (VHSUBuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vhsub${p}.u32 $Vdn, $Vm",
                    (VHSUBuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;

def : NEONInstAlias<"vhsub${p}.u8 $Vdn, $Vm",
                    (VHSUBuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vhsub${p}.u16 $Vdn, $Vm",
                    (VHSUBuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vhsub${p}.u32 $Vdn, $Vm",
                    (VHSUBuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;


// Two-operand variants for VHADD.
// Signed.
def : NEONInstAlias<"vhadd${p}.s8 $Vdn, $Vm",
                    (VHADDsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vhadd${p}.s16 $Vdn, $Vm",
                    (VHADDsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vhadd${p}.s32 $Vdn, $Vm",
                    (VHADDsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;

def : NEONInstAlias<"vhadd${p}.s8 $Vdn, $Vm",
                    (VHADDsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vhadd${p}.s16 $Vdn, $Vm",
                    (VHADDsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vhadd${p}.s32 $Vdn, $Vm",
                    (VHADDsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;

// Unsigned.
def : NEONInstAlias<"vhadd${p}.u8 $Vdn, $Vm",
                    (VHADDuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vhadd${p}.u16 $Vdn, $Vm",
                    (VHADDuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vhadd${p}.u32 $Vdn, $Vm",
                    (VHADDuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;

def : NEONInstAlias<"vhadd${p}.u8 $Vdn, $Vm",
                    (VHADDuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vhadd${p}.u16 $Vdn, $Vm",
                    (VHADDuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vhadd${p}.u32 $Vdn, $Vm",
                    (VHADDuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;

// Two-operand variants for VRHADD.
// Note: the second source was renamed $Rm -> $Vm for consistency with the
// other two-operand vector aliases above; the name is purely local to each
// alias, so this does not change matching behavior.
// Signed.
def : NEONInstAlias<"vrhadd${p}.s8 $Vdn, $Vm",
                    (VRHADDsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vrhadd${p}.s16 $Vdn, $Vm",
                    (VRHADDsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vrhadd${p}.s32 $Vdn, $Vm",
                    (VRHADDsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;

def : NEONInstAlias<"vrhadd${p}.s8 $Vdn, $Vm",
                    (VRHADDsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vrhadd${p}.s16 $Vdn, $Vm",
                    (VRHADDsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vrhadd${p}.s32 $Vdn, $Vm",
                    (VRHADDsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;

// Unsigned.
def : NEONInstAlias<"vrhadd${p}.u8 $Vdn, $Vm",
                    (VRHADDuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vrhadd${p}.u16 $Vdn, $Vm",
                    (VRHADDuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vrhadd${p}.u32 $Vdn, $Vm",
                    (VRHADDuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;

def : NEONInstAlias<"vrhadd${p}.u8 $Vdn, $Vm",
                    (VRHADDuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vrhadd${p}.u16 $Vdn, $Vm",
                    (VRHADDuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vrhadd${p}.u32 $Vdn, $Vm",
                    (VRHADDuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;

// VSWP allows, but does not require, a type suffix.
defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
                          (VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
                          (VSWPq QPR:$Vd, QPR:$Vm, pred:$p)>;

// VBIF, VBIT, and VBSL allow, but do not require, a type suffix.
defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
                          (VBIFd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
                          (VBITd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
                          (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
                          (VBIFq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
                          (VBITq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
                          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;

// "vmov Rd, #-imm" can be handled via "vmvn".
// nImmVMOVI32Neg matches the negated immediate, so the inverting instruction
// (VMVN for vmov, VMOV for vmvn) produces the requested value.
def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
                    (VMVNv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
                    (VMVNv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
                    (VMOVv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
                    (VMOVv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;

// 'gas' compatibility aliases for quad-word instructions. Strictly speaking,
// these should restrict to just the Q register variants, but the register
// classes are enough to match correctly regardless, so we keep it simple
// and just use MnemonicAlias.
def : NEONMnemonicAlias<"vbicq", "vbic">;
def : NEONMnemonicAlias<"vandq", "vand">;
def : NEONMnemonicAlias<"veorq", "veor">;
def : NEONMnemonicAlias<"vorrq", "vorr">;

def : NEONMnemonicAlias<"vmovq", "vmov">;
def : NEONMnemonicAlias<"vmvnq", "vmvn">;
// Explicit versions for floating point so that the FPImm variants get
// handled early. The parser gets confused otherwise.
def : NEONMnemonicAlias<"vmovq.f32", "vmov.f32">;
def : NEONMnemonicAlias<"vmovq.f64", "vmov.f64">;

def : NEONMnemonicAlias<"vaddq", "vadd">;
def : NEONMnemonicAlias<"vsubq", "vsub">;

def : NEONMnemonicAlias<"vminq", "vmin">;
def : NEONMnemonicAlias<"vmaxq", "vmax">;

def : NEONMnemonicAlias<"vmulq", "vmul">;

def : NEONMnemonicAlias<"vabsq", "vabs">;

def : NEONMnemonicAlias<"vshlq", "vshl">;
def : NEONMnemonicAlias<"vshrq", "vshr">;

def : NEONMnemonicAlias<"vcvtq", "vcvt">;

def : NEONMnemonicAlias<"vcleq", "vcle">;
def : NEONMnemonicAlias<"vceqq", "vceq">;

def : NEONMnemonicAlias<"vzipq", "vzip">;
def : NEONMnemonicAlias<"vswpq", "vswp">;

def : NEONMnemonicAlias<"vrecpeq.f32", "vrecpe.f32">;
def : NEONMnemonicAlias<"vrecpeq.u32", "vrecpe.u32">;


// Alias for loading floating point immediates that aren't representable
// using the vmov.f32 encoding but the bitpattern is representable using
// the .i32 encoding.
def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
                    (VMOVv4i32 QPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
                    (VMOVv2i32 DPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;