//===-- ARMInstrNEON.td - NEON support for ARM -------------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the ARM NEON instruction set.
//
//===----------------------------------------------------------------------===//


//===----------------------------------------------------------------------===//
// NEON-specific Operands.
//===----------------------------------------------------------------------===//

// Generic NEON modified-immediate operand, printed via the shared
// modified-immediate printer.
def nModImm : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
}

// Splat-style modified immediates: one AsmOperandClass per element size so
// the assembly matcher can validate the immediate for each data type.
def nImmSplatI8AsmOperand : AsmOperandClass { let Name = "NEONi8splat"; }
def nImmSplatI8 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI8AsmOperand;
}
def nImmSplatI16AsmOperand : AsmOperandClass { let Name = "NEONi16splat"; }
def nImmSplatI16 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI16AsmOperand;
}
def nImmSplatI32AsmOperand : AsmOperandClass { let Name = "NEONi32splat"; }
def nImmSplatI32 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI32AsmOperand;
}
def nImmVMOVI32AsmOperand : AsmOperandClass { let Name = "NEONi32vmov"; }
def nImmVMOVI32 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVMOVI32AsmOperand;
}

// Immediates formed by replicating a single byte across the element, as
// recognized by the isNEONi16/i32ByteReplicate predicates in the asm parser.
// VMOV variants render with addNEONvmovByteReplicateOperands; VMVN variants
// render the inverted form via addNEONinvByteReplicateOperands.
def nImmVMOVI16AsmOperandByteReplicate :
  AsmOperandClass {
  let Name = "NEONi16vmovByteReplicate";
  let PredicateMethod = "isNEONi16ByteReplicate";
  let RenderMethod = "addNEONvmovByteReplicateOperands";
}
def nImmVMOVI32AsmOperandByteReplicate :
  AsmOperandClass {
  let Name = "NEONi32vmovByteReplicate";
  let PredicateMethod = "isNEONi32ByteReplicate";
  let RenderMethod = "addNEONvmovByteReplicateOperands";
}
def nImmVMVNI16AsmOperandByteReplicate :
  AsmOperandClass {
  let Name = "NEONi16invByteReplicate";
  let PredicateMethod = "isNEONi16ByteReplicate";
  let RenderMethod = "addNEONinvByteReplicateOperands";
}
def nImmVMVNI32AsmOperandByteReplicate :
  AsmOperandClass {
  let Name = "NEONi32invByteReplicate";
  let PredicateMethod = "isNEONi32ByteReplicate";
  let RenderMethod = "addNEONinvByteReplicateOperands";
}

def nImmVMOVI16ByteReplicate : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVMOVI16AsmOperandByteReplicate;
}
def nImmVMOVI32ByteReplicate : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVMOVI32AsmOperandByteReplicate;
}
def nImmVMVNI16ByteReplicate : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVMVNI16AsmOperandByteReplicate;
}
def nImmVMVNI32ByteReplicate : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVMVNI32AsmOperandByteReplicate;
}

def nImmVMOVI32NegAsmOperand : AsmOperandClass { let Name = "NEONi32vmovNeg"; }
def nImmVMOVI32Neg : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVMOVI32NegAsmOperand;
}
// Floating-point immediate (printed with the FP-immediate printer).
def nImmVMOVF32 : Operand<i32> {
  let PrintMethod = "printFPImmOperand";
  let ParserMatchClass = FPImmOperand;
}
def nImmSplatI64AsmOperand : AsmOperandClass { let Name = "NEONi64splat"; }
def nImmSplatI64 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI64AsmOperand;
}

// Lane-index operands. The ImmLeaf predicate bounds the index by the number
// of lanes of that element size in a 64-bit D register (8/4/2).
def VectorIndex8Operand  : AsmOperandClass { let Name = "VectorIndex8"; }
def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; }
def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; }
def VectorIndex8 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 8;
}]> {
  let ParserMatchClass = VectorIndex8Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex16 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 4;
}]> {
  let ParserMatchClass = VectorIndex16Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 2;
}]> {
  let ParserMatchClass = VectorIndex32Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}

// Register list of one D register.
def VecListOneDAsmOperand : AsmOperandClass {
  let Name = "VecListOneD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> {
  let ParserMatchClass = VecListOneDAsmOperand;
}
// Register list of two sequential D registers.
def VecListDPairAsmOperand : AsmOperandClass {
  let Name = "VecListDPair";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPair : RegisterOperand<DPair, "printVectorListTwo"> {
  let ParserMatchClass = VecListDPairAsmOperand;
}
// Register list of three sequential D registers.
def VecListThreeDAsmOperand : AsmOperandClass {
  let Name = "VecListThreeD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> {
  let ParserMatchClass = VecListThreeDAsmOperand;
}
// Register list of four sequential D registers.
def VecListFourDAsmOperand : AsmOperandClass {
  let Name = "VecListFourD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> {
  let ParserMatchClass = VecListFourDAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers).
def VecListDPairSpacedAsmOperand : AsmOperandClass {
  let Name = "VecListDPairSpaced";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListTwoSpaced"> {
  let ParserMatchClass = VecListDPairSpacedAsmOperand;
}
// Register list of three D registers spaced by 2 (three Q registers).
def VecListThreeQAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQ";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeQ : RegisterOperand<DPR, "printVectorListThreeSpaced"> {
  let ParserMatchClass = VecListThreeQAsmOperand;
}
// Register list of four D registers spaced by 2 (four Q registers).
def VecListFourQAsmOperand : AsmOperandClass {
  let Name = "VecListFourQ";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourQ : RegisterOperand<DPR, "printVectorListFourSpaced"> {
  let ParserMatchClass = VecListFourQAsmOperand;
}

// Register list of one D register, with "all lanes" subscripting.
def VecListOneDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListOneDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> {
  let ParserMatchClass = VecListOneDAllLanesAsmOperand;
}
// Register list of two D registers, with "all lanes" subscripting.
def VecListDPairAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListDPairAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairAllLanes : RegisterOperand<DPair,
                                           "printVectorListTwoAllLanes"> {
  let ParserMatchClass = VecListDPairAllLanesAsmOperand;
}
// Register list of two D registers spaced by 2, with "all lanes" subscripting.
def VecListDPairSpacedAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListDPairSpacedAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairSpacedAllLanes : RegisterOperand<DPair,
                                              "printVectorListTwoSpacedAllLanes"> {
  let ParserMatchClass = VecListDPairSpacedAllLanesAsmOperand;
}
// Register list of three D registers, with "all lanes" subscripting.
def VecListThreeDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeDAllLanes : RegisterOperand<DPR,
                                            "printVectorListThreeAllLanes"> {
  let ParserMatchClass = VecListThreeDAllLanesAsmOperand;
}
// Register list of three D registers spaced by 2 (three sequential Q regs).
def VecListThreeQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeQAllLanes : RegisterOperand<DPR,
                                         "printVectorListThreeSpacedAllLanes"> {
  let ParserMatchClass = VecListThreeQAllLanesAsmOperand;
}
// Register list of four D registers, with "all lanes" subscripting.
def VecListFourDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListFourDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourDAllLanes : RegisterOperand<DPR, "printVectorListFourAllLanes"> {
  let ParserMatchClass = VecListFourDAllLanesAsmOperand;
}
// Register list of four D registers spaced by 2 (four sequential Q regs).
def VecListFourQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListFourQAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourQAllLanes : RegisterOperand<DPR,
                                          "printVectorListFourSpacedAllLanes"> {
  let ParserMatchClass = VecListFourQAllLanesAsmOperand;
}


// Lane-indexed vector lists: unlike the plain lists above, each of these is
// a compound operand carrying the base D register plus an immediate lane
// index (MIOperandInfo = (DPR, i32imm)).

// Register list of one D register, with byte lane subscripting.
def VecListOneDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListOneDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListOneDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// Register list of two D registers with byte lane subscripting.
def VecListTwoDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListTwoDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListTwoDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of two Q registers with half-word lane subscripting.
def VecListTwoQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListTwoQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}


// Register list of three D registers with byte lane subscripting.
def VecListThreeDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListThreeDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListThreeDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of three Q registers with half-word lane subscripting.
def VecListThreeQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListThreeQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// Register list of four D registers with byte lane subscripting.
def VecListFourDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListFourDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListFourDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of four Q registers with half-word lane subscripting.
def VecListFourQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListFourQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// PatFrags that classify loads/stores by the alignment recorded on the
// memory SDNode. "dword" accepts >= 8 bytes; the others match exactly,
// except non_word_aligned* which matches anything below word alignment.
def dword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() >= 8;
}]>;
def dword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                 (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() >= 8;
}]>;
def word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 4;
}]>;
def word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 4;
}]>;
def hword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 2;
}]>;
def hword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                 (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 2;
}]>;
def byte_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 1;
}]>;
def byte_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 1;
}]>;
def non_word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() < 4;
}]>;
def non_word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                    (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() < 4;
}]>;

//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//

// Vector comparison profiles: SDTARMVCMP takes two operands of the same type
// and yields an integer vector; the "Z" variants (profile <1, 1>) compare a
// single operand against zero.
def SDTARMVCMP    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
def SDTARMVCMPZ   : SDTypeProfile<1, 1, []>;

def NEONvceq      : SDNode<"ARMISD::VCEQ", SDTARMVCMP>;
def NEONvceqz     : SDNode<"ARMISD::VCEQZ", SDTARMVCMPZ>;
def NEONvcge      : SDNode<"ARMISD::VCGE", SDTARMVCMP>;
def NEONvcgez     : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>;
def NEONvclez     : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>;
def NEONvcgeu     : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
def NEONvcgt      : SDNode<"ARMISD::VCGT", SDTARMVCMP>;
def NEONvcgtz     : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>;
def NEONvcltz     : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>;
def NEONvcgtu     : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
def NEONvtst      : SDNode<"ARMISD::VTST", SDTARMVCMP>;

// Types for vector shift by immediates. The "SHX" version is for long and
// narrow operations where the source and destination vectors have different
// types. The "SHINS" version is for shift and insert operations.
def SDTARMVSH     : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                         SDTCisVT<2, i32>]>;
def SDTARMVSHX    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                         SDTCisVT<2, i32>]>;
def SDTARMVSHINS  : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;

def NEONvshl      : SDNode<"ARMISD::VSHL", SDTARMVSH>;
def NEONvshrs     : SDNode<"ARMISD::VSHRs", SDTARMVSH>;
def NEONvshru     : SDNode<"ARMISD::VSHRu", SDTARMVSH>;
def NEONvshrn     : SDNode<"ARMISD::VSHRN", SDTARMVSHX>;

def NEONvrshrs    : SDNode<"ARMISD::VRSHRs", SDTARMVSH>;
def NEONvrshru    : SDNode<"ARMISD::VRSHRu", SDTARMVSH>;
def NEONvrshrn    : SDNode<"ARMISD::VRSHRN", SDTARMVSHX>;

def NEONvqshls    : SDNode<"ARMISD::VQSHLs", SDTARMVSH>;
def NEONvqshlu    : SDNode<"ARMISD::VQSHLu", SDTARMVSH>;
def NEONvqshlsu   : SDNode<"ARMISD::VQSHLsu", SDTARMVSH>;
def NEONvqshrns   : SDNode<"ARMISD::VQSHRNs", SDTARMVSHX>;
def NEONvqshrnu   : SDNode<"ARMISD::VQSHRNu", SDTARMVSHX>;
def NEONvqshrnsu  : SDNode<"ARMISD::VQSHRNsu", SDTARMVSHX>;

def NEONvqrshrns  : SDNode<"ARMISD::VQRSHRNs", SDTARMVSHX>;
def NEONvqrshrnu  : SDNode<"ARMISD::VQRSHRNu", SDTARMVSHX>;
def NEONvqrshrnsu : SDNode<"ARMISD::VQRSHRNsu", SDTARMVSHX>;

def NEONvsli      : SDNode<"ARMISD::VSLI", SDTARMVSHINS>;
def NEONvsri      : SDNode<"ARMISD::VSRI", SDTARMVSHINS>;

// Lane extraction: result is always i32, operands are the source vector and
// an i32 lane number. NOTE(review): the u/s suffixes presumably select zero
// vs. sign extension of sub-word elements -- confirm in ARMISelLowering.
def SDTARMVGETLN  : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
                                         SDTCisVT<2, i32>]>;
def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;

// Vector-immediate materialization from an encoded modified immediate (i32).
def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
def NEONvmovImm   : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
def NEONvmvnImm   : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
def NEONvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>;

// Bitwise or/bic of a vector with an encoded modified immediate.
def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisVT<2, i32>]>;
def NEONvorrImm   : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>;
def NEONvbicImm   : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>;

def NEONvbsl      : SDNode<"ARMISD::VBSL",
                           SDTypeProfile<1, 3, [SDTCisVec<0>,
                                                SDTCisSameAs<0, 1>,
                                                SDTCisSameAs<0, 2>,
                                                SDTCisSameAs<0, 3>]>>;

def NEONvdup      : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;

// VDUPLANE can produce a quad-register result from a double-register source,
// so the result is not constrained to match the source.
def NEONvduplane  : SDNode<"ARMISD::VDUPLANE",
                           SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
                                                SDTCisVT<2, i32>]>>;

def SDTARMVEXT    : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
def NEONvext      : SDNode<"ARMISD::VEXT", SDTARMVEXT>;

def SDTARMVSHUF   : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
def NEONvrev64    : SDNode<"ARMISD::VREV64", SDTARMVSHUF>;
def NEONvrev32    : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
def NEONvrev16    : SDNode<"ARMISD::VREV16", SDTARMVSHUF>;

// Two-result shuffles (VZIP/VUZP/VTRN write both registers of the pair).
def SDTARMVSHUF2  : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>,
                                         SDTCisSameAs<0, 3>]>;
def NEONzip       : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
def NEONuzp       : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
def NEONtrn       : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;

def SDTARMVMULL   : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                         SDTCisSameAs<1, 2>]>;
def NEONvmulls    : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
def NEONvmullu    : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;

def SDTARMFMAX    : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>]>;
def NEONfmax      : SDNode<"ARMISD::FMAX", SDTARMFMAX>;
def NEONfmin      : SDNode<"ARMISD::FMIN", SDTARMFMAX>;

// Matches a VMOVIMM whose decoded modified immediate is all-zeros
// (32-bit elements of 0) / all-ones (8-bit elements of 0xff), respectively.
def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{
  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
  unsigned EltBits = 0;
  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
  return (EltBits == 32 && EltVal == 0);
}]>;

def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{
  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
  unsigned EltBits = 0;
  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
  return (EltBits == 8 && EltVal == 0xff);
}]>;

//===----------------------------------------------------------------------===//
// NEON load / store instructions
//===----------------------------------------------------------------------===//

// Use VLDM to load a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VLDMD after reg alloc.
def VLDMQIA
  : PseudoVFPLdStM<(outs DPair:$dst), (ins GPR:$Rn),
                   IIC_fpLoad_m, "",
                   [(set DPair:$dst, (v2f64 (load GPR:$Rn)))]>;

// Use VSTM to store a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VSTMD after reg alloc.
def VSTMQIA
  : PseudoVFPLdStM<(outs), (ins DPair:$src, GPR:$Rn),
                   IIC_fpStore_m, "",
                   [(store (v2f64 DPair:$src), GPR:$Rn)]>;

// Classes for VLD* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
621 class VLDQPseudo<InstrItinClass itin> 622 : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">; 623 class VLDQWBPseudo<InstrItinClass itin> 624 : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), 625 (ins addrmode6:$addr, am6offset:$offset), itin, 626 "$addr.addr = $wb">; 627 class VLDQWBfixedPseudo<InstrItinClass itin> 628 : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), 629 (ins addrmode6:$addr), itin, 630 "$addr.addr = $wb">; 631 class VLDQWBregisterPseudo<InstrItinClass itin> 632 : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), 633 (ins addrmode6:$addr, rGPR:$offset), itin, 634 "$addr.addr = $wb">; 635 636 class VLDQQPseudo<InstrItinClass itin> 637 : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">; 638 class VLDQQWBPseudo<InstrItinClass itin> 639 : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), 640 (ins addrmode6:$addr, am6offset:$offset), itin, 641 "$addr.addr = $wb">; 642 class VLDQQWBfixedPseudo<InstrItinClass itin> 643 : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), 644 (ins addrmode6:$addr), itin, 645 "$addr.addr = $wb">; 646 class VLDQQWBregisterPseudo<InstrItinClass itin> 647 : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), 648 (ins addrmode6:$addr, rGPR:$offset), itin, 649 "$addr.addr = $wb">; 650 651 652 class VLDQQQQPseudo<InstrItinClass itin> 653 : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin, 654 "$src = $dst">; 655 class VLDQQQQWBPseudo<InstrItinClass itin> 656 : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb), 657 (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin, 658 "$addr.addr = $wb, $src = $dst">; 659 660 let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { 661 662 // VLD1 : Vector Load (multiple single elements) 663 class VLD1D<bits<4> op7_4, string Dt, Operand AddrMode> 664 : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd), 665 (ins AddrMode:$Rn), IIC_VLD1, 666 "vld1", Dt, "$Vd, $Rn", "", []> { 667 let Rm = 0b1111; 668 let Inst{4} = Rn{4}; 669 let DecoderMethod = "DecodeVLDST1Instruction"; 670 } 671 class 
VLD1Q<bits<4> op7_4, string Dt, Operand AddrMode> 672 : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd), 673 (ins AddrMode:$Rn), IIC_VLD1x2, 674 "vld1", Dt, "$Vd, $Rn", "", []> { 675 let Rm = 0b1111; 676 let Inst{5-4} = Rn{5-4}; 677 let DecoderMethod = "DecodeVLDST1Instruction"; 678 } 679 680 def VLD1d8 : VLD1D<{0,0,0,?}, "8", addrmode6align64>; 681 def VLD1d16 : VLD1D<{0,1,0,?}, "16", addrmode6align64>; 682 def VLD1d32 : VLD1D<{1,0,0,?}, "32", addrmode6align64>; 683 def VLD1d64 : VLD1D<{1,1,0,?}, "64", addrmode6align64>; 684 685 def VLD1q8 : VLD1Q<{0,0,?,?}, "8", addrmode6align64or128>; 686 def VLD1q16 : VLD1Q<{0,1,?,?}, "16", addrmode6align64or128>; 687 def VLD1q32 : VLD1Q<{1,0,?,?}, "32", addrmode6align64or128>; 688 def VLD1q64 : VLD1Q<{1,1,?,?}, "64", addrmode6align64or128>; 689 690 // ...with address register writeback: 691 multiclass VLD1DWB<bits<4> op7_4, string Dt, Operand AddrMode> { 692 def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), 693 (ins AddrMode:$Rn), IIC_VLD1u, 694 "vld1", Dt, "$Vd, $Rn!", 695 "$Rn.addr = $wb", []> { 696 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 697 let Inst{4} = Rn{4}; 698 let DecoderMethod = "DecodeVLDST1Instruction"; 699 } 700 def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), 701 (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1u, 702 "vld1", Dt, "$Vd, $Rn, $Rm", 703 "$Rn.addr = $wb", []> { 704 let Inst{4} = Rn{4}; 705 let DecoderMethod = "DecodeVLDST1Instruction"; 706 } 707 } 708 multiclass VLD1QWB<bits<4> op7_4, string Dt, Operand AddrMode> { 709 def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb), 710 (ins AddrMode:$Rn), IIC_VLD1x2u, 711 "vld1", Dt, "$Vd, $Rn!", 712 "$Rn.addr = $wb", []> { 713 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
// (continuation) Tail of the VLD1QWB writeback multiclass; the enclosing
// "multiclass VLD1QWB" begins above this chunk.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  // "_register" form: post-indexed with an explicit increment register $Rm
  // (the "_fixed" form above encodes the "$Rn!" writeback, Rm = 0b1101).
  def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

// Writeback instantiations of the one-register (d) and two-register (q)
// VLD1 forms, one per element size.
defm VLD1d8wb  : VLD1DWB<{0,0,0,?}, "8",  addrmode6align64>;
defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16", addrmode6align64>;
defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32", addrmode6align64>;
defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64", addrmode6align64>;
defm VLD1q8wb  : VLD1QWB<{0,0,?,?}, "8",  addrmode6align64or128>;
defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16", addrmode6align64or128>;
defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32", addrmode6align64or128>;
defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64", addrmode6align64or128>;

// ...with 3 registers
class VLD1D3<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1x3, "vld1", Dt,
          "$Vd, $Rn", "", []> {
  let Rm = 0b1111; // Rm = 0b1111 selects the no-writeback encoding.
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VLD1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  // NOTE(review): uses the IIC_VLD1x2u itinerary rather than an x3 one,
  // unlike the non-writeback VLD1D3 (IIC_VLD1x3) — confirm intentional.
  def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VLD1d8T  : VLD1D3<{0,0,0,?}, "8",  addrmode6align64>;
def VLD1d16T : VLD1D3<{0,1,0,?}, "16", addrmode6align64>;
def VLD1d32T : VLD1D3<{1,0,0,?}, "32", addrmode6align64>;
def VLD1d64T : VLD1D3<{1,1,0,?}, "64", addrmode6align64>;

defm VLD1d8Twb  : VLD1D3WB<{0,0,0,?}, "8",  addrmode6align64>;
defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16", addrmode6align64>;
defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32", addrmode6align64>;
defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64", addrmode6align64>;

// Pseudos expanded after register allocation (see pseudo classes below).
def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>;
def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>;
def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>;

// ...with 4 registers
class VLD1D4<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1x4, "vld1", Dt,
          "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VLD1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  // NOTE(review): also IIC_VLD1x2u here (not an x4 itinerary) — see VLD1D3WB.
  def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VLD1d8Q  : VLD1D4<{0,0,?,?}, "8",  addrmode6align64or128or256>;
def VLD1d16Q : VLD1D4<{0,1,?,?}, "16", addrmode6align64or128or256>;
def VLD1d32Q : VLD1D4<{1,0,?,?}, "32", addrmode6align64or128or256>;
def VLD1d64Q : VLD1D4<{1,1,?,?}, "64", addrmode6align64or128or256>;

defm VLD1d8Qwb  : VLD1D4WB<{0,0,?,?}, "8",  addrmode6align64or128or256>;
defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;

def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>;
def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>;
def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>;

// VLD2 : Vector Load (multiple 2-element structures)
class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
           InstrItinClass itin, Operand AddrMode>
  : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd),
          (ins AddrMode:$Rn), itin,
          "vld2", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST2Instruction";
}

def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2,
                  addrmode6align64or128>;
def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2,
                   addrmode6align64or128>;
def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2,
                   addrmode6align64or128>;

def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2,
                  addrmode6align64or128or256>;
def VLD2q16 :
// (continuation of "def VLD2q16 :" begun above this chunk)
              VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2,
                   addrmode6align64or128or256>;
def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2,
                   addrmode6align64or128or256>;

def VLD2q8Pseudo : VLDQQPseudo<IIC_VLD2x2>;
def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>;
def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>;

// ...with address register writeback:
multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
                  RegisterOperand VdTy, InstrItinClass itin, Operand AddrMode> {
  def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), itin,
                     "vld2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
  // Explicit increment-register form ("$Rn, $Rm").
  def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), itin,
                        "vld2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
}

defm VLD2d8wb  : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u,
                        addrmode6align64or128>;
defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u,
                        addrmode6align64or128>;
defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u,
                        addrmode6align64or128>;

defm VLD2q8wb  : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u,
                        addrmode6align64or128or256>;
defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u,
                        addrmode6align64or128or256>;
defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u,
                        addrmode6align64or128or256>;

def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
def VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
def VLD2q8PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;

// ...with double-spaced registers
def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2,
                  addrmode6align64or128>;
def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2,
                   addrmode6align64or128>;
def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2,
                   addrmode6align64or128>;
defm VLD2b8wb  : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u,
                        addrmode6align64or128>;
defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u,
                        addrmode6align64or128>;
defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u,
                        addrmode6align64or128>;

// VLD3 : Vector Load (multiple 3-element structures)
class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
          (ins addrmode6:$Rn), IIC_VLD3,
          "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST3Instruction";
}

def VLD3d8  : VLD3D<0b0100, {0,0,0,?}, "8">;
def VLD3d16 : VLD3D<0b0100, {0,1,0,?}, "16">;
def VLD3d32 : VLD3D<0b0100, {1,0,0,?}, "32">;

def VLD3d8Pseudo  : VLDQQPseudo<IIC_VLD3>;
def VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>;
def VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>;

// ...with address register writeback:
// Note: unlike the VLD1/VLD2 writeback multiclasses, VLD3/VLD4 use a single
// class with an am6offset operand covering both "!" and register increments.
class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u,
          "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST3Instruction";
}

def VLD3d8_UPD  : VLD3DWB<0b0100, {0,0,0,?}, "8">;
def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">;
def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">;

def VLD3d8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3u>;
def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;

// ...with double-spaced registers:
def VLD3q8  : VLD3D<0b0101, {0,0,0,?}, "8">;
def VLD3q16 : VLD3D<0b0101, {0,1,0,?}, "16">;
def VLD3q32 : VLD3D<0b0101, {1,0,0,?}, "32">;
def VLD3q8_UPD  : VLD3DWB<0b0101, {0,0,0,?}, "8">;
def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">;
def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">;

def VLD3q8Pseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3u>;
def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;

// ...alternate versions to be allocated odd register numbers:
def VLD3q8oddPseudo  : VLDQQQQPseudo<IIC_VLD3>;
def VLD3q16oddPseudo : VLDQQQQPseudo<IIC_VLD3>;
def VLD3q32oddPseudo : VLDQQQQPseudo<IIC_VLD3>;

def VLD3q8oddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3u>;
def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;

// VLD4 : Vector Load (multiple 4-element structures)
class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6:$Rn), IIC_VLD4,
          "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST4Instruction";
}

def VLD4d8  : VLD4D<0b0000, {0,0,?,?}, "8">;
def VLD4d16 : VLD4D<0b0000, {0,1,?,?}, "16">;
def VLD4d32 : VLD4D<0b0000, {1,0,?,?}, "32">;

def VLD4d8Pseudo  : VLDQQPseudo<IIC_VLD4>;
def VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>;
def VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>;

// ...with address register writeback:
class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u,
          "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST4Instruction";
}

def VLD4d8_UPD  : VLD4DWB<0b0000, {0,0,?,?}, "8">;
def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">;
def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">;

def VLD4d8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4u>;
def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;

// ...with double-spaced registers:
def VLD4q8  : VLD4D<0b0001, {0,0,?,?}, "8">;
def VLD4q16 : VLD4D<0b0001, {0,1,?,?}, "16">;
def VLD4q32 : VLD4D<0b0001, {1,0,?,?}, "32">;
def VLD4q8_UPD  : VLD4DWB<0b0001, {0,0,?,?}, "8">;
def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">;
def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">;

def VLD4q8Pseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4u>;
def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;

// ...alternate versions to be allocated odd register numbers:
def VLD4q8oddPseudo  : VLDQQQQPseudo<IIC_VLD4>;
def VLD4q16oddPseudo : VLDQQQQPseudo<IIC_VLD4>;
def VLD4q32oddPseudo : VLDQQQQPseudo<IIC_VLD4>;

def VLD4q8oddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4u>;
def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;

} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1

// Classes for VLD*LN pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
// Lane-load pseudo classes: Q / QQ / QQQQ register-tuple variants, each with
// a plain and an address-writeback ("WB") form. The loaded-into register is
// tied to the source tuple ("$src = $dst") so untouched lanes are preserved.
class VLDQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst),
                (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
                itin, "$src = $dst">;
class VLDQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
class VLDQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst),
                (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
                itin, "$src = $dst">;
class VLDQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
class VLDQQQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst),
                (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
                itin, "$src = $dst">;
class VLDQQQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;

// VLD1LN : Vector Load (single element to one lane)
class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
             PatFrag LoadOp>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
            (ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane),
            IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
            "$src = $Vd",
            [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
                                          (i32 (LoadOp addrmode6:$Rn)),
                                          imm:$lane))]> {
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVLD1LN";
}
// 32-bit variant: identical except it uses the single-lane-32 address mode.
class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
               PatFrag LoadOp>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
            (ins addrmode6oneL32:$Rn, DPR:$src, nohash_imm:$lane),
            IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
            "$src = $Vd",
            [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
                                          (i32 (LoadOp addrmode6oneL32:$Rn)),
                                          imm:$lane))]> {
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVLD1LN";
}
// Q-register lane-load pseudo: overrides the (empty) Pattern inherited from
// VLDQLNPseudo with a vector_insert selection pattern.
class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln> {
  let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src),
                                               (i32 (LoadOp addrmode6:$addr)),
                                               imm:$lane))];
}

def VLD1LNd8  : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> {
  let Inst{7-5} = lane{2-0};
}
def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> {
  let Inst{7-6} = lane{1-0};
  let Inst{5-4} = Rn{5-4};
}
def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> {
  let Inst{7} = lane{0};
  let Inst{5-4} = Rn{5-4};
}

def VLD1LNq8Pseudo  : VLD1QLNPseudo<v16i8, extloadi8>;
def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>;
def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>;

// Float lane inserts reuse the i32 lane-load instructions.
def : Pat<(vector_insert (v2f32 DPR:$src),
                         (f32 (load addrmode6:$addr)), imm:$lane),
          (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(vector_insert (v4f32 QPR:$src),
                         (f32 (load addrmode6:$addr)), imm:$lane),
          (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;

let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {

// ...with address register writeback:
class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt,
            "\\{$Vd[$lane]\\}, $Rn$Rm",
            "$src = $Vd, $Rn.addr = $wb", []> {
  let DecoderMethod = "DecodeVLD1LN";
}

def VLD1LNd8_UPD  : VLD1LNWB<0b0000, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
  let Inst{4} = Rn{4};
}
def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  // NOTE(review): both encoding bits are driven from Rn{4} here, whereas the
  // non-writeback VLD1LNd32 maps Inst{5-4} = Rn{5-4} — confirm against the
  // ARM ARM single-element VLD1 alignment encoding.
  let Inst{5} = Rn{4};
  let Inst{4} = Rn{4};
}

def VLD1LNq8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD1lnu>;
def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;

// VLD2LN : Vector Load (single 2-element structure to one lane)
class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2),
            (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane),
            IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn",
            "$src1 = $Vd, $src2 = $dst2", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD2LN";
}

def VLD2LNd8  : VLD2LN<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNd8Pseudo  : VLDQLNPseudo<IIC_VLD2ln>;
def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>;

// ...with double-spaced registers:
def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;
def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;

// ...with address register writeback:
class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2lnu, "vld2", Dt,
            "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm",
            "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD2LN";
}

def VLD2LNd8_UPD  : VLD2LNWB<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNd8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD2lnu>;
def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;

def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;
def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;

// VLD3LN : Vector Load (single 3-element structure to one lane)
class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
            (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3,
             nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt,
            "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn",
            "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []> {
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVLD3LN";
}

def VLD3LNd8  : VLD3LN<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD3ln>;
def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;

// ...with double-spaced registers:
def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> {
  let Inst{7} =
// (continuation of "def VLD3LNq32" begun above this chunk)
lane{0};
}

def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;
def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;

// ...with address register writeback:
class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
            IIC_VLD3lnu, "vld3", Dt,
            "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm",
            "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb",
            []> {
  let DecoderMethod = "DecodeVLD3LN";
}

def VLD3LNd8_UPD  : VLD3LNWB<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD3lnu>;
def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;

def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;

// VLD4LN : Vector Load (single 4-element structure to one lane)
class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
            (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
             nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt,
            "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn",
            "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4LN";
}

def VLD4LNd8  : VLD4LN<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD4ln>;
def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;

// ...with double-spaced registers:
def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;

// ...with address register writeback:
class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
            IIC_VLD4lnu, "vld4", Dt,
            "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm",
            "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb",
            []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4LN";
}

def VLD4LNd8_UPD  : VLD4LNWB<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD4lnu>;
def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;

def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;

} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1

// VLD1DUP : Vector Load (single element to all lanes)
class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
              Operand AddrMode>
  : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd),
          (ins AddrMode:$Rn),
          IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "",
          [(set VecListOneDAllLanes:$Vd,
                (Ty (NEONvdup (i32 (LoadOp AddrMode:$Rn)))))]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD1DupInstruction";
}
def VLD1DUPd8  : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8,
                         addrmode6dupalignNone>;
def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16,
                         addrmode6dupalign16>;
def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load,
                         addrmode6dupalign32>;

// NOTE(review): pattern matches addrmode6dup:$addr but the result uses
// addrmode6:$addr — confirm the operand classes are interchangeable here.
def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
          (VLD1DUPd32 addrmode6:$addr)>;

class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
               Operand AddrMode>
  : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1dup,
          "vld1", Dt, "$Vd, $Rn", "",
          [(set VecListDPairAllLanes:$Vd,
                (Ty (NEONvdup (i32 (LoadOp AddrMode:$Rn)))))]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD1DupInstruction";
}

def VLD1DUPq8  : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8,
                          addrmode6dupalignNone>;
def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16,
                          addrmode6dupalign16>;
def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load,
                          addrmode6dupalign32>;

def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
          (VLD1DUPq32 addrmode6:$addr)>;

let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
// ...with address register writeback:
multiclass VLD1DUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
                     (outs VecListOneDAllLanes:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1dupu,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
  def _register : NLdSt<1, 0b10, 0b1100, op7_4,
                        (outs VecListOneDAllLanes:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
}
multiclass VLD1QDUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
                     (outs VecListDPairAllLanes:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1dupu,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
// (continuation) Tail of VLD1QDUPWB's "_fixed" def; the enclosing
// "multiclass VLD1QDUPWB" begins above this chunk.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
  def _register : NLdSt<1, 0b10, 0b1100, op7_4,
                        (outs VecListDPairAllLanes:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
}

defm VLD1DUPd8wb  : VLD1DUPWB<{0,0,0,0}, "8", addrmode6dupalignNone>;
defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16", addrmode6dupalign16>;
defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32", addrmode6dupalign32>;

defm VLD1DUPq8wb  : VLD1QDUPWB<{0,0,1,0}, "8", addrmode6dupalignNone>;
defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16", addrmode6dupalign16>;
defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32", addrmode6dupalign32>;

// VLD2DUP : Vector Load (single 2-element structure to all lanes)
class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy, Operand AddrMode>
  : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd),
          (ins AddrMode:$Rn), IIC_VLD2dup,
          "vld2", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD2DupInstruction";
}

def VLD2DUPd8  : VLD2DUP<{0,0,0,?}, "8", VecListDPairAllLanes,
                         addrmode6dupalign16>;
def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes,
                         addrmode6dupalign32>;
def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes,
                         addrmode6dupalign64>;

// HACK this one, VLD2DUPd8x2 must be changed at the same time with VLD2b8 or
// "vld2.8 {d0[], d2[]}, [r4:32]" will become "vld2.8 {d0, d2}, [r4:32]".
// ...with double-spaced registers
def VLD2DUPd8x2  : VLD2DUP<{0,0,1,?}, "8", VecListDPairSpacedAllLanes,
                           addrmode6dupalign16>;
def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
                           addrmode6dupalign32>;
def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
                           addrmode6dupalign64>;

// ...with address register writeback:
multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy,
                     Operand AddrMode> {
  def _fixed : NLdSt<1, 0b10, 0b1101, op7_4,
                     (outs VdTy:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD2dupu,
                     "vld2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD2DupInstruction";
  }
  def _register : NLdSt<1, 0b10, 0b1101, op7_4,
                        (outs VdTy:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD2dupu,
                        "vld2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD2DupInstruction";
  }
}

defm VLD2DUPd8wb  : VLD2DUPWB<{0,0,0,0}, "8", VecListDPairAllLanes,
                              addrmode6dupalign16>;
defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes,
                              addrmode6dupalign32>;
defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes,
                              addrmode6dupalign64>;

defm VLD2DUPd8x2wb  : VLD2DUPWB<{0,0,1,0}, "8", VecListDPairSpacedAllLanes,
                                addrmode6dupalign16>;
defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
                                addrmode6dupalign32>;
defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
                                addrmode6dupalign64>;

// VLD3DUP : Vector Load (single 3-element structure to all lanes)
class VLD3DUP<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
          (ins addrmode6dup:$Rn), IIC_VLD3dup,
          "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = 0; // Unlike VLD2DUP/VLD4DUP, bit 4 is hard-wired to zero here.
  let DecoderMethod = "DecodeVLD3DupInstruction";
}

def VLD3DUPd8  : VLD3DUP<{0,0,0,?}, "8">;
def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">;
def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">;

def VLD3DUPd8Pseudo  : VLDQQPseudo<IIC_VLD3dup>;
def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>;
def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>;

// ...with double-spaced registers (not used for codegen):
def VLD3DUPq8  : VLD3DUP<{0,0,1,?}, "8">;
def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">;
def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">;

// ...with address register writeback:
class VLD3DUPWB<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
          (ins AddrMode:$Rn, am6offset:$Rm), IIC_VLD3dupu,
          "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4} = 0;
  let DecoderMethod = "DecodeVLD3DupInstruction";
}

def VLD3DUPd8_UPD  : VLD3DUPWB<{0,0,0,0}, "8", addrmode6dupalign64>;
def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16", addrmode6dupalign64>;
def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32", addrmode6dupalign64>;

def VLD3DUPq8_UPD  : VLD3DUPWB<{0,0,1,0}, "8", addrmode6dupalign64>;
def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16", addrmode6dupalign64>;
def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32", addrmode6dupalign64>;

def VLD3DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3dupu>;
def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;

// VLD4DUP : Vector Load (single 4-element structure to all lanes)
class VLD4DUP<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1111, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6dup:$Rn), IIC_VLD4dup,
          "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4DupInstruction";
}

def VLD4DUPd8  : VLD4DUP<{0,0,0,?}, "8">;
def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">;
// 32-bit form additionally routes Rn{5} into Inst{6}.
def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }

def VLD4DUPd8Pseudo  : VLDQQPseudo<IIC_VLD4dup>;
def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>;
def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>;

// ...with double-spaced registers (not used for codegen):
def VLD4DUPq8  : VLD4DUP<{0,0,1,?}, "8">;
def VLD4DUPq16 : VLD4DUP<{0,1,1,?}, "16">;
def VLD4DUPq32 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }

// ...with address register writeback:
class VLD4DUPWB<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1111, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu,
          "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4DupInstruction";
}

def VLD4DUPd8_UPD  : VLD4DUPWB<{0,0,0,0}, "8">;
def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">;
def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }

def VLD4DUPq8_UPD  : VLD4DUPWB<{0,0,1,0}, "8">;
def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">;
def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }

def VLD4DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4dupu>;
def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;

} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1

let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {

// Classes for VST* pseudo-instructions with
multi-register operands. 1609 // These are expanded to real instructions after register allocation. 1610 class VSTQPseudo<InstrItinClass itin> 1611 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), itin, "">; 1612 class VSTQWBPseudo<InstrItinClass itin> 1613 : PseudoNLdSt<(outs GPR:$wb), 1614 (ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin, 1615 "$addr.addr = $wb">; 1616 class VSTQWBfixedPseudo<InstrItinClass itin> 1617 : PseudoNLdSt<(outs GPR:$wb), 1618 (ins addrmode6:$addr, QPR:$src), itin, 1619 "$addr.addr = $wb">; 1620 class VSTQWBregisterPseudo<InstrItinClass itin> 1621 : PseudoNLdSt<(outs GPR:$wb), 1622 (ins addrmode6:$addr, rGPR:$offset, QPR:$src), itin, 1623 "$addr.addr = $wb">; 1624 class VSTQQPseudo<InstrItinClass itin> 1625 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">; 1626 class VSTQQWBPseudo<InstrItinClass itin> 1627 : PseudoNLdSt<(outs GPR:$wb), 1628 (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin, 1629 "$addr.addr = $wb">; 1630 class VSTQQWBfixedPseudo<InstrItinClass itin> 1631 : PseudoNLdSt<(outs GPR:$wb), 1632 (ins addrmode6:$addr, QQPR:$src), itin, 1633 "$addr.addr = $wb">; 1634 class VSTQQWBregisterPseudo<InstrItinClass itin> 1635 : PseudoNLdSt<(outs GPR:$wb), 1636 (ins addrmode6:$addr, rGPR:$offset, QQPR:$src), itin, 1637 "$addr.addr = $wb">; 1638 1639 class VSTQQQQPseudo<InstrItinClass itin> 1640 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">; 1641 class VSTQQQQWBPseudo<InstrItinClass itin> 1642 : PseudoNLdSt<(outs GPR:$wb), 1643 (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin, 1644 "$addr.addr = $wb">; 1645 1646 // VST1 : Vector Store (multiple single elements) 1647 class VST1D<bits<4> op7_4, string Dt, Operand AddrMode> 1648 : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins AddrMode:$Rn, VecListOneD:$Vd), 1649 IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []> { 1650 let Rm = 0b1111; 1651 let Inst{4} = Rn{4}; 1652 let DecoderMethod = "DecodeVLDST1Instruction"; 1653 } 1654 class 
VST1Q<bits<4> op7_4, string Dt, Operand AddrMode> 1655 : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins AddrMode:$Rn, VecListDPair:$Vd), 1656 IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> { 1657 let Rm = 0b1111; 1658 let Inst{5-4} = Rn{5-4}; 1659 let DecoderMethod = "DecodeVLDST1Instruction"; 1660 } 1661 1662 def VST1d8 : VST1D<{0,0,0,?}, "8", addrmode6align64>; 1663 def VST1d16 : VST1D<{0,1,0,?}, "16", addrmode6align64>; 1664 def VST1d32 : VST1D<{1,0,0,?}, "32", addrmode6align64>; 1665 def VST1d64 : VST1D<{1,1,0,?}, "64", addrmode6align64>; 1666 1667 def VST1q8 : VST1Q<{0,0,?,?}, "8", addrmode6align64or128>; 1668 def VST1q16 : VST1Q<{0,1,?,?}, "16", addrmode6align64or128>; 1669 def VST1q32 : VST1Q<{1,0,?,?}, "32", addrmode6align64or128>; 1670 def VST1q64 : VST1Q<{1,1,?,?}, "64", addrmode6align64or128>; 1671 1672 // ...with address register writeback: 1673 multiclass VST1DWB<bits<4> op7_4, string Dt, Operand AddrMode> { 1674 def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb), 1675 (ins AddrMode:$Rn, VecListOneD:$Vd), IIC_VLD1u, 1676 "vst1", Dt, "$Vd, $Rn!", 1677 "$Rn.addr = $wb", []> { 1678 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 1679 let Inst{4} = Rn{4}; 1680 let DecoderMethod = "DecodeVLDST1Instruction"; 1681 } 1682 def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb), 1683 (ins AddrMode:$Rn, rGPR:$Rm, VecListOneD:$Vd), 1684 IIC_VLD1u, 1685 "vst1", Dt, "$Vd, $Rn, $Rm", 1686 "$Rn.addr = $wb", []> { 1687 let Inst{4} = Rn{4}; 1688 let DecoderMethod = "DecodeVLDST1Instruction"; 1689 } 1690 } 1691 multiclass VST1QWB<bits<4> op7_4, string Dt, Operand AddrMode> { 1692 def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb), 1693 (ins AddrMode:$Rn, VecListDPair:$Vd), IIC_VLD1x2u, 1694 "vst1", Dt, "$Vd, $Rn!", 1695 "$Rn.addr = $wb", []> { 1696 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VecListDPair:$Vd),
                        IIC_VLD1x2u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

defm VST1d8wb  : VST1DWB<{0,0,0,?}, "8",  addrmode6align64>;
defm VST1d16wb : VST1DWB<{0,1,0,?}, "16", addrmode6align64>;
defm VST1d32wb : VST1DWB<{1,0,0,?}, "32", addrmode6align64>;
defm VST1d64wb : VST1DWB<{1,1,0,?}, "64", addrmode6align64>;

defm VST1q8wb  : VST1QWB<{0,0,?,?}, "8",  addrmode6align64or128>;
defm VST1q16wb : VST1QWB<{0,1,?,?}, "16", addrmode6align64or128>;
defm VST1q32wb : VST1QWB<{1,0,?,?}, "32", addrmode6align64or128>;
defm VST1q64wb : VST1QWB<{1,1,?,?}, "64", addrmode6align64or128>;

// ...with 3 registers
class VST1D3<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0, 0b00, 0b0110, op7_4, (outs),
          (ins AddrMode:$Rn, VecListThreeD:$Vd),
          IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;        // No-writeback form.
  let Inst{4} = Rn{4};    // Alignment bit from the address-mode operand.
  let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VST1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VecListThreeD:$Vd),
                        IIC_VLD1x3u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VST1d8T  : VST1D3<{0,0,0,?}, "8",  addrmode6align64>;
def VST1d16T : VST1D3<{0,1,0,?}, "16", addrmode6align64>;
def VST1d32T : VST1D3<{1,0,0,?}, "32", addrmode6align64>;
def VST1d64T : VST1D3<{1,1,0,?}, "64", addrmode6align64>;

defm VST1d8Twb  : VST1D3WB<{0,0,0,?}, "8",  addrmode6align64>;
defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16", addrmode6align64>;
defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32", addrmode6align64>;
defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64", addrmode6align64>;

def VST1d64TPseudo            : VSTQQPseudo<IIC_VST1x3>;
def VST1d64TPseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST1x3u>;
def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>;

// ...with 4 registers
class VST1D4<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0, 0b00, 0b0010, op7_4, (outs),
          (ins AddrMode:$Rn, VecListFourD:$Vd),
          IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "",
          []> {
  let Rm = 0b1111;          // No-writeback form.
  let Inst{5-4} = Rn{5-4};  // Alignment bits from the address-mode operand.
  let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VST1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1x4u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
                        IIC_VLD1x4u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VST1d8Q  : VST1D4<{0,0,?,?}, "8",  addrmode6align64or128or256>;
def VST1d16Q : VST1D4<{0,1,?,?}, "16", addrmode6align64or128or256>;
def VST1d32Q : VST1D4<{1,0,?,?}, "32", addrmode6align64or128or256>;
def VST1d64Q : VST1D4<{1,1,?,?}, "64", addrmode6align64or128or256>;

defm VST1d8Qwb  : VST1D4WB<{0,0,?,?}, "8",  addrmode6align64or128or256>;
defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;

def VST1d64QPseudo            : VSTQQPseudo<IIC_VST1x4>;
def VST1d64QPseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST1x4u>;
def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>;

// VST2 : Vector Store (multiple 2-element structures)
// VdTy selects the register-list flavor (adjacent pair, spaced pair, or
// four D registers); op11_8 selects the matching encoding.
class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
           InstrItinClass itin, Operand AddrMode>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins AddrMode:$Rn, VdTy:$Vd),
          itin, "vst2", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;          // No-writeback form.
  let Inst{5-4} = Rn{5-4};  // Alignment bits from the address-mode operand.
  let DecoderMethod = "DecodeVLDST2Instruction";
}

def VST2d8  : VST2<0b1000, {0,0,?,?}, "8",  VecListDPair, IIC_VST2,
                   addrmode6align64or128>;
def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2,
                   addrmode6align64or128>;
def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2,
                   addrmode6align64or128>;

def VST2q8  : VST2<0b0011, {0,0,?,?}, "8",  VecListFourD, IIC_VST2x2,
                   addrmode6align64or128or256>;
def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2,
                   addrmode6align64or128or256>;
def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2,
                   addrmode6align64or128or256>;

def VST2q8Pseudo  : VSTQQPseudo<IIC_VST2x2>;
def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>;
def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>;

// ...with address register writeback:
multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
                   RegisterOperand VdTy, Operand AddrMode> {
  def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VdTy:$Vd), IIC_VLD1u,
                     "vst2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
  def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u,
                        "vst2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
}
multiclass VST2QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1u,
                     "vst2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
  def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
                        IIC_VLD1u,
                        "vst2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
}

defm VST2d8wb  : VST2DWB<0b1000, {0,0,?,?}, "8",  VecListDPair,
                         addrmode6align64or128>;
defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair,
                         addrmode6align64or128>;
defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair,
                         addrmode6align64or128>;

defm VST2q8wb  : VST2QWB<{0,0,?,?}, "8",  addrmode6align64or128or256>;
defm VST2q16wb : VST2QWB<{0,1,?,?}, "16", addrmode6align64or128or256>;
defm VST2q32wb : VST2QWB<{1,0,?,?}, "32", addrmode6align64or128or256>;

def VST2q8PseudoWB_fixed     : VSTQQWBfixedPseudo<IIC_VST2x2u>;
def VST2q16PseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST2x2u>;
def VST2q32PseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST2x2u>;
def VST2q8PseudoWB_register  : VSTQQWBregisterPseudo<IIC_VST2x2u>;
def VST2q16PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;
def VST2q32PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;

// ...with double-spaced registers
def VST2b8  : VST2<0b1001, {0,0,?,?}, "8",  VecListDPairSpaced, IIC_VST2,
                   addrmode6align64or128>;
def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2,
                   addrmode6align64or128>;
def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2,
                   addrmode6align64or128>;
defm VST2b8wb  : VST2DWB<0b1001, {0,0,?,?}, "8",  VecListDPairSpaced,
                         addrmode6align64or128>;
defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced,
                         addrmode6align64or128>;
defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced,
                         addrmode6align64or128>;

// VST3 : Vector Store (multiple 3-element structures)
class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3,
          "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
  let Rm = 0b1111;        // No-writeback form.
  let Inst{4} = Rn{4};    // Alignment bit from the addrmode6 operand.
  let DecoderMethod = "DecodeVLDST3Instruction";
}

def VST3d8  : VST3D<0b0100, {0,0,0,?}, "8">;
def VST3d16 : VST3D<0b0100, {0,1,0,?}, "16">;
def VST3d32 : VST3D<0b0100, {1,0,0,?}, "32">;

def VST3d8Pseudo  : VSTQQPseudo<IIC_VST3>;
def VST3d16Pseudo : VSTQQPseudo<IIC_VST3>;
def VST3d32Pseudo : VSTQQPseudo<IIC_VST3>;

// ...with address register writeback:
class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u,
          "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST3Instruction";
}

def VST3d8_UPD  : VST3DWB<0b0100, {0,0,0,?}, "8">;
def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">;
def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">;

def VST3d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST3u>;
def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;

// ...with double-spaced registers:
def VST3q8  : VST3D<0b0101, {0,0,0,?}, "8">;
def VST3q16 : VST3D<0b0101, {0,1,0,?}, "16">;
def VST3q32 : VST3D<0b0101, {1,0,0,?}, "32">;
def VST3q8_UPD  : VST3DWB<0b0101, {0,0,0,?}, "8">;
def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">;
def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">;

def VST3q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;

// ...alternate versions to be allocated odd register numbers:
def VST3q8oddPseudo  : VSTQQQQPseudo<IIC_VST3>;
def VST3q16oddPseudo : VSTQQQQPseudo<IIC_VST3>;
def VST3q32oddPseudo : VSTQQQQPseudo<IIC_VST3>;

def VST3q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;

// VST4 : Vector Store (multiple 4-element structures)
class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
          "", []> {
  let Rm = 0b1111;          // No-writeback form.
  let Inst{5-4} = Rn{5-4};  // Alignment bits from the addrmode6 operand.
  let DecoderMethod = "DecodeVLDST4Instruction";
}

def VST4d8  : VST4D<0b0000, {0,0,?,?}, "8">;
def VST4d16 : VST4D<0b0000, {0,1,?,?}, "16">;
def VST4d32 : VST4D<0b0000, {1,0,?,?}, "32">;

def VST4d8Pseudo  : VSTQQPseudo<IIC_VST4>;
def VST4d16Pseudo : VSTQQPseudo<IIC_VST4>;
def VST4d32Pseudo : VSTQQPseudo<IIC_VST4>;

// ...with address register writeback:
class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u,
          "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST4Instruction";
}

def VST4d8_UPD  : VST4DWB<0b0000, {0,0,?,?}, "8">;
def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">;
def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">;

def VST4d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST4u>;
def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;

// ...with double-spaced registers:
def VST4q8  : VST4D<0b0001, {0,0,?,?}, "8">;
def VST4q16 : VST4D<0b0001, {0,1,?,?}, "16">;
def VST4q32 : VST4D<0b0001, {1,0,?,?}, "32">;
def VST4q8_UPD  : VST4DWB<0b0001, {0,0,?,?}, "8">;
def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">;
def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">;

def VST4q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;

// ...alternate versions to be allocated odd register numbers:
def VST4q8oddPseudo  : VSTQQQQPseudo<IIC_VST4>;
def VST4q16oddPseudo : VSTQQQQPseudo<IIC_VST4>;
def VST4q32oddPseudo : VSTQQQQPseudo<IIC_VST4>;

def VST4q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;

} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1

// Classes for VST*LN pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
class VSTQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
class VSTQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
class VSTQQQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQQQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;

// VST1LN : Vector Store (single element from one lane)
// Unlike the multi-register stores above, these carry selection DAG
// patterns: StoreOp stores the value ExtractOp pulls out of lane $lane.
class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
             PatFrag StoreOp, SDNode ExtractOp, Operand AddrMode>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
            (ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane),
            IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
            [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]> {
  let Rm = 0b1111;   // No-writeback form.
  let DecoderMethod = "DecodeVST1LN";
}
class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
  : VSTQLNPseudo<IIC_VST1ln> {
  let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
                          addrmode6:$addr)];
}

// The lane number occupies the high bits of op7_4/Inst{7-4}; narrower
// element types need more lane bits.
def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8,
                      NEONvgetlaneu, addrmode6> {
  let Inst{7-5} = lane{2-0};
}
def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
                       NEONvgetlaneu, addrmode6> {
  let Inst{7-6} = lane{1-0};
  let Inst{4} = Rn{4};
}

def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt,
                       addrmode6oneL32> {
  let Inst{7} = lane{0};
  let Inst{5-4} = Rn{5-4};
}

def VST1LNq8Pseudo  : VST1QLNPseudo<v16i8, truncstorei8, NEONvgetlaneu>;
def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, NEONvgetlaneu>;
def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>;

// f32 lane stores reuse the 32-bit integer lane-store encodings.
def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
          (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
          (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;

// ...with address register writeback:
class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
               PatFrag StoreOp, SDNode ExtractOp, Operand AdrMode>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
            (ins AdrMode:$Rn, am6offset:$Rm,
             DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt,
            "\\{$Vd[$lane]\\}, $Rn$Rm",
            "$Rn.addr = $wb",
            [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
                                    AdrMode:$Rn, am6offset:$Rm))]> {
  let DecoderMethod = "DecodeVST1LN";
}
class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
  : VSTQLNWBPseudo<IIC_VST1lnu> {
  let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
                                        addrmode6:$addr, am6offset:$offset))];
}

def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
                            NEONvgetlaneu, addrmode6> {
  let Inst{7-5} = lane{2-0};
}
def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16,
                             NEONvgetlaneu, addrmode6> {
  let Inst{7-6} = lane{1-0};
  let Inst{4} = Rn{4};
}
def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
                             extractelt, addrmode6oneL32> {
  let Inst{7} = lane{0};
  let Inst{5-4} = Rn{5-4};
}

def VST1LNq8Pseudo_UPD  : VST1QLNWBPseudo<v16i8, post_truncsti8, NEONvgetlaneu>;
def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16,NEONvgetlaneu>;
def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>;

let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {

// VST2LN : Vector Store (single 2-element structure from one lane)
class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane),
            IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn",
            "", []> {
  let Rm = 0b1111;        // No-writeback form.
  let Inst{4} = Rn{4};    // Alignment bit from the addrmode6 operand.
  let DecoderMethod = "DecodeVST2LN";
}

def VST2LNd8 : VST2LN<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VST2LNd8Pseudo  : VSTQLNPseudo<IIC_VST2ln>;
def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>;
def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>;

// ...with double-spaced registers:
def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
  let Inst{4} = Rn{4};
}
def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{4} = Rn{4};
}

def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>;

// ...with address register writeback:
class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
            "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm",
            "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVST2LN";
}

def VST2LNd8_UPD : VST2LNWB<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VST2LNd8Pseudo_UPD  : VSTQLNWBPseudo<IIC_VST2lnu>;
def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;

def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;
def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;

// VST3LN : Vector Store (single 3-element structure from one lane)
class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3,
             nohash_imm:$lane), IIC_VST3ln, "vst3", Dt,
            "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []> {
  let Rm = 0b1111;   // No-writeback form; vst3-lane takes no alignment bit.
  let DecoderMethod = "DecodeVST3LN";
}

def VST3LNd8 : VST3LN<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VST3LNd8Pseudo  : VSTQQLNPseudo<IIC_VST3ln>;
def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>;

// ...with double-spaced registers:
def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VST3LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;

// ...with address register writeback:
class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane),
            IIC_VST3lnu, "vst3", Dt,
            "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm",
            "$Rn.addr = $wb", []> {
  let DecoderMethod = "DecodeVST3LN";
}

def VST3LNd8_UPD : VST3LNWB<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VST3LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST3lnu>;
def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;

def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;

// VST4LN : Vector Store (single 4-element structure from one lane)
class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4,
             nohash_imm:$lane), IIC_VST4ln, "vst4", Dt,
            "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn",
            "", []> {
  let Rm = 0b1111;        // No-writeback form.
  let Inst{4} = Rn{4};    // Alignment bit from the addrmode6 operand.
  let DecoderMethod = "DecodeVST4LN";
}

def VST4LNd8 : VST4LN<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};   // Second alignment bit for the 32-bit form.
}

def VST4LNd8Pseudo  : VSTQQLNPseudo<IIC_VST4ln>;
def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>;

// ...with double-spaced registers:
def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;
def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;

// ...with address register writeback:
class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
            IIC_VST4lnu, "vst4", Dt,
            "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm",
            "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVST4LN";
}

def VST4LNd8_UPD : VST4LNWB<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST4lnu>;
def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;

def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
def VST4LNq32Pseudo_UPD :
VSTQQQQLNWBPseudo<IIC_VST4lnu>; 2345 2346 } // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 2347 2348 // Use vld1/vst1 for unaligned f64 load / store 2349 def : Pat<(f64 (hword_alignedload addrmode6:$addr)), 2350 (VLD1d16 addrmode6:$addr)>, Requires<[IsLE]>; 2351 def : Pat<(hword_alignedstore (f64 DPR:$value), addrmode6:$addr), 2352 (VST1d16 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>; 2353 def : Pat<(f64 (byte_alignedload addrmode6:$addr)), 2354 (VLD1d8 addrmode6:$addr)>, Requires<[IsLE]>; 2355 def : Pat<(byte_alignedstore (f64 DPR:$value), addrmode6:$addr), 2356 (VST1d8 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>; 2357 def : Pat<(f64 (non_word_alignedload addrmode6:$addr)), 2358 (VLD1d64 addrmode6:$addr)>, Requires<[IsBE]>; 2359 def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr), 2360 (VST1d64 addrmode6:$addr, DPR:$value)>, Requires<[IsBE]>; 2361 2362 // Use vld1/vst1 for Q and QQ. Also use them for unaligned v2f64 2363 // load / store if it's legal. 
2364 def : Pat<(v2f64 (dword_alignedload addrmode6:$addr)), 2365 (VLD1q64 addrmode6:$addr)>; 2366 def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr), 2367 (VST1q64 addrmode6:$addr, QPR:$value)>; 2368 def : Pat<(v2f64 (word_alignedload addrmode6:$addr)), 2369 (VLD1q32 addrmode6:$addr)>, Requires<[IsLE]>; 2370 def : Pat<(word_alignedstore (v2f64 QPR:$value), addrmode6:$addr), 2371 (VST1q32 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>; 2372 def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)), 2373 (VLD1q16 addrmode6:$addr)>, Requires<[IsLE]>; 2374 def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr), 2375 (VST1q16 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>; 2376 def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)), 2377 (VLD1q8 addrmode6:$addr)>, Requires<[IsLE]>; 2378 def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr), 2379 (VST1q8 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>; 2380 2381 //===----------------------------------------------------------------------===// 2382 // NEON pattern fragments 2383 //===----------------------------------------------------------------------===// 2384 2385 // Extract D sub-registers of Q registers. 
// Each XForm maps a lane index of a wide register to the dsub_N index of
// the D register that holds that lane.  The divisor is the number of
// lanes of that element size per D register (8 x i8, 4 x i16, 2 x i32,
// 1 x f64).
def DSubReg_i8_reg  : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/8, MVT::i32);
}]>;
def DSubReg_i16_reg : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/4, MVT::i32);
}]>;
def DSubReg_i32_reg : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/2, MVT::i32);
}]>;
def DSubReg_f64_reg : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue(), MVT::i32);
}]>;

// Extract S sub-registers of Q/D registers.
def SSubReg_f32_reg : SDNodeXForm<imm, [{
  assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue(), MVT::i32);
}]>;

// Translate lane numbers from Q registers to D subregs.
// Masks the Q-register lane index down to the lane index within one
// D register (complements the DSubReg_*_reg XForms above).
def SubReg_i8_lane  : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 7, MVT::i32);
}]>;
def SubReg_i16_lane : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 3, MVT::i32);
}]>;
def SubReg_i32_lane : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 1, MVT::i32);
}]>;

//===----------------------------------------------------------------------===//
// Instruction Classes
//===----------------------------------------------------------------------===//

// Basic 2-register operations: double- and quad-register.
// Template parameters named opN_M (e.g. op24_23) carry the instruction
// encoding bits for those bit positions; the fixed 0/1 argument to N2V
// selects the double- (0) vs. quad-register (1) form.
class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
           string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VUNAD, OpcodeStr, Dt,"$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm))))]>;
class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
           string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VUNAQ, OpcodeStr, Dt,"$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm))))]>;

// Basic 2-register intrinsics, both double- and quad-register.
// Same as N2VD/N2VQ but matching an intrinsic (SDPatternOperator) and
// taking an explicit itinerary.
class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
        (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
        (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Same as above, but not predicated.
// Non-predicated (N2Vnp-based) 2-register intrinsic, D-register form.
class N2VDIntnp<bits<2> op17_16, bits<3> op10_8, bit op7,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<0b10, op17_16, op10_8, op7, 0,  (outs DPR:$Vd), (ins DPR:$Vm),
          itin, OpcodeStr, Dt,
          [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;

// Non-predicated 2-register intrinsic, Q-register form.
class N2VQIntnp<bits<2> op17_16, bits<3> op10_8, bit op7,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<0b10, op17_16, op10_8, op7, 1,  (outs QPR:$Vd), (ins QPR:$Vm),
          itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Similar to NV2VQIntnp with some more encoding bits exposed (crypto).
// op19_18 and op6 become template parameters instead of fixed values.
class N2VQIntXnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
                 bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<op19_18, op17_16, op10_8, op7, op6, (outs QPR:$Vd), (ins QPR:$Vm),
          itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Same as N2VQIntXnp but with Vd as a src register.
// $src is tied to $Vd, so the destination is read-modify-write.
class N2VQIntX2np<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
                  bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<op19_18, op17_16, op10_8, op7, op6,
          (outs QPR:$Vd), (ins QPR:$src, QPR:$Vm),
          itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vm))))]> {
  let Constraints = "$src = $Vd";
}

// Narrow 2-register operations.
// Narrowing: Q-register source, D-register (half-width) result.
class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyD, ValueType TyQ, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
        (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (TyD (OpNode (TyQ QPR:$Vm))))]>;

// Narrow 2-register intrinsics.
class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyD, ValueType TyQ, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
        (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vm))))]>;

// Long 2-register operations (currently only used for VMOVL).
// Widening: D-register source, Q-register (double-width) result.
class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
        (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vm))))]>;

// Long 2-register intrinsics.
class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
        (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vm))))]>;

// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
// In-place pair shuffle: both operands are read and written, so both
// outputs are tied to the corresponding source registers.  No pattern;
// these are matched manually.
class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$Vd, DPR:$Vm),
        (ins DPR:$src1, DPR:$src2), IIC_VPERMD,
        OpcodeStr, Dt, "$Vd, $Vm",
        "$src1 = $Vd, $src2 = $Vm", []>;
class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
                  InstrItinClass itin, string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$Vd, QPR:$Vm),
        (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$Vd, $Vm",
        "$src1 = $Vd, $src2 = $Vm", []>;

// Basic 3-register operations: double- and quad-register.
class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}
// Same as N3VD but no data type.
class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr,
            ValueType ResTy, ValueType OpTy,
            SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 0, op4,
         (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
         OpcodeStr, "$Vd, $Vn, $Vm", "",
         [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>{
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// Scalar (by-lane) variant: second operand is a lane of a D register.
// 32-bit lanes restrict $Vm to DPR_VFP2; 16-bit lanes (below) to DPR_8,
// matching the encodable register ranges.
class N3VDSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType Ty, SDNode ShOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (Ty DPR:$Vd),
              (Ty (ShOp (Ty DPR:$Vn),
                        (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}
class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
               string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane","",
        [(set (Ty DPR:$Vd),
              (Ty (ShOp (Ty DPR:$Vn),
                        (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}

class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}
class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr,
            ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 1, op4,
         (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
         OpcodeStr, "$Vd, $Vn, $Vm", "",
         [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>{
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}
class N3VQSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (ShOp (ResTy QPR:$Vn),
                           (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}
class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (ShOp (ResTy QPR:$Vn),
                           (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}

// Basic 3-register intrinsics, both double- and quad-register.
class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// Non-predicated variant (N3Vnp encoding).
class N3VDIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                bit op4, Format f, InstrItinClass itin, string OpcodeStr,
                string Dt, ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
          [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;

// By-lane (scalar) intrinsic variants.
class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (Ty DPR:$Vd),
              (Ty (IntOp (Ty DPR:$Vn),
                         (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
                                           imm:$lane)))))]> {
  let isCommutable = 0;
}

class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (Ty DPR:$Vd),
              (Ty (IntOp (Ty DPR:$Vn),
                         (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
  let isCommutable = 0;
}

// "Sh" variant: operand order in the asm string and pattern is swapped
// ($Vm before $Vn); used for shift-style intrinsics.
class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                Format f, InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vm, DPR:$Vn), f, itin,
        OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (OpTy DPR:$Vn))))]> {
  let TwoOperandAliasConstraint = "$Vm = $Vd";
  let isCommutable = 0;
}

class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

class N3VQIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                bit op4, Format f, InstrItinClass itin, string OpcodeStr,
                string Dt, ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;

// Same as N3VQIntnp but with Vd as a src register.
// $src is tied to $Vd, so the destination is read-modify-write.
class N3VQInt3np<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                 bit op4, Format f, InstrItinClass itin, string OpcodeStr,
                 string Dt, ValueType ResTy, ValueType OpTy,
                 SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs QPR:$Vd), (ins QPR:$src, QPR:$Vn, QPR:$Vm),
          f, itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vn),
                                       (OpTy QPR:$Vm))))]> {
  let Constraints = "$src = $Vd";
}

class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (ResTy QPR:$Vn),
                            (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                 imm:$lane)))))]> {
  let isCommutable = 0;
}
class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (ResTy QPR:$Vn),
                            (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                 imm:$lane)))))]> {
  let isCommutable = 0;
}
class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                Format f, InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vm, QPR:$Vn), f, itin,
        OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (OpTy QPR:$Vn))))]> {
  let TwoOperandAliasConstraint = "$Vm = $Vd";
  let isCommutable = 0;
}

// Multiply-Add/Sub operations: double- and quad-register.
// Accumulator input $src1 is tied to the destination $Vd.
class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDPatternOperator MulOp, SDPatternOperator OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
                             (Ty (MulOp DPR:$Vn, DPR:$Vm)))))]>;

class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd),
        (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set (Ty DPR:$Vd),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (MulOp DPR:$Vn,
                                   (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
                                                     imm:$lane)))))))]>;
class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType Ty, SDNode MulOp, SDNode ShOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd),
        (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set (Ty DPR:$Vd),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (MulOp DPR:$Vn,
                                   (Ty (NEONvduplane (Ty DPR_8:$Vm),
                                                     imm:$lane)))))))]>;

class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty,
                SDPatternOperator MulOp, SDPatternOperator OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
                             (Ty (MulOp QPR:$Vn, QPR:$Vm)))))]>;
class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
                  SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set (ResTy QPR:$Vd),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (MulOp QPR:$Vn,
                                         (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                              imm:$lane)))))))]>;
class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType ResTy, ValueType OpTy,
                    SDNode MulOp, SDNode ShOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set (ResTy QPR:$Vd),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (MulOp QPR:$Vn,
                                         (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                              imm:$lane)))))))]>;

// Neon Intrinsic-Op instructions (VABA): double- and quad-register.
// Intrinsic result combined with the accumulator via OpNode;
// $src1 is tied to the destination $Vd.
class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
                             (Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>;
class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
                             (Ty (IntOp (Ty QPR:$Vn), (Ty QPR:$Vm))))))]>;

// Neon 3-argument intrinsics, both double- and quad-register.
// The destination register is also used as the first source operand register.
class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$src1),
                                     (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src1),
                                     (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;

// Long Multiply-Add/Sub operations.
// D-register multiply producing a Q-register product, accumulated into
// the tied Q-register destination.
class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
                               (TyQ (MulOp (TyD DPR:$Vn),
                                           (TyD DPR:$Vm)))))]>;
class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set QPR:$Vd,
              (OpNode (TyQ QPR:$src1),
                      (TyQ (MulOp (TyD DPR:$Vn),
                                  (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),
                                                     imm:$lane))))))]>;
class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set QPR:$Vd,
              (OpNode (TyQ QPR:$src1),
                      (TyQ (MulOp (TyD DPR:$Vn),
                                  (TyD (NEONvduplane (TyD DPR_8:$Vm),
                                                     imm:$lane))))))]>;

// Long Intrinsic-Op vector operations with explicit extend (VABAL).
// The D-sized intrinsic result is widened by ExtOp before accumulation.
class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp,
                   SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
                               (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                                       (TyD DPR:$Vm)))))))]>;

// Neon Long 3-argument intrinsic.  The destination register is
// a quad-register and is also used as the first source operand register.
class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
              (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$Vn), (TyD DPR:$Vm))))]>;
// By-lane variants of the long 3-argument intrinsic.
class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (ResTy QPR:$src1),
                            (OpTy DPR:$Vn),
                            (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                imm:$lane)))))]>;
class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (ResTy QPR:$src1),
                            (OpTy DPR:$Vn),
                            (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                imm:$lane)))))]>;

// Narrowing 3-register intrinsics.
// Narrowing: two Q-register sources, D-register result.
class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
              SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINi4D,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vn), (TyQ QPR:$Vm))))]> {
  let isCommutable = Commutable;
}

// Long 3-register operations.
// Two D-register sources producing a Q-register result.
class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  let isCommutable = Commutable;
}

class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set QPR:$Vd,
              (TyQ (OpNode (TyD DPR:$Vn),
                           (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),imm:$lane)))))]>;
class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set QPR:$Vd,
              (TyQ (OpNode (TyD DPR:$Vn),
                           (TyD (NEONvduplane (TyD DPR_8:$Vm), imm:$lane)))))]>;

// Long 3-register operations with explicitly extended operands.
// Both D-register operands are widened by ExtOp before OpNode is applied.
class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDNode OpNode, SDNode ExtOp,
              bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (OpNode (TyQ (ExtOp (TyD DPR:$Vn))),
                               (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  let isCommutable = Commutable;
}

// Long 3-register intrinsics with explicit extend (VABDL).
// The D-sized intrinsic result is widened by ExtOp.
class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                 InstrItinClass itin, string OpcodeStr, string Dt,
                 ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp,
                 bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                               (TyD DPR:$Vm))))))]> {
  let isCommutable = Commutable;
}

// Long 3-register intrinsics.
class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  let isCommutable = Commutable;
}

// Same as above, but not predicated.
class N3VLIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                bit op4, InstrItinClass itin, string OpcodeStr,
                string Dt, ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;

// By-lane variants of the long 3-register intrinsic.
class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (OpTy DPR:$Vn),
                            (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                imm:$lane)))))]>;
class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (OpTy DPR:$Vn),
                            (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                imm:$lane)))))]>;

// Wide 3-register operations.
// Q-register first operand; the D-register second operand is widened
// by ExtOp before OpNode is applied.
class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
           SDNode OpNode, SDNode ExtOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VSUBiD,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$Vn),
                               (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// Pairwise long 2-register intrinsics, both double- and quad-register.
class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Pairwise long 2-register accumulate intrinsics,
// both double- and quad-register.
// The destination register is also used as the first source operand register.
// Pairwise long 2-register accumulate intrinsic, D-register form.
// The "$src1 = $Vd" constraint ties the accumulator input to the
// destination register.
class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vm), IIC_VPALiD,
        OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$Vm))))]>;
// Quad-register form of the above (op6 bit set to 1).
class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vm), IIC_VPALiQ,
        OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$Vm))))]>;

// Shift by immediate,
// both double- and quad-register.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin, Operand ImmTy,
             string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), f, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd, (Ty (OpNode (Ty DPR:$Vm), (i32 imm:$SIMM))))]>;
class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin, Operand ImmTy,
             string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), f, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd, (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>;
}

// Long shift by immediate.
// Long shift by immediate: D-register source, Q-register (widened) result.
class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, Operand ImmTy,
             SDPatternOperator OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs QPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), N2RegVShLFrm,
           IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm), ImmTy:$SIMM)))]>;

// Narrow shift by immediate: Q-register source, D-register (narrowed) result.
class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, Operand ImmTy,
             SDPatternOperator OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs DPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm),
                                         (i32 ImmTy:$SIMM))))]>;

// Shift right by immediate and accumulate,
// both double- and quad-register.
// Shift right by immediate and accumulate: the shifted $Vm is added to
// the accumulator $src1, which is tied to the destination register.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
           (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set DPR:$Vd, (Ty (add DPR:$src1,
                                   (Ty (ShOp DPR:$Vm, (i32 imm:$SIMM))))))]>;
class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
           (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set QPR:$Vd, (Ty (add QPR:$src1,
                                   (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>;
}

// Shift by immediate and insert,
// both double- and quad-register.
// ShOp here takes the tied destination ($src1) as an extra operand, so the
// pattern models shift-and-insert semantics rather than plain shift.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, Format f, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
           (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set DPR:$Vd, (Ty (ShOp DPR:$src1, DPR:$Vm, (i32 imm:$SIMM))))]>;
class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, Format f, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
           (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiQ,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set QPR:$Vd, (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>;
}

// Convert, with fractional bits immediate,
// both double- and quad-register.
// Fixed-point conversion with a fractional-bits immediate, D-register form.
class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
              SDPatternOperator IntOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd), (ins DPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
           IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>;
// Fixed-point conversion with a fractional-bits immediate, Q-register form.
class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
              SDPatternOperator IntOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd), (ins QPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
           IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>;

//===----------------------------------------------------------------------===//
// Multiclasses
//===----------------------------------------------------------------------===//

// Abbreviations used in multiclass suffixes:
//   Q = quarter int (8 bit) elements
//   H = half int (16 bit) elements
//   S = single int (32 bit) elements
//   D = double int (64 bit) elements

// Neon 2-register vector operations and intrinsics.

// Neon 2-register comparisons.
//   source operand element sizes of 8, 16 and 32 bits:
multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op4, string opc, string Dt,
                       string asm, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "8"), asm, "",
                  [(set DPR:$Vd, (v8i8 (OpNode (v8i8 DPR:$Vm))))]>;
  def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "16"), asm, "",
                  [(set DPR:$Vd, (v4i16 (OpNode (v4i16 DPR:$Vm))))]>;
  def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "32"), asm, "",
                  [(set DPR:$Vd, (v2i32 (OpNode (v2i32 DPR:$Vm))))]>;
  // Floating-point comparison shares the 32-bit element encoding but
  // forces the F bit (Inst{10}) on.
  def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, "f32", asm, "",
                  [(set DPR:$Vd, (v2i32 (OpNode (v2f32 DPR:$Vm))))]> {
    let Inst{10} = 1; // overwrite F = 1
  }

  // 128-bit vector types.
  def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "8"), asm, "",
                  [(set QPR:$Vd, (v16i8 (OpNode (v16i8 QPR:$Vm))))]>;
  def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "16"), asm, "",
                  [(set QPR:$Vd, (v8i16 (OpNode (v8i16 QPR:$Vm))))]>;
  def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "32"), asm, "",
                  [(set QPR:$Vd, (v4i32 (OpNode (v4i32 QPR:$Vm))))]>;
  def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, "f32", asm, "",
                  [(set QPR:$Vd, (v4i32 (OpNode (v4f32 QPR:$Vm))))]> {
    let Inst{10} = 1; // overwrite F = 1
  }
}


// Neon 2-register vector intrinsics,
//   element sizes of 8, 16 and 32 bits:
multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                      bits<5> op11_7, bit op4,
                      InstrItinClass itinD, InstrItinClass itinQ,
                      string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
  def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "16"),v4i16,v4i16,IntOp>;
  def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "32"),v2i32,v2i32,IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8,v16i8,IntOp>;
  def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "16"),v8i16,v8i16,IntOp>;
  def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "32"),v4i32,v4i32,IntOp>;
}


// Neon Narrowing 2-register vector operations,
//   source operand element sizes of 16, 32 and 64 bits:
multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                    bits<5> op11_7, bit op6, bit op4,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    SDNode OpNode> {
  def v8i8  : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, !strconcat(Dt, "16"),
                   v8i8, v8i16, OpNode>;
  def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, !strconcat(Dt, "32"),
                   v4i16, v4i32, OpNode>;
  def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, !strconcat(Dt, "64"),
                   v2i32, v2i64, OpNode>;
}

// Neon Narrowing 2-register vector intrinsics,
//   source operand element sizes of 16, 32 and 64 bits:
multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op6, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       SDPatternOperator IntOp> {
  def v8i8  : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "16"),
                      v8i8, v8i16, IntOp>;
  def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "32"),
                      v4i16, v4i32, IntOp>;
  def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "64"),
                      v2i32, v2i64, IntOp>;
}


// Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL).
//   source operand element sizes of 16, 32 and 64 bits:
multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
                    string OpcodeStr, string Dt, SDNode OpNode> {
  def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>;
  def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
  def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
}


// Neon 3-register vector operations.

// First with only element sizes of 8, 16 and 32 bits:
multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                   InstrItinClass itinD16, InstrItinClass itinD32,
                   InstrItinClass itinQ16, InstrItinClass itinQ32,
                   string OpcodeStr, string Dt,
                   SDNode OpNode, bit Commutable = 0> {
  // 64-bit vector types.
  def v8i8  : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i8, v8i8, OpNode, Commutable>;
  def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i16, v4i16, OpNode, Commutable>;
  def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i32, v2i32, OpNode, Commutable>;

  // 128-bit vector types.
  def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v16i8, v16i8, OpNode, Commutable>;
  def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v8i16, v8i16, OpNode, Commutable>;
  def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v4i32, v4i32, OpNode, Commutable>;
}

// Scalar-by-lane shifts/ops for 16- and 32-bit elements, D and Q forms.
multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> {
  def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, "i16", v4i16, ShOp>;
  def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, "i32", v2i32, ShOp>;
  def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, "i16", v8i16, v4i16, ShOp>;
  def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, "i32",
                     v4i32, v2i32, ShOp>;
}

// ....then also with element size 64 bits:
multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itinD, InstrItinClass itinQ,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0>
  : N3V_QHS<op24, op23, op11_8, op4, itinD, itinD, itinQ, itinQ,
            OpcodeStr, Dt, OpNode, Commutable> {
  def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD,
                   OpcodeStr, !strconcat(Dt, "64"),
                   v1i64, v1i64, OpNode, Commutable>;
  def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ,
                   OpcodeStr, !strconcat(Dt, "64"),
                   v2i64, v2i64, OpNode, Commutable>;
}


// Neon 3-register vector intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                     InstrItinClass itinD16, InstrItinClass itinD32,
                     InstrItinClass itinQ16, InstrItinClass itinQ32,
                     string OpcodeStr, string Dt,
                     SDPatternOperator IntOp, bit Commutable = 0> {
  // 64-bit vector types.
  def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i16, v4i16, IntOp, Commutable>;
  def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, f, itinD32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i32, v2i32, IntOp, Commutable>;

  // 128-bit vector types.
  def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, f, itinQ16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v8i16, v8i16, IntOp, Commutable>;
  def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, f, itinQ32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v4i32, v4i32, IntOp, Commutable>;
}
// "Sh" (shift) variant: uses the N3VDIntSh/N3VQIntSh classes and carries no
// Commutable parameter.
multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v4i16 : N3VDIntSh<op24, op23, 0b01, op11_8, op4, f, itinD16,
                        OpcodeStr, !strconcat(Dt, "16"),
                        v4i16, v4i16, IntOp>;
  def v2i32 : N3VDIntSh<op24, op23, 0b10, op11_8, op4, f, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v8i16 : N3VQIntSh<op24, op23, 0b01, op11_8, op4, f, itinQ16,
                        OpcodeStr, !strconcat(Dt, "16"),
                        v8i16, v8i16, IntOp>;
  def v4i32 : N3VQIntSh<op24, op23, 0b10, op11_8, op4, f, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v4i32, v4i32, IntOp>;
}

// By-lane intrinsic forms for 16- and 32-bit elements, D and Q register sizes.
multiclass N3VIntSL_HS<bits<4> op11_8,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
                          OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>;
  def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>;
  def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
                          OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>;
  def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                      InstrItinClass itinD16, InstrItinClass itinD32,
                      InstrItinClass itinQ16, InstrItinClass itinQ32,
                      string OpcodeStr, string Dt,
                      SDPatternOperator IntOp, bit Commutable = 0>
  : N3VInt_HS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
              OpcodeStr, Dt, IntOp, Commutable> {
  def v8i8  : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i8, v8i8, IntOp, Commutable>;
  def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, f, itinQ16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v16i8, v16i8, IntOp, Commutable>;
}
multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt,
                        SDPatternOperator IntOp>
  : N3VInt_HSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
                OpcodeStr, Dt, IntOp> {
  def v8i8  : N3VDIntSh<op24, op23, 0b00, op11_8, op4, f, itinD16,
                        OpcodeStr, !strconcat(Dt, "8"),
                        v8i8, v8i8, IntOp>;
  def v16i8 : N3VQIntSh<op24, op23, 0b00, op11_8, op4, f, itinQ16,
                        OpcodeStr, !strconcat(Dt, "8"),
                        v16i8, v16i8, IntOp>;
}


// ....then also with element size of 64 bits:
multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0>
  : N3VInt_QHS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
               OpcodeStr, Dt, IntOp, Commutable> {
  def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v1i64, v1i64, IntOp, Commutable>;
  def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, f, itinQ32,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v2i64, v2i64, IntOp, Commutable>;
}
multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt,
                         SDPatternOperator IntOp>
  : N3VInt_QHSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
                 OpcodeStr, Dt, IntOp> {
  def v1i64 : N3VDIntSh<op24, op23, 0b11, op11_8, op4, f, itinD32,
                        OpcodeStr, !strconcat(Dt, "64"),
                        v1i64, v1i64, IntOp>;
  def v2i64 : N3VQIntSh<op24, op23, 0b11, op11_8, op4, f, itinQ32,
                        OpcodeStr, !strconcat(Dt, "64"),
                        v2i64, v2i64, IntOp>;
}

// Neon Narrowing 3-register vector intrinsics,
//   source operand element sizes of 16, 32 and 64 bits:
multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0> {
  def v8i8  : N3VNInt<op24, op23, 0b00, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v8i8, v8i16, IntOp, Commutable>;
  def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v4i16, v4i32, IntOp, Commutable>;
  def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v2i32, v2i64, IntOp, Commutable>;
}


// Neon Long 3-register vector operations.

multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itin16, InstrItinClass itin32,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0> {
  def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i16, v8i8, OpNode, Commutable>;
  def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i32, v4i16, OpNode, Commutable>;
  def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i64, v2i32, OpNode, Commutable>;
}

// Long by-lane operations; only 16- and 32-bit source elements support lanes.
multiclass N3VLSL_HS<bit op24, bits<4> op11_8,
                     InstrItinClass itin, string OpcodeStr, string Dt,
                     SDNode OpNode> {
  def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin, OpcodeStr,
                       !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
  def v2i32 : N3VLSL<op24, 0b10, op11_8, itin, OpcodeStr,
                     !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
}

multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
  def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i16, v8i8, OpNode, ExtOp, Commutable>;
  def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i32, v4i16, OpNode, ExtOp, Commutable>;
  def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i64, v2i32, OpNode, ExtOp, Commutable>;
}

// Neon Long 3-register vector intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itin16, InstrItinClass itin32,
                      string OpcodeStr, string Dt,
                      SDPatternOperator IntOp, bit Commutable = 0> {
  def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i32, v4i16, IntOp, Commutable>;
  def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i64, v2i32, IntOp, Commutable>;
}

multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
                        InstrItinClass itin, string OpcodeStr, string Dt,
                        SDPatternOperator IntOp> {
  def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
                          OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0>
  : N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt,
               IntOp, Commutable> {
  def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i16, v8i8, IntOp, Commutable>;
}

// ....with explicit extend (VABDL).
// Long 3-register intrinsics with explicit extend (VABDL),
//   source element sizes of 8, 16 and 32 bits:
multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                          InstrItinClass itin, string OpcodeStr, string Dt,
                          SDPatternOperator IntOp, SDNode ExtOp,
                          bit Commutable = 0> {
  def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "8"),
                         v8i16, v8i8, IntOp, ExtOp, Commutable>;
  def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "16"),
                         v4i32, v4i16, IntOp, ExtOp, Commutable>;
  def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "32"),
                         v2i64, v2i32, IntOp, ExtOp, Commutable>;
}


// Neon Wide 3-register vector intrinsics,
//   source operand element sizes of 8, 16 and 32 bits:
multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
  def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i16, v8i8, OpNode, ExtOp, Commutable>;
  def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i32, v4i16, OpNode, ExtOp, Commutable>;
  def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i64, v2i32, OpNode, ExtOp, Commutable>;
}


// Neon Multiply-Op vector operations,
//   element sizes of 8, 16 and 32 bits:
multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, mul, OpNode>;
  def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, OpNode>;
  def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, mul, OpNode>;
  def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, mul, OpNode>;
  def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, mul, OpNode>;
}

// Multiply-op by-lane forms; only 16- and 32-bit elements support lanes.
multiclass N3VMulOpSL_HS<bits<4> op11_8,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt, SDNode ShOp> {
  def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
                            OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>;
  def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
                          OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>;
  def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
                            OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16,
                            mul, ShOp>;
  def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
                          OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32,
                          mul, ShOp>;
}

// Neon Intrinsic-Op vector operations,
//   element sizes of 8, 16 and 32 bits:
multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD, InstrItinClass itinQ,
                        string OpcodeStr, string Dt, SDPatternOperator IntOp,
                        SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, IntOp, OpNode>;
  def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp, OpNode>;
  def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, IntOp, OpNode>;
  def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, IntOp, OpNode>;
  def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, IntOp, OpNode>;
}

// Neon 3-argument intrinsics,
//   element sizes of 8, 16 and 32 bits:
multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itinD, InstrItinClass itinQ,
                       string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD,
                       OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
  def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD,
                       OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
  def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD,
                       OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ,
                       OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
  def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ,
                       OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
  def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ,
                       OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
}


// Neon Long Multiply-Op vector operations,
//   element sizes of 8, 16 and 32 bits:
multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                         InstrItinClass itin16, InstrItinClass itin32,
                         string OpcodeStr, string Dt, SDNode MulOp,
                         SDNode OpNode> {
  def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
                        !strconcat(Dt, "8"), v8i16, v8i8, MulOp, OpNode>;
  def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
                        !strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>;
  def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
                        !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
}

multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr,
                          string Dt, SDNode MulOp, SDNode OpNode> {
  def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D, OpcodeStr,
                            !strconcat(Dt,"16"), v4i32, v4i16, MulOp, OpNode>;
  def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D, OpcodeStr,
                          !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
}


// Neon Long 3-argument intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16,
                       OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32,
                       OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
                         string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D,
                           OpcodeStr, !strconcat(Dt,"16"), v4i32, v4i16, IntOp>;
  def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D,
                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itin16, InstrItinClass itin32,
                        string OpcodeStr, string Dt, SDPatternOperator IntOp>
  : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, IntOp> {
  def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16,
                       OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
}

// ....with explicit extend (VABAL).
multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                            InstrItinClass itin, string OpcodeStr, string Dt,
                            SDPatternOperator IntOp, SDNode ExtOp, SDNode OpNode> {
  def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8,
                           IntOp, ExtOp, OpNode>;
  def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16,
                           IntOp, ExtOp, OpNode>;
  def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32,
                           IntOp, ExtOp, OpNode>;
}


// Neon Pairwise long 2-register intrinsics,
// element sizes of 8, 16 and 32 bits:
// The result vector has half as many elements, each twice as wide,
// as the source (e.g. v8i8 -> v4i16).
multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                        bits<5> op11_7, bit op4,
                        string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}


// Neon Pairwise long 2-register accumulate intrinsics,
// element sizes of 8, 16 and 32 bits:
multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                         bits<5> op11_7, bit op4,
                         string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}


// Neon 2-register vector shift by immediate,
// with f of either N2RegVShLFrm or N2RegVShRFrm
// element sizes of 8, 16, 32 and 64 bits:
// The fixed high bits pinned into Inst{21-19} below are the leading bits
// of the imm6 field; they select the element size while the remaining
// low bits carry the shift amount.
multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
                             // imm6 = xxxxxx
}
multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       string baseOpc, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
                     OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
                             // imm6 = xxxxxx
}

// Neon Shift-Accumulate vector operations,
// element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                         string OpcodeStr, string Dt, SDNode ShOp> {
  // 64-bit vector types.
  def v8i8  : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
                        OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
                        OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>;
                             // imm6 = xxxxxx
}

// Neon Shift-Insert vector operations,
// with f of either N2RegVShLFrm or N2RegVShRFrm
// element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShInsL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                          string OpcodeStr> {
  // 64-bit vector types.
  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsli> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsli> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsli> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsli>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsli> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsli> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsli> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsli>;
                             // imm6 = xxxxxx
}
multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                          string OpcodeStr> {
  // 64-bit vector types.
  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm8,
                        N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsri> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm16,
                        N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsri> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm32,
                        N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsri> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, shr_imm64,
                        N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsri>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm8,
                        N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsri> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm16,
                        N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsri> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm32,
                        N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsri> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, shr_imm64,
                        N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsri>;
                             // imm6 = xxxxxx
}

// Neon Shift Long operations,
// element sizes of 8, 16, 32 bits:
multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                      bit op4, string OpcodeStr, string Dt,
                      SDPatternOperator OpNode> {
  def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
              OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, imm1_7, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
               OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, imm1_15, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
               OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, imm1_31, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
}

// Neon Shift Narrow operations,
// element sizes of 16, 32, 64 bits:
// Note: the Dt suffix names the (wide) source element size, while the
// result element is half that width (e.g. "16" narrows v8i16 -> v8i8).
multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                      bit op4, InstrItinClass itin, string OpcodeStr, string Dt,
                      SDPatternOperator OpNode> {
  def v8i8 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                    OpcodeStr, !strconcat(Dt, "16"),
                    v8i8, v8i16, shr_imm8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                     OpcodeStr, !strconcat(Dt, "32"),
                     v4i16, v4i32, shr_imm16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                     OpcodeStr, !strconcat(Dt, "64"),
                     v2i32, v2i64, shr_imm32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
}

//===----------------------------------------------------------------------===//
// Instruction Definitions.
//===----------------------------------------------------------------------===//

// Vector Add Operations.
// VADD : Vector Add (integer and floating-point)
defm VADD : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i",
                     add, 1>;
def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
                  v2f32, v2f32, fadd, 1>;
def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
                  v4f32, v4f32, fadd, 1>;
// VADDL : Vector Add Long (Q = D + D)
defm VADDLs : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
                          "vaddl", "s", add, sext, 1>;
defm VADDLu : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
                          "vaddl", "u", add, zext, 1>;
// VADDW : Vector Add Wide (Q = Q + D)
defm VADDWs : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>;
defm VADDWu : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zext, 0>;
// VHADD : Vector Halving Add
defm VHADDs : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
                         IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                         "vhadd", "s", int_arm_neon_vhadds, 1>;
defm VHADDu : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm,
                         IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                         "vhadd", "u", int_arm_neon_vhaddu, 1>;
// VRHADD : Vector Rounding Halving Add
defm VRHADDs : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm,
                          IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                          "vrhadd", "s", int_arm_neon_vrhadds, 1>;
defm VRHADDu : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm,
                          IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                          "vrhadd", "u", int_arm_neon_vrhaddu, 1>;
// VQADD : Vector Saturating Add
defm VQADDs : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm,
                          IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                          "vqadd", "s", int_arm_neon_vqadds, 1>;
defm VQADDu : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm,
                          IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                          "vqadd", "u", int_arm_neon_vqaddu, 1>;
// VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q)
// Uses null_frag: selection is done by the explicit trunc/shift
// patterns immediately below instead of an intrinsic.
defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i",
                          null_frag, 1>;
// VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
                           int_arm_neon_vraddhn, 1>;

def : Pat<(v8i8 (trunc (NEONvshru (add (v8i16 QPR:$Vn), QPR:$Vm), 8))),
          (VADDHNv8i8 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v4i16 (trunc (NEONvshru (add (v4i32 QPR:$Vn), QPR:$Vm), 16))),
          (VADDHNv4i16 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v2i32 (trunc (NEONvshru (add (v2i64 QPR:$Vn), QPR:$Vm), 32))),
          (VADDHNv2i32 QPR:$Vn, QPR:$Vm)>;

// Vector Multiply Operations.

// VMUL : Vector Multiply (integer, polynomial and floating-point)
defm VMUL : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D,
                    IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>;
def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul",
                     "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>;
def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul",
                     "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>;
def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
                  v2f32, v2f32, fmul, 1>;
def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
                  v4f32, v4f32, fmul, 1>;
defm VMULsl : N3VSL_HS<0b1000, "vmul", mul>;
def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
                      v2f32, fmul>;

// Q-register by-lane multiplies: extract the D sub-register that holds
// the selected lane and use the D-operand scalar form.
def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
                      (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
          (v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
                              (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (mul (v4i32 QPR:$src1),
                      (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
          (v4i32 (VMULslv4i32 (v4i32 QPR:$src1),
                              (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
                       (v4f32 (NEONvduplane (v4f32 QPR:$src2), imm:$lane)))),
          (v4f32 (VMULslfq (v4f32 QPR:$src1),
                           (v2f32 (EXTRACT_SUBREG QPR:$src2,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>;


def : Pat<(v2f32 (fmul DPR:$Rn, (NEONvdup (f32 SPR:$Rm)))),
          (VMULslfd DPR:$Rn,
            (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
            (i32 0))>;
def : Pat<(v4f32 (fmul QPR:$Rn, (NEONvdup (f32 SPR:$Rm)))),
          (VMULslfq QPR:$Rn,
            (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
            (i32 0))>;


// VQDMULH : Vector Saturating Doubling Multiply Returning High Half
defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
                         IIC_VMULi16Q, IIC_VMULi32Q,
                         "vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
                            IIC_VMULi16Q, IIC_VMULi32Q,
                            "vqdmulh", "s", int_arm_neon_vqdmulh>;
def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
                                       (v8i16 (NEONvduplane (v8i16 QPR:$src2),
                                                            imm:$lane)))),
          (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
                                 (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                         (DSubReg_i16_reg imm:$lane))),
                                 (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
                                       (v4i32 (NEONvduplane (v4i32 QPR:$src2),
                                                            imm:$lane)))),
          (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
                                 (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                         (DSubReg_i32_reg imm:$lane))),
                                 (SubReg_i32_lane imm:$lane)))>;

// VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
defm VQRDMULH : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm,
                          IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q,
                          "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>;
defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D,
                              IIC_VMULi16Q, IIC_VMULi32Q,
                              "vqrdmulh", "s", int_arm_neon_vqrdmulh>;
def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
                                        (v8i16 (NEONvduplane (v8i16 QPR:$src2),
                                                             imm:$lane)))),
          (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
                                  (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i16_reg imm:$lane))),
                                  (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
                                        (v4i32 (NEONvduplane (v4i32 QPR:$src2),
                                                             imm:$lane)))),
          (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
                                  (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i32_reg imm:$lane))),
                                  (SubReg_i32_lane imm:$lane)))>;

// VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D)
let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
    DecoderNamespace = "NEONData" in {
  defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
                         "vmull", "s", NEONvmulls, 1>;
  defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
                         "vmull", "u", NEONvmullu, 1>;
  def VMULLp8 : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
                        v8i16, v8i8, int_arm_neon_vmullp, 1>;
  def VMULLp64 : N3VLIntnp<0b00101, 0b10, 0b1110, 0, 0, NoItinerary,
                           "vmull", "p64", v2i64, v1i64, int_arm_neon_vmullp, 1>,
                 Requires<[HasV8, HasCrypto]>;
}
defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>;
defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>;

// VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D)
defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D,
                          "vqdmull", "s", int_arm_neon_vqdmull, 1>;
defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D,
                             "vqdmull", "s", int_arm_neon_vqdmull>;

// Vector Multiply-Accumulate and Multiply-Subtract Operations.
// VMLA : Vector Multiply Accumulate (integer and floating-point)
defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                         IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
// Separate fp mul + add is only selected when VMLx is profitable and
// fused MAC is not in use (see the Requires<> predicates).
def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
                       v2f32, fmul_su, fadd_mlx>,
             Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
                       v4f32, fmul_su, fadd_mlx>,
             Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
                            IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
                           v2f32, fmul_su, fadd_mlx>,
               Requires<[HasNEON, UseFPVMLx]>;
def VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
                           v4f32, v2f32, fmul_su, fadd_mlx>,
               Requires<[HasNEON, UseFPVMLx]>;

// Q-register by-lane forms: extract the D sub-register holding the lane.
def : Pat<(v8i16 (add (v8i16 QPR:$src1),
                  (mul (v8i16 QPR:$src2),
                       (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
          (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4i32 (add (v4i32 QPR:$src1),
                  (mul (v4i32 QPR:$src2),
                       (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
          (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;

def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1),
                 (fmul_su (v4f32 QPR:$src2),
                          (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
          (v4f32 (VMLAslfq (v4f32 QPR:$src1),
                           (v4f32 QPR:$src2),
                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>,
      Requires<[HasNEON, UseFPVMLx]>;

// VMLAL : Vector Multiply Accumulate Long (Q += D * D)
defm VMLALs : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
                            "vmlal", "s", NEONvmulls, add>;
defm VMLALu : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
                            "vmlal", "u", NEONvmullu, add>;

defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>;
defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>;

// VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
// null_frag: matched by the explicit vqadds/vqdmull patterns below.
defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                           "vqdmlal", "s", null_frag>;
defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", null_frag>;

def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1),
                   (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
                                                (v4i16 DPR:$Vm))))),
          (VQDMLALv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1),
                   (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
                                                (v2i32 DPR:$Vm))))),
          (VQDMLALv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1),
                   (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
                            (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
                                                 imm:$lane)))))),
          (VQDMLALslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1),
                   (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
                            (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
                                                 imm:$lane)))))),
          (VQDMLALslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;

// VMLS : Vector Multiply Subtract (integer and floating-point)
defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                         IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
                       v2f32, fmul_su, fsub_mlx>,
             Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
                       v4f32, fmul_su, fsub_mlx>,
             Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
                            IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
                           v2f32, fmul_su, fsub_mlx>,
               Requires<[HasNEON, UseFPVMLx]>;
def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32",
                           v4f32, v2f32, fmul_su, fsub_mlx>,
               Requires<[HasNEON, UseFPVMLx]>;

def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
                  (mul (v8i16 QPR:$src2),
                       (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
          (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
                  (mul (v4i32 QPR:$src2),
                       (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
          (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;

def : Pat<(v4f32 (fsub_mlx (v4f32 QPR:$src1),
                 (fmul_su (v4f32 QPR:$src2),
                          (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
          (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2),
                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>,
      Requires<[HasNEON, UseFPVMLx]>;

// VMLSL : Vector Multiply Subtract Long (Q -= D * D)
defm VMLSLs : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
                            "vmlsl", "s", NEONvmulls, sub>;
defm VMLSLu : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
                            "vmlsl", "u", NEONvmullu, sub>;

defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", NEONvmulls, sub>;
defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>;

// VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
                           "vqdmlsl", "s", null_frag>;
// op11_8 is a bits<4> field: write the full 4-bit literal 0b0111
// (same value as the previous 0b111, which relied on implicit
// zero-extension) to match the sibling defms (0b0011, 0b0110, ...).
defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111, "vqdmlsl", "s", null_frag>;

def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1),
                   (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
                                                (v4i16 DPR:$Vm))))),
          (VQDMLSLv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1),
                   (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
                                                (v2i32 DPR:$Vm))))),
          (VQDMLSLv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1),
                   (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
                            (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
                                                 imm:$lane)))))),
          (VQDMLSLslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1),
                   (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
                            (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
                                                 imm:$lane)))))),
          (VQDMLSLslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;

// Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations.
4401 def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32", 4402 v2f32, fmul_su, fadd_mlx>, 4403 Requires<[HasNEON,HasVFP4,UseFusedMAC]>; 4404 4405 def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32", 4406 v4f32, fmul_su, fadd_mlx>, 4407 Requires<[HasNEON,HasVFP4,UseFusedMAC]>; 4408 4409 // Fused Vector Multiply Subtract (floating-point) 4410 def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32", 4411 v2f32, fmul_su, fsub_mlx>, 4412 Requires<[HasNEON,HasVFP4,UseFusedMAC]>; 4413 def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32", 4414 v4f32, fmul_su, fsub_mlx>, 4415 Requires<[HasNEON,HasVFP4,UseFusedMAC]>; 4416 4417 // Match @llvm.fma.* intrinsics 4418 def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)), 4419 (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>, 4420 Requires<[HasVFP4]>; 4421 def : Pat<(v4f32 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)), 4422 (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>, 4423 Requires<[HasVFP4]>; 4424 def : Pat<(v2f32 (fma (fneg DPR:$Vn), DPR:$Vm, DPR:$src1)), 4425 (VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>, 4426 Requires<[HasVFP4]>; 4427 def : Pat<(v4f32 (fma (fneg QPR:$Vn), QPR:$Vm, QPR:$src1)), 4428 (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>, 4429 Requires<[HasVFP4]>; 4430 4431 // Vector Subtract Operations. 
// VSUB : Vector Subtract (integer and floating-point)
defm VSUB : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ,
                     "vsub", "i", sub, 0>;
def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
                  v2f32, v2f32, fsub, 0>;
def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
                  v4f32, v4f32, fsub, 0>;
// VSUBL : Vector Subtract Long (Q = D - D)
defm VSUBLs : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
                          "vsubl", "s", sub, sext, 0>;
defm VSUBLu : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
                          "vsubl", "u", sub, zext, 0>;
// VSUBW : Vector Subtract Wide (Q = Q - D)
defm VSUBWs : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>;
defm VSUBWu : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>;
// VHSUB : Vector Halving Subtract
defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
                         IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                         "vhsub", "s", int_arm_neon_vhsubs, 0>;
defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm,
                         IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                         "vhsub", "u", int_arm_neon_vhsubu, 0>;
// VQSUB : Vector Saturating Subtract
defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm,
                          IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                          "vqsub", "s", int_arm_neon_vqsubs, 0>;
defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
                          IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                          "vqsub", "u", int_arm_neon_vqsubu, 0>;
// VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q)
// Uses null_frag: matched by the explicit trunc/shift patterns below.
defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", null_frag, 0>;
// VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
                           int_arm_neon_vrsubhn, 0>;

def : Pat<(v8i8 (trunc (NEONvshru (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))),
          (VSUBHNv8i8 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v4i16 (trunc (NEONvshru (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))),
          (VSUBHNv4i16 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v2i32 (trunc (NEONvshru (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))),
          (VSUBHNv2i32 QPR:$Vn, QPR:$Vm)>;

// Vector Comparisons.

// VCEQ : Vector Compare Equal
defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                    IIC_VSUBi4Q, "vceq", "i", NEONvceq, 1>;
def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
                  NEONvceq, 1>;
def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
                  NEONvceq, 1>;

let TwoOperandAliasConstraint = "$Vm = $Vd" in
defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
                         "$Vd, $Vm, #0", NEONvceqz>;

// VCGE : Vector Compare Greater Than or Equal
defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                     IIC_VSUBi4Q, "vcge", "s", NEONvcge, 0>;
defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                     IIC_VSUBi4Q, "vcge", "u", NEONvcgeu, 0>;
def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
                  NEONvcge, 0>;
def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
                  NEONvcge, 0>;

let TwoOperandAliasConstraint = "$Vm = $Vd" in {
defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
                         "$Vd, $Vm, #0", NEONvcgez>;
defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
                         "$Vd, $Vm, #0", NEONvclez>;
}

// VCGT : Vector Compare Greater Than
defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                     IIC_VSUBi4Q, "vcgt", "s", NEONvcgt, 0>;
defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                     IIC_VSUBi4Q, "vcgt", "u", NEONvcgtu, 0>;
def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
                  NEONvcgt, 0>;
def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
                  NEONvcgt, 0>;

let TwoOperandAliasConstraint = "$Vm = $Vd" in {
defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
                         "$Vd, $Vm, #0", NEONvcgtz>;
defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
                         "$Vd, $Vm, #0", NEONvcltz>;
}

// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
                     "f32", v2i32, v2f32, int_arm_neon_vacge, 0>;
def VACGEq : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
                     "f32", v4i32, v4f32, int_arm_neon_vacge, 0>;
// VACGT : Vector Absolute Compare Greater Than (aka VCAGT)
def VACGTd : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
                     "f32", v2i32, v2f32, int_arm_neon_vacgt, 0>;
def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
                     "f32", v4i32, v4f32, int_arm_neon_vacgt, 0>;
// VTST : Vector Test Bits
defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                    IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;

// VACLT/VACLE assembly aliases: encoded as VACGT/VACGE with the source
// operands swapped.
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
                   (VACGTd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
                   (VACGTq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
                   (VACGEd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
                   (VACGEq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;

def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
                   (VACGTd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
                   (VACGTq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
                   (VACGEd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
                   (VACGEq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;

// Vector Bitwise Operations.

def vnotd : PatFrag<(ops node:$in),
                    (xor node:$in, (bitconvert (v8i8 NEONimmAllOnesV)))>;
def vnotq : PatFrag<(ops node:$in),
                    (xor node:$in, (bitconvert (v16i8 NEONimmAllOnesV)))>;


// VAND : Vector Bitwise AND
def VANDd : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand",
                  v2i32, v2i32, and, 1>;
def VANDq : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand",
                  v4i32, v4i32, and, 1>;

// VEOR : Vector Bitwise Exclusive OR
def VEORd : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor",
                  v2i32, v2i32, xor, 1>;
def VEORq : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor",
                  v4i32, v4i32, xor, 1>;

// VORR : Vector Bitwise OR
def VORRd : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr",
                  v2i32, v2i32, or, 1>;
def VORRq : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
                  v4i32, v4i32, or, 1>;

// VORR (immediate): two-operand form, accumulator tied via "$src = $Vd".
def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
                          (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v4i16 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1,
                          (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v2i32 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
                          (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v8i16 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1,
                          (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
IIC_VMOVImm, 4611 "vorr", "i32", "$Vd, $SIMM", "$src = $Vd", 4612 [(set QPR:$Vd, 4613 (v4i32 (NEONvorrImm QPR:$src, timm:$SIMM)))]> { 4614 let Inst{10-9} = SIMM{10-9}; 4615 } 4616 4617 4618 // VBIC : Vector Bitwise Bit Clear (AND NOT) 4619 let TwoOperandAliasConstraint = "$Vn = $Vd" in { 4620 def VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd), 4621 (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD, 4622 "vbic", "$Vd, $Vn, $Vm", "", 4623 [(set DPR:$Vd, (v2i32 (and DPR:$Vn, 4624 (vnotd DPR:$Vm))))]>; 4625 def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd), 4626 (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ, 4627 "vbic", "$Vd, $Vn, $Vm", "", 4628 [(set QPR:$Vd, (v4i32 (and QPR:$Vn, 4629 (vnotq QPR:$Vm))))]>; 4630 } 4631 4632 def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1, 4633 (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src), 4634 IIC_VMOVImm, 4635 "vbic", "i16", "$Vd, $SIMM", "$src = $Vd", 4636 [(set DPR:$Vd, 4637 (v4i16 (NEONvbicImm DPR:$src, timm:$SIMM)))]> { 4638 let Inst{9} = SIMM{9}; 4639 } 4640 4641 def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1, 4642 (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src), 4643 IIC_VMOVImm, 4644 "vbic", "i32", "$Vd, $SIMM", "$src = $Vd", 4645 [(set DPR:$Vd, 4646 (v2i32 (NEONvbicImm DPR:$src, timm:$SIMM)))]> { 4647 let Inst{10-9} = SIMM{10-9}; 4648 } 4649 4650 def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1, 4651 (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src), 4652 IIC_VMOVImm, 4653 "vbic", "i16", "$Vd, $SIMM", "$src = $Vd", 4654 [(set QPR:$Vd, 4655 (v8i16 (NEONvbicImm QPR:$src, timm:$SIMM)))]> { 4656 let Inst{9} = SIMM{9}; 4657 } 4658 4659 def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1, 4660 (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src), 4661 IIC_VMOVImm, 4662 "vbic", "i32", "$Vd, $SIMM", "$src = $Vd", 4663 [(set QPR:$Vd, 4664 (v4i32 (NEONvbicImm QPR:$src, timm:$SIMM)))]> { 4665 let Inst{10-9} = SIMM{10-9}; 4666 } 4667 4668 // VORN : Vector Bitwise OR NOT 4669 def VORNd 
: N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$Vd), 4670 (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD, 4671 "vorn", "$Vd, $Vn, $Vm", "", 4672 [(set DPR:$Vd, (v2i32 (or DPR:$Vn, 4673 (vnotd DPR:$Vm))))]>; 4674 def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$Vd), 4675 (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ, 4676 "vorn", "$Vd, $Vn, $Vm", "", 4677 [(set QPR:$Vd, (v4i32 (or QPR:$Vn, 4678 (vnotq QPR:$Vm))))]>; 4679 4680 // VMVN : Vector Bitwise NOT (Immediate) 4681 4682 let isReMaterializable = 1 in { 4683 4684 def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$Vd), 4685 (ins nImmSplatI16:$SIMM), IIC_VMOVImm, 4686 "vmvn", "i16", "$Vd, $SIMM", "", 4687 [(set DPR:$Vd, (v4i16 (NEONvmvnImm timm:$SIMM)))]> { 4688 let Inst{9} = SIMM{9}; 4689 } 4690 4691 def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$Vd), 4692 (ins nImmSplatI16:$SIMM), IIC_VMOVImm, 4693 "vmvn", "i16", "$Vd, $SIMM", "", 4694 [(set QPR:$Vd, (v8i16 (NEONvmvnImm timm:$SIMM)))]> { 4695 let Inst{9} = SIMM{9}; 4696 } 4697 4698 def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$Vd), 4699 (ins nImmVMOVI32:$SIMM), IIC_VMOVImm, 4700 "vmvn", "i32", "$Vd, $SIMM", "", 4701 [(set DPR:$Vd, (v2i32 (NEONvmvnImm timm:$SIMM)))]> { 4702 let Inst{11-8} = SIMM{11-8}; 4703 } 4704 4705 def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$Vd), 4706 (ins nImmVMOVI32:$SIMM), IIC_VMOVImm, 4707 "vmvn", "i32", "$Vd, $SIMM", "", 4708 [(set QPR:$Vd, (v4i32 (NEONvmvnImm timm:$SIMM)))]> { 4709 let Inst{11-8} = SIMM{11-8}; 4710 } 4711 } 4712 4713 // VMVN : Vector Bitwise NOT 4714 def VMVNd : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0, 4715 (outs DPR:$Vd), (ins DPR:$Vm), IIC_VSUBiD, 4716 "vmvn", "$Vd, $Vm", "", 4717 [(set DPR:$Vd, (v2i32 (vnotd DPR:$Vm)))]>; 4718 def VMVNq : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0, 4719 (outs QPR:$Vd), (ins QPR:$Vm), IIC_VSUBiD, 4720 "vmvn", "$Vd, $Vm", "", 4721 [(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>; 4722 def : Pat<(v2i32 
(vnotd DPR:$src)), (VMVNd DPR:$src)>; 4723 def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>; 4724 4725 // VBSL : Vector Bitwise Select 4726 def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd), 4727 (ins DPR:$src1, DPR:$Vn, DPR:$Vm), 4728 N3RegFrm, IIC_VCNTiD, 4729 "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd", 4730 [(set DPR:$Vd, 4731 (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>; 4732 def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1), 4733 (v8i8 DPR:$Vn), (v8i8 DPR:$Vm))), 4734 (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>, 4735 Requires<[HasNEON]>; 4736 def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1), 4737 (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))), 4738 (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>, 4739 Requires<[HasNEON]>; 4740 def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1), 4741 (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))), 4742 (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>, 4743 Requires<[HasNEON]>; 4744 def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1), 4745 (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))), 4746 (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>, 4747 Requires<[HasNEON]>; 4748 def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1), 4749 (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))), 4750 (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>, 4751 Requires<[HasNEON]>; 4752 4753 def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd), 4754 (and DPR:$Vm, (vnotd DPR:$Vd)))), 4755 (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>, 4756 Requires<[HasNEON]>; 4757 4758 def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd), 4759 (and DPR:$Vm, (vnotd DPR:$Vd)))), 4760 (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>, 4761 Requires<[HasNEON]>; 4762 4763 def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd), 4764 (ins QPR:$src1, QPR:$Vn, QPR:$Vm), 4765 N3RegFrm, IIC_VCNTiQ, 4766 "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd", 4767 [(set QPR:$Vd, 4768 (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>; 4769 4770 def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1), 4771 (v16i8 QPR:$Vn), (v16i8 QPR:$Vm))), 4772 (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>, 4773 Requires<[HasNEON]>; 4774 
// Map the vbsl intrinsic at the remaining Q-register element types onto VBSLq.
def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1),
                                    (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1),
                                    (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1),
                                    (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1),
                                    (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON]>;

// Recognize the expanded select form (Vn & Vd) | (Vm & ~Vd) as VBSL (Q regs).
def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
                     (and QPR:$Vm, (vnotq QPR:$Vd)))),
          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd),
                     (and QPR:$Vm, (vnotq QPR:$Vd)))),
          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON]>;

// VBIF : Vector Bitwise Insert if False
// like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
// FIXME: This instruction's encoding MAY NOT BE correct.
def  VBIFd : N3VX<1, 0, 0b11, 0b0001, 0, 1,
                  (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                  N3RegFrm, IIC_VBINiD,
                  "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                  []>;
def  VBIFq : N3VX<1, 0, 0b11, 0b0001, 1, 1,
                  (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                  N3RegFrm, IIC_VBINiQ,
                  "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                  []>;

// VBIT : Vector Bitwise Insert if True
// like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
// FIXME: This instruction's encoding MAY NOT BE correct.
def  VBITd : N3VX<1, 0, 0b10, 0b0001, 0, 1,
                  (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                  N3RegFrm, IIC_VBINiD,
                  "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                  []>;
def  VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1,
                  (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                  N3RegFrm, IIC_VBINiQ,
                  "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                  []>;

// VBIT/VBIF are not yet implemented.  The TwoAddress pass will not go looking
// for equivalent operations with different register constraints; it just
// inserts copies.

// Vector Absolute Differences.

// VABD : Vector Absolute Difference
defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vabd", "s", int_arm_neon_vabds, 1>;
defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vabd", "u", int_arm_neon_vabdu, 1>;
def  VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
                      "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
def  VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
                      "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;

// VABDL : Vector Absolute Difference Long (Q = | D - D |)
// Note: both signed and unsigned variants use zext for the widening, since
// the absolute difference itself is always non-negative.
defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
                             "vabdl", "s", int_arm_neon_vabds, zext, 1>;
defm VABDLu : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
                             "vabdl", "u", int_arm_neon_vabdu, zext, 1>;

// VABA : Vector Absolute Difference and Accumulate
defm VABAs : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
                          "vaba", "s", int_arm_neon_vabds, add>;
defm VABAu : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
                          "vaba", "u", int_arm_neon_vabdu, add>;

// VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
defm VABALs : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD,
                               "vabal", "s", int_arm_neon_vabds, zext, add>;
defm VABALu : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,
                               "vabal", "u", int_arm_neon_vabdu, zext, add>;

// Vector Maximum and Minimum.

// VMAX : Vector Maximum
defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vmax", "s", int_arm_neon_vmaxs, 1>;
defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vmax", "u", int_arm_neon_vmaxu, 1>;
def  VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
                      "vmax", "f32",
                      v2f32, v2f32, int_arm_neon_vmaxs, 1>;
def  VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                      "vmax", "f32",
                      v4f32, v4f32, int_arm_neon_vmaxs, 1>;

// VMAXNM (ARMv8-only NaN-propagating maximum)
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
  def VMAXNMND : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1,
                           N3RegFrm, NoItinerary, "vmaxnm", "f32",
                           v2f32, v2f32, int_arm_neon_vmaxnm, 1>,
                           Requires<[HasV8, HasNEON]>;
  def VMAXNMNQ : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1,
                           N3RegFrm, NoItinerary, "vmaxnm", "f32",
                           v4f32, v4f32, int_arm_neon_vmaxnm, 1>,
                           Requires<[HasV8, HasNEON]>;
}

// VMIN : Vector Minimum
defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vmin", "s", int_arm_neon_vmins, 1>;
defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vmin", "u", int_arm_neon_vminu, 1>;
def  VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
                      "vmin", "f32",
                      v2f32, v2f32, int_arm_neon_vmins, 1>;
def  VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                      "vmin", "f32",
                      v4f32, v4f32, int_arm_neon_vmins, 1>;

// VMINNM (ARMv8-only NaN-propagating minimum)
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
  def VMINNMND : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1,
                           N3RegFrm, NoItinerary, "vminnm", "f32",
                           v2f32, v2f32, int_arm_neon_vminnm, 1>,
                           Requires<[HasV8, HasNEON]>;
  def VMINNMNQ : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1,
                           N3RegFrm, NoItinerary, "vminnm", "f32",
                           v4f32, v4f32, int_arm_neon_vminnm, 1>,
                           Requires<[HasV8, HasNEON]>;
}

// Vector Pairwise Operations.

// VPADD : Vector Pairwise Add
def  VPADDi8  : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i8",
                        v8i8, v8i8, int_arm_neon_vpadd, 0>;
def  VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i16",
                        v4i16, v4i16, int_arm_neon_vpadd, 0>;
def  VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i32",
                        v2i32, v2i32, int_arm_neon_vpadd, 0>;
def  VPADDf   : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm,
                        IIC_VPBIND, "vpadd", "f32",
                        v2f32, v2f32, int_arm_neon_vpadd, 0>;

// VPADDL : Vector Pairwise Add Long
defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
                            int_arm_neon_vpaddls>;
defm VPADDLu : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u",
                            int_arm_neon_vpaddlu>;

// VPADAL : Vector Pairwise Add and Accumulate Long
defm VPADALs : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s",
                             int_arm_neon_vpadals>;
defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u",
                             int_arm_neon_vpadalu>;

// VPMAX : Vector Pairwise Maximum
def  VPMAXs8  : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
def  VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
def  VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
def  VPMAXu8  : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
def  VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
def  VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
def  VPMAXf   : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
                        "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;

// VPMIN : Vector Pairwise Minimum
def  VPMINs8  : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>;
def  VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>;
def  VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>;
def  VPMINu8  : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>;
def  VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
def  VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
def  VPMINf   : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
                        "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;

// Vector Reciprocal and Reciprocal Square Root Estimate and Step.
// VRECPE : Vector Reciprocal Estimate
def  VRECPEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
                        IIC_VUNAD, "vrecpe", "u32",
                        v2i32, v2i32, int_arm_neon_vrecpe>;
def  VRECPEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
                        IIC_VUNAQ, "vrecpe", "u32",
                        v4i32, v4i32, int_arm_neon_vrecpe>;
def  VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
                        IIC_VUNAD, "vrecpe", "f32",
                        v2f32, v2f32, int_arm_neon_vrecpe>;
def  VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
                        IIC_VUNAQ, "vrecpe", "f32",
                        v4f32, v4f32, int_arm_neon_vrecpe>;

// VRECPS : Vector Reciprocal Step
def  VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
                        IIC_VRECSD, "vrecps", "f32",
                        v2f32, v2f32, int_arm_neon_vrecps, 1>;
def  VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
                        IIC_VRECSQ, "vrecps", "f32",
                        v4f32, v4f32, int_arm_neon_vrecps, 1>;

// VRSQRTE : Vector Reciprocal Square Root Estimate
def  VRSQRTEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
                         IIC_VUNAD, "vrsqrte", "u32",
                         v2i32, v2i32, int_arm_neon_vrsqrte>;
def  VRSQRTEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
                         IIC_VUNAQ, "vrsqrte", "u32",
                         v4i32, v4i32, int_arm_neon_vrsqrte>;
def  VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
                         IIC_VUNAD, "vrsqrte", "f32",
                         v2f32, v2f32, int_arm_neon_vrsqrte>;
def  VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
                         IIC_VUNAQ, "vrsqrte", "f32",
                         v4f32, v4f32, int_arm_neon_vrsqrte>;

// VRSQRTS : Vector Reciprocal Square Root Step
def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
                        IIC_VRECSD, "vrsqrts", "f32",
                        v2f32, v2f32, int_arm_neon_vrsqrts, 1>;
def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
                        IIC_VRECSQ, "vrsqrts", "f32",
                        v4f32, v4f32, int_arm_neon_vrsqrts, 1>;

// Vector Shifts.
// VSHL : Vector Shift
defm VSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm,
                           IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                           "vshl", "s", int_arm_neon_vshifts>;
defm VSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
                           IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                           "vshl", "u", int_arm_neon_vshiftu>;

// VSHL : Vector Shift Left (Immediate)
defm VSHLi : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>;

// VSHR : Vector Shift Right (Immediate)
defm VSHRs : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", "VSHRs",
                         NEONvshrs>;
defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu",
                         NEONvshru>;

// VSHLL : Vector Shift Left Long
defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s",
  PatFrag<(ops node:$LHS, node:$RHS), (NEONvshl (sext node:$LHS), node:$RHS)>>;
defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u",
  PatFrag<(ops node:$LHS, node:$RHS), (NEONvshl (zext node:$LHS), node:$RHS)>>;

// VSHLL : Vector Shift Left Long (with maximum shift count)
class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
                bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy,
                ValueType OpTy, Operand ImmTy>
  : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
           ResTy, OpTy, ImmTy, null_frag> {
  let Inst{21-16} = op21_16;
  let DecoderMethod = "DecodeVSHLMaxInstruction";
}
def  VSHLLi8  : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
                          v8i16, v8i8, imm8>;
def  VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
                          v4i32, v4i16, imm16>;
def  VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
                          v2i64, v2i32, imm32>;

// Select the max-shift VSHLL forms when the shift equals the element width;
// sign/zero extension are interchangeable there since the low half is zero.
def : Pat<(v8i16 (NEONvshl (zext (v8i8 DPR:$Rn)), (i32 8))),
          (VSHLLi8 DPR:$Rn, 8)>;
def : Pat<(v4i32 (NEONvshl (zext (v4i16 DPR:$Rn)), (i32 16))),
          (VSHLLi16 DPR:$Rn, 16)>;
def : Pat<(v2i64 (NEONvshl (zext (v2i32 DPR:$Rn)), (i32 32))),
          (VSHLLi32 DPR:$Rn, 32)>;
def : Pat<(v8i16 (NEONvshl (sext (v8i8 DPR:$Rn)), (i32 8))),
          (VSHLLi8 DPR:$Rn, 8)>;
def : Pat<(v4i32 (NEONvshl (sext (v4i16 DPR:$Rn)), (i32 16))),
          (VSHLLi16 DPR:$Rn, 16)>;
def : Pat<(v2i64 (NEONvshl (sext (v2i32 DPR:$Rn)), (i32 32))),
          (VSHLLi32 DPR:$Rn, 32)>;

// VSHRN : Vector Shift Right and Narrow
defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
                        PatFrag<(ops node:$Rn, node:$amt),
                                (trunc (NEONvshrs node:$Rn, node:$amt))>>;

// The truncation makes signed and unsigned shifts equivalent, so also
// match the unsigned-shift form onto VSHRN.
def : Pat<(v8i8 (trunc (NEONvshru (v8i16 QPR:$Vn), shr_imm8:$amt))),
          (VSHRNv8i8 QPR:$Vn, shr_imm8:$amt)>;
def : Pat<(v4i16 (trunc (NEONvshru (v4i32 QPR:$Vn), shr_imm16:$amt))),
          (VSHRNv4i16 QPR:$Vn, shr_imm16:$amt)>;
def : Pat<(v2i32 (trunc (NEONvshru (v2i64 QPR:$Vn), shr_imm32:$amt))),
          (VSHRNv2i32 QPR:$Vn, shr_imm32:$amt)>;

// VRSHL : Vector Rounding Shift
defm VRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vrshl", "s", int_arm_neon_vrshifts>;
defm VRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vrshl", "u", int_arm_neon_vrshiftu>;
// VRSHR : Vector Rounding Shift Right
defm VRSHRs : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", "VRSHRs",
                          NEONvrshrs>;
defm VRSHRu : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", "VRSHRu",
                          NEONvrshru>;

// VRSHRN : Vector Rounding Shift Right and Narrow
defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
                         NEONvrshrn>;

// VQSHL : Vector Saturating Shift
defm VQSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqshl", "s", int_arm_neon_vqshifts>;
defm VQSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqshl", "u", int_arm_neon_vqshiftu>;
// VQSHL : Vector Saturating Shift Left (Immediate)
defm VQSHLsi : N2VShL_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshls>;
defm VQSHLui : N2VShL_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshlu>;

// VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned)
defm VQSHLsu : N2VShL_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsu>;

// VQSHRN : Vector Saturating Shift Right and Narrow
defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
                          NEONvqshrns>;
defm VQSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u",
                          NEONvqshrnu>;

// VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned)
defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
                          NEONvqshrnsu>;

// VQRSHL : Vector Saturating Rounding Shift
defm VQRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm,
                             IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                             "vqrshl", "s", int_arm_neon_vqrshifts>;
defm VQRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm,
                             IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                             "vqrshl", "u", int_arm_neon_vqrshiftu>;

// VQRSHRN : Vector Saturating Rounding Shift Right and Narrow
defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
                           NEONvqrshrns>;
defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u",
                           NEONvqrshrnu>;

// VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
                           NEONvqrshrnsu>;

// VSRA : Vector Shift Right and Accumulate
defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrs>;
defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", NEONvshru>;
// VRSRA : Vector Rounding Shift Right and Accumulate
defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>;
defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>;

// VSLI : Vector Shift Left and Insert
defm VSLI : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">;

// VSRI : Vector Shift Right and Insert
defm VSRI : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">;

// Vector Absolute and Saturating Absolute.

// VABS : Vector Absolute Value
defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
                       IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s",
                       int_arm_neon_vabs>;
def  VABSfd : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
                   "vabs", "f32",
                   v2f32, v2f32, fabs>;
def  VABSfq : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
                   "vabs", "f32",
                   v4f32, v4f32, fabs>;

// Recognize the "(x + (x >>s msb)) ^ (x >>s msb)" idiom for integer abs.
def : Pat<(xor (v2i32 (bitconvert (v8i8 (NEONvshrs DPR:$src, (i32 7))))),
               (v2i32 (bitconvert (v8i8 (add DPR:$src,
                                             (NEONvshrs DPR:$src, (i32 7))))))),
          (VABSv8i8 DPR:$src)>;
def : Pat<(xor (v2i32 (bitconvert (v4i16 (NEONvshrs DPR:$src, (i32 15))))),
               (v2i32 (bitconvert (v4i16 (add DPR:$src,
                                              (NEONvshrs DPR:$src, (i32 15))))))),
          (VABSv4i16 DPR:$src)>;
def : Pat<(xor (v2i32 (NEONvshrs DPR:$src, (i32 31))),
               (v2i32 (add DPR:$src, (NEONvshrs DPR:$src, (i32 31))))),
          (VABSv2i32 DPR:$src)>;
def : Pat<(xor (v4i32 (bitconvert (v16i8 (NEONvshrs QPR:$src, (i32 7))))),
               (v4i32 (bitconvert (v16i8 (add QPR:$src,
                                              (NEONvshrs QPR:$src, (i32 7))))))),
          (VABSv16i8 QPR:$src)>;
def : Pat<(xor (v4i32 (bitconvert (v8i16 (NEONvshrs QPR:$src, (i32 15))))),
               (v4i32 (bitconvert (v8i16 (add QPR:$src,
                                              (NEONvshrs QPR:$src, (i32 15))))))),
          (VABSv8i16 QPR:$src)>;
def : Pat<(xor (v4i32 (NEONvshrs QPR:$src, (i32 31))),
               (v4i32 (add QPR:$src, (NEONvshrs QPR:$src, (i32 31))))),
          (VABSv4i32 QPR:$src)>;

// VQABS : Vector Saturating Absolute Value
defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
                        IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s",
                        int_arm_neon_vqabs>;

// Vector Negate.

// Negation expressed as (0 - x), for D and Q registers.
def vnegd : PatFrag<(ops node:$in),
                    (sub (bitconvert (v2i32 NEONimmAllZerosV)), node:$in)>;
def vnegq : PatFrag<(ops node:$in),
                    (sub (bitconvert (v4i32 NEONimmAllZerosV)), node:$in)>;

class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$Vd), (ins DPR:$Vm),
        IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (vnegd DPR:$Vm)))]>;
class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$Vd), (ins QPR:$Vm),
        IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (vnegq QPR:$Vm)))]>;

// VNEG : Vector Negate (integer)
def  VNEGs8d  : VNEGD<0b00, "vneg", "s8", v8i8>;
def  VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
def  VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
def  VNEGs8q  : VNEGQ<0b00, "vneg", "s8", v16i8>;
def  VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
def  VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;

// VNEG : Vector Negate (floating-point)
def  VNEGfd   : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
                    (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
                    "vneg", "f32", "$Vd, $Vm", "",
                    [(set DPR:$Vd, (v2f32 (fneg DPR:$Vm)))]>;
def  VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
                    (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
                    "vneg", "f32", "$Vd, $Vm", "",
                    [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>;

def : Pat<(v8i8  (vnegd DPR:$src)), (VNEGs8d DPR:$src)>;
def : Pat<(v4i16 (vnegd DPR:$src)), (VNEGs16d DPR:$src)>;
def : Pat<(v2i32 (vnegd DPR:$src)), (VNEGs32d DPR:$src)>;
def : Pat<(v16i8 (vnegq QPR:$src)), (VNEGs8q QPR:$src)>;
def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>;
def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>;

// VQNEG : Vector Saturating Negate
defm VQNEG : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
                        IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s",
                        int_arm_neon_vqneg>;

// Vector Bit Counting Operations.

// VCLS : Vector Count Leading Sign Bits
defm VCLS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
                       IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s",
                       int_arm_neon_vcls>;
// VCLZ : Vector Count Leading Zeros
defm VCLZ : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
                       IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
                       ctlz>;
// VCNT : Vector Count One Bits
def  VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                     IIC_VCNTiD, "vcnt", "8",
                     v8i8, v8i8, ctpop>;
def  VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                     IIC_VCNTiQ, "vcnt", "8",
                     v16i8, v16i8, ctpop>;

// Vector Swap
def  VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
                  (outs DPR:$Vd, DPR:$Vm), (ins DPR:$in1, DPR:$in2),
                  NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
                  []>;
def  VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
                  (outs QPR:$Vd, QPR:$Vm), (ins QPR:$in1, QPR:$in2),
                  NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
                  []>;

// Vector Move Operations.
// VMOV : Vector Move (Register)
def : NEONInstAlias<"vmov${p} $Vd, $Vm",
                    (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vmov${p} $Vd, $Vm",
                    (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;

// VMOV : Vector Move (Immediate)

let isReMaterializable = 1 in {
def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
                         "vmov", "i8", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v8i8 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
                         "vmov", "i8", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v16i8 (NEONvmovImm timm:$SIMM)))]>;

def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmov", "i16", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v4i16 (NEONvmovImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmov", "i16", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v8i16 (NEONvmovImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmov", "i32", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v2i32 (NEONvmovImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmov", "i32", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v4i32 (NEONvmovImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd),
                         (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
                         "vmov", "i64", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v1i64 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd),
                         (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
                         "vmov", "i64", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v2i64 (NEONvmovImm timm:$SIMM)))]>;

def VMOVv2f32 : N1ModImm<1, 0b000, 0b1111, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
                         "vmov", "f32", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v2f32 (NEONvmovFPImm timm:$SIMM)))]>;
def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
                         "vmov", "f32", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v4f32 (NEONvmovFPImm timm:$SIMM)))]>;
} // isReMaterializable

// Add support for bytes replication feature, so it could be GAS compatible.
// E.g. instructions below:
//   "vmov.i32 d0, 0xffffffff"
//   "vmov.i32 d0, 0xabababab"
//   "vmov.i16 d0, 0xabab"
// are incorrect, but we could deal with such cases.
// For last two instructions, for example, it should emit:
//   "vmov.i8 d0, 0xab"
def : NEONInstAlias<"vmov${p}.i16 $Vd, $Vm",
                    (VMOVv8i8 DPR:$Vd, nImmVMOVI16ByteReplicate:$Vm, pred:$p)>;
def : NEONInstAlias<"vmov${p}.i32 $Vd, $Vm",
                    (VMOVv8i8 DPR:$Vd, nImmVMOVI32ByteReplicate:$Vm, pred:$p)>;
def : NEONInstAlias<"vmov${p}.i16 $Vd, $Vm",
                    (VMOVv16i8 QPR:$Vd, nImmVMOVI16ByteReplicate:$Vm, pred:$p)>;
def : NEONInstAlias<"vmov${p}.i32 $Vd, $Vm",
                    (VMOVv16i8 QPR:$Vd, nImmVMOVI32ByteReplicate:$Vm, pred:$p)>;

// Also add same support for VMVN instructions. So instruction:
//   "vmvn.i32 d0, 0xabababab"
// actually means:
//   "vmov.i8 d0, 0x54"
def : NEONInstAlias<"vmvn${p}.i16 $Vd, $Vm",
                    (VMOVv8i8 DPR:$Vd, nImmVMVNI16ByteReplicate:$Vm, pred:$p)>;
def : NEONInstAlias<"vmvn${p}.i32 $Vd, $Vm",
                    (VMOVv8i8 DPR:$Vd, nImmVMVNI32ByteReplicate:$Vm, pred:$p)>;
def : NEONInstAlias<"vmvn${p}.i16 $Vd, $Vm",
                    (VMOVv16i8 QPR:$Vd, nImmVMVNI16ByteReplicate:$Vm, pred:$p)>;
def : NEONInstAlias<"vmvn${p}.i32 $Vd, $Vm",
                    (VMOVv16i8 QPR:$Vd, nImmVMVNI32ByteReplicate:$Vm, pred:$p)>;

// On some CPUs the two instructions "vmov.i32 dD, #0" and "vmov.i32 qD, #0"
// require zero cycles to execute so they should be used wherever possible for
// setting a register to zero.

// Even without these pseudo-insts we would probably end up with the correct
// instruction, but we could not mark the general ones with "isAsCheapAsAMove"
// since they are sometimes rather expensive (in general).
let AddedComplexity = 50, isAsCheapAsAMove = 1, isReMaterializable = 1 in {
def VMOVD0 : ARMPseudoExpand<(outs DPR:$Vd), (ins), 4, IIC_VMOVImm,
                             [(set DPR:$Vd, (v2i32 NEONimmAllZerosV))],
                             (VMOVv2i32 DPR:$Vd, 0, (ops 14, zero_reg))>,
             Requires<[HasZCZ]>;
def VMOVQ0 : ARMPseudoExpand<(outs QPR:$Vd), (ins), 4, IIC_VMOVImm,
                             [(set QPR:$Vd, (v4i32 NEONimmAllZerosV))],
                             (VMOVv4i32 QPR:$Vd, 0, (ops 14, zero_reg))>,
             Requires<[HasZCZ]>;
}

// VMOV : Vector Get Lane (move scalar to ARM core register)

def VGETLNs8  : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
                          (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
                          IIC_VMOVSI, "vmov", "s8", "$R, $V$lane",
                          [(set GPR:$R, (NEONvgetlanes (v8i8 DPR:$V),
                                                       imm:$lane))]> {
  let Inst{21}  = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
                          (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
                          IIC_VMOVSI, "vmov", "s16", "$R, $V$lane",
                          [(set GPR:$R, (NEONvgetlanes (v4i16 DPR:$V),
                                                       imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6}  = lane{0};
}
def VGETLNu8  : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
                          (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
                          IIC_VMOVSI, "vmov", "u8", "$R, $V$lane",
                          [(set GPR:$R, (NEONvgetlaneu (v8i8 DPR:$V),
                                                       imm:$lane))]> {
  let Inst{21}  = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
                          (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
                          IIC_VMOVSI, "vmov", "u16", "$R, $V$lane",
                          [(set GPR:$R, (NEONvgetlaneu (v4i16 DPR:$V),
                                                       imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6}  = lane{0};
}
def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
                          (outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane),
                          IIC_VMOVSI, "vmov", "32", "$R, $V$lane",
                          [(set GPR:$R, (extractelt (v2i32 DPR:$V),
                                                    imm:$lane))]>,
                Requires<[HasNEON, HasFastVGETLNi32]> {
  let Inst{21} = lane{0};
}
// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
def : Pat<(NEONvgetlanes (v16i8 QPR:$src), imm:$lane),
          (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src,
                           (DSubReg_i8_reg imm:$lane))),
                    (SubReg_i8_lane imm:$lane))>;
def : Pat<(NEONvgetlanes (v8i16 QPR:$src), imm:$lane),
          (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                             (DSubReg_i16_reg imm:$lane))),
                     (SubReg_i16_lane imm:$lane))>;
def : Pat<(NEONvgetlaneu (v16i8 QPR:$src), imm:$lane),
          (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src,
                           (DSubReg_i8_reg imm:$lane))),
                    (SubReg_i8_lane imm:$lane))>;
def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane),
          (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                             (DSubReg_i16_reg imm:$lane))),
                     (SubReg_i16_lane imm:$lane))>;
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
          (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
                             (DSubReg_i32_reg imm:$lane))),
                     (SubReg_i32_lane imm:$lane))>,
      Requires<[HasNEON, HasFastVGETLNi32]>;
def : Pat<(extractelt (v2i32 DPR:$src), imm:$lane),
          (COPY_TO_REGCLASS
            (i32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
      Requires<[HasNEON, HasSlowVGETLNi32]>;
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
          (COPY_TO_REGCLASS
            (i32 (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
      Requires<[HasNEON, HasSlowVGETLNi32]>;
def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
          (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)),
                          (SSubReg_f32_reg imm:$src2))>;
def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1),QPR_VFP2)),
                          (SSubReg_f32_reg imm:$src2))>;
//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
//          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;


// VMOV : Vector Set Lane (move ARM core register to scalar)

let Constraints = "$src1 = $V" in {
def VSETLNi8 : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V),
                         (ins DPR:$src1, GPR:$R, VectorIndex8:$lane),
                         IIC_VMOVISL, "vmov", "8", "$V$lane, $R",
                         [(set DPR:$V, (vector_insert (v8i8 DPR:$src1),
                                                      GPR:$R, imm:$lane))]> {
  let Inst{21}  = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V),
                          (ins DPR:$src1, GPR:$R, VectorIndex16:$lane),
                          IIC_VMOVISL, "vmov", "16", "$V$lane, $R",
                          [(set DPR:$V, (vector_insert (v4i16 DPR:$src1),
                                                       GPR:$R, imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6}  = lane{0};
}
def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
                          (ins DPR:$src1, GPR:$R, VectorIndex32:$lane),
                          IIC_VMOVISL, "vmov", "32", "$V$lane, $R",
                          [(set DPR:$V, (insertelt (v2i32 DPR:$src1),
                                                   GPR:$R, imm:$lane))]> {
  let Inst{21} = lane{0};
}
}
def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
          (v16i8 (INSERT_SUBREG QPR:$src1,
                  (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
                                         (DSubReg_i8_reg imm:$lane))),
                                  GPR:$src2, (SubReg_i8_lane imm:$lane))),
                  (DSubReg_i8_reg imm:$lane)))>;
def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
          (v8i16 (INSERT_SUBREG QPR:$src1,
                  (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
                                            (DSubReg_i16_reg imm:$lane))),
                                    GPR:$src2, (SubReg_i16_lane imm:$lane))),
                  (DSubReg_i16_reg imm:$lane)))>;
def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
          (v4i32 (INSERT_SUBREG QPR:$src1,
                  (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
                                            (DSubReg_i32_reg imm:$lane))),
                                    GPR:$src2, (SubReg_i32_lane imm:$lane))),
                  (DSubReg_i32_reg imm:$lane)))>;

def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
                         SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)),
                         SPR:$src2, (SSubReg_f32_reg imm:$src3))>;

//def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
//          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;

def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;

def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
          (VSETLNi8  (v8i8  (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v4i16 (scalar_to_vector GPR:$src)),
          (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
          (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;

def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                         (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;
def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
                         (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;
def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                         (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;

// VDUP : Vector Duplicate (from ARM core register to all elements)

class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$V), (ins GPR:$R),
          IIC_VMOVIS, "vdup", Dt, "$V, $R",
          [(set DPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;
class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$V), (ins GPR:$R),
          IIC_VMOVIS, "vdup", Dt, "$V, $R",
          [(set QPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;

def  VDUP8d   : VDUPD<0b11101100, 0b00, "8", v8i8>;
def  VDUP16d  : VDUPD<0b11101000, 0b01, "16", v4i16>;
def  VDUP32d  : VDUPD<0b11101000, 0b00, "32", v2i32>,
                Requires<[HasNEON, HasFastVDUP32]>;
def  VDUP8q   : VDUPQ<0b11101110, 0b00, "8", v16i8>;
def  VDUP16q  : VDUPQ<0b11101010, 0b01, "16", v8i16>;
def  VDUP32q  : VDUPQ<0b11101010, 0b00, "32", v4i32>;

// NEONvdup patterns for uarchs with fast VDUP.32.
def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>,
      Requires<[HasNEON,HasFastVDUP32]>;
def : Pat<(v4f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>;

// NEONvdup patterns for uarchs with slow VDUP.32 - use VMOVDRR instead.
5587 def : Pat<(v2i32 (NEONvdup (i32 GPR:$R))), (VMOVDRR GPR:$R, GPR:$R)>, 5588 Requires<[HasNEON,HasSlowVDUP32]>; 5589 def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VMOVDRR GPR:$R, GPR:$R)>, 5590 Requires<[HasNEON,HasSlowVDUP32]>; 5591 5592 // VDUP : Vector Duplicate Lane (from scalar to all elements) 5593 5594 class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt, 5595 ValueType Ty, Operand IdxTy> 5596 : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, IdxTy:$lane), 5597 IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm$lane", 5598 [(set DPR:$Vd, (Ty (NEONvduplane (Ty DPR:$Vm), imm:$lane)))]>; 5599 5600 class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt, 5601 ValueType ResTy, ValueType OpTy, Operand IdxTy> 5602 : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, IdxTy:$lane), 5603 IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm$lane", 5604 [(set QPR:$Vd, (ResTy (NEONvduplane (OpTy DPR:$Vm), 5605 VectorIndex32:$lane)))]>; 5606 5607 // Inst{19-16} is partially specified depending on the element size. 
5608 5609 def VDUPLN8d : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8, VectorIndex8> { 5610 bits<3> lane; 5611 let Inst{19-17} = lane{2-0}; 5612 } 5613 def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16, VectorIndex16> { 5614 bits<2> lane; 5615 let Inst{19-18} = lane{1-0}; 5616 } 5617 def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32, VectorIndex32> { 5618 bits<1> lane; 5619 let Inst{19} = lane{0}; 5620 } 5621 def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8, VectorIndex8> { 5622 bits<3> lane; 5623 let Inst{19-17} = lane{2-0}; 5624 } 5625 def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16, VectorIndex16> { 5626 bits<2> lane; 5627 let Inst{19-18} = lane{1-0}; 5628 } 5629 def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32, VectorIndex32> { 5630 bits<1> lane; 5631 let Inst{19} = lane{0}; 5632 } 5633 5634 def : Pat<(v2f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)), 5635 (VDUPLN32d DPR:$Vm, imm:$lane)>; 5636 5637 def : Pat<(v4f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)), 5638 (VDUPLN32q DPR:$Vm, imm:$lane)>; 5639 5640 def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)), 5641 (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src, 5642 (DSubReg_i8_reg imm:$lane))), 5643 (SubReg_i8_lane imm:$lane)))>; 5644 def : Pat<(v8i16 (NEONvduplane (v8i16 QPR:$src), imm:$lane)), 5645 (v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src, 5646 (DSubReg_i16_reg imm:$lane))), 5647 (SubReg_i16_lane imm:$lane)))>; 5648 def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)), 5649 (v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src, 5650 (DSubReg_i32_reg imm:$lane))), 5651 (SubReg_i32_lane imm:$lane)))>; 5652 def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)), 5653 (v4f32 (VDUPLN32q (v2f32 (EXTRACT_SUBREG QPR:$src, 5654 (DSubReg_i32_reg imm:$lane))), 5655 (SubReg_i32_lane imm:$lane)))>; 5656 5657 def : Pat<(v2f32 (NEONvdup (f32 SPR:$src))), 5658 (v2f32 (VDUPLN32d (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), 5659 SPR:$src, ssub_0), (i32 0)))>; 5660 
def : Pat<(v4f32 (NEONvdup (f32 SPR:$src))), 5661 (v4f32 (VDUPLN32q (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), 5662 SPR:$src, ssub_0), (i32 0)))>; 5663 5664 // VMOVN : Vector Narrowing Move 5665 defm VMOVN : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN, 5666 "vmovn", "i", trunc>; 5667 // VQMOVN : Vector Saturating Narrowing Move 5668 defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD, 5669 "vqmovn", "s", int_arm_neon_vqmovns>; 5670 defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD, 5671 "vqmovn", "u", int_arm_neon_vqmovnu>; 5672 defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD, 5673 "vqmovun", "s", int_arm_neon_vqmovnsu>; 5674 // VMOVL : Vector Lengthening Move 5675 defm VMOVLs : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>; 5676 defm VMOVLu : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>; 5677 def : Pat<(v8i16 (anyext (v8i8 DPR:$Vm))), (VMOVLuv8i16 DPR:$Vm)>; 5678 def : Pat<(v4i32 (anyext (v4i16 DPR:$Vm))), (VMOVLuv4i32 DPR:$Vm)>; 5679 def : Pat<(v2i64 (anyext (v2i32 DPR:$Vm))), (VMOVLuv2i64 DPR:$Vm)>; 5680 5681 // Vector Conversions. 
// VCVT : Vector Convert Between Floating-Point and Integers
def  VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
                     v2i32, v2f32, fp_to_sint>;
def  VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
                     v2i32, v2f32, fp_to_uint>;
def  VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
                     v2f32, v2i32, sint_to_fp>;
def  VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
                     v2f32, v2i32, uint_to_fp>;

def  VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
                     v4i32, v4f32, fp_to_sint>;
def  VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
                     v4i32, v4f32, fp_to_uint>;
def  VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
                     v4f32, v4i32, sint_to_fp>;
def  VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
                     v4f32, v4i32, uint_to_fp>;

// VCVT{A, N, P, M}
multiclass VCVT_FPI<string op, bits<3> op10_8, SDPatternOperator IntS,
                    SDPatternOperator IntU> {
  let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
    def SD : N2VDIntnp<0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
                       "s32.f32", v2i32, v2f32, IntS>, Requires<[HasV8, HasNEON]>;
    def SQ : N2VQIntnp<0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
                       "s32.f32", v4i32, v4f32, IntS>, Requires<[HasV8, HasNEON]>;
    def UD : N2VDIntnp<0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
                       "u32.f32", v2i32, v2f32, IntU>, Requires<[HasV8, HasNEON]>;
    def UQ : N2VQIntnp<0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
                       "u32.f32", v4i32, v4f32, IntU>, Requires<[HasV8, HasNEON]>;
  }
}

defm VCVTAN : VCVT_FPI<"a", 0b000, int_arm_neon_vcvtas, int_arm_neon_vcvtau>;
defm VCVTNN : VCVT_FPI<"n", 0b001, int_arm_neon_vcvtns, int_arm_neon_vcvtnu>;
defm VCVTPN : VCVT_FPI<"p", 0b010, int_arm_neon_vcvtps, int_arm_neon_vcvtpu>;
defm VCVTMN : VCVT_FPI<"m", 0b011, int_arm_neon_vcvtms, int_arm_neon_vcvtmu>;

// VCVT : Vector Convert Between Floating-Point and Fixed-Point.
let DecoderMethod = "DecodeVCVTD" in {
def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
                        v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
                        v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
                        v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
                        v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
}

let DecoderMethod = "DecodeVCVTQ" in {
def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
                        v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
                        v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
                        v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
                        v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
}

// A #0 fixed-point shift is just the plain FP<->int conversion.
def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0",
                    (VCVTf2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.u32.f32 $Dd, $Dm, #0",
                    (VCVTf2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f32.s32 $Dd, $Dm, #0",
                    (VCVTs2fd DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f32.u32 $Dd, $Dm, #0",
                    (VCVTu2fd DPR:$Dd, DPR:$Dm, pred:$p)>;

def : NEONInstAlias<"vcvt${p}.s32.f32 $Qd, $Qm, #0",
                    (VCVTf2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.u32.f32 $Qd, $Qm, #0",
                    (VCVTf2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0",
                    (VCVTs2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0",
                    (VCVTu2fq QPR:$Qd, QPR:$Qm, pred:$p)>;


// VCVT : Vector Convert Between Half-Precision and Single-Precision.
def  VCVTf2h  : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
                        IIC_VUNAQ, "vcvt", "f16.f32",
                        v4i16, v4f32, int_arm_neon_vcvtfp2hf>,
                Requires<[HasNEON, HasFP16]>;
def  VCVTh2f  : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0,
                        IIC_VUNAQ, "vcvt", "f32.f16",
                        v4f32, v4i16, int_arm_neon_vcvthf2fp>,
                Requires<[HasNEON, HasFP16]>;

// Vector Reverse.

// VREV64 : Vector Reverse elements within 64-bit doublewords

class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VMOVD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (NEONvrev64 (Ty DPR:$Vm))))]>;
class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VMOVQ,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (NEONvrev64 (Ty QPR:$Vm))))]>;

def VREV64d8  : VREV64D<0b00, "vrev64", "8", v8i8>;
def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
def : Pat<(v2f32 (NEONvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>;

def VREV64q8  : VREV64Q<0b00, "vrev64", "8", v16i8>;
def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
def : Pat<(v4f32 (NEONvrev64 (v4f32 QPR:$Vm))), (VREV64q32 QPR:$Vm)>;

// VREV32 : Vector Reverse elements within 32-bit words

class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VMOVD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (NEONvrev32 (Ty DPR:$Vm))))]>;
class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VMOVQ,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (NEONvrev32 (Ty QPR:$Vm))))]>;

def VREV32d8  : VREV32D<0b00, "vrev32", "8", v8i8>;
def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;

def VREV32q8  : VREV32Q<0b00, "vrev32", "8", v16i8>;
def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;

// VREV16 : Vector Reverse elements within 16-bit halfwords

class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VMOVD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (NEONvrev16 (Ty DPR:$Vm))))]>;
class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VMOVQ,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (NEONvrev16 (Ty QPR:$Vm))))]>;

def VREV16d8  : VREV16D<0b00, "vrev16", "8", v8i8>;
def VREV16q8  : VREV16Q<0b00, "vrev16", "8", v16i8>;

// Other Vector Shuffles.

// Aligned extractions: really just dropping registers

class AlignedVEXTq<ValueType DestTy, ValueType SrcTy, SDNodeXForm LaneCVT>
  : Pat<(DestTy (vector_extract_subvec (SrcTy QPR:$src), (i32 imm:$start))),
        (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>;

def : AlignedVEXTq<v8i8, v16i8, DSubReg_i8_reg>;

def : AlignedVEXTq<v4i16, v8i16, DSubReg_i16_reg>;

def : AlignedVEXTq<v2i32, v4i32, DSubReg_i32_reg>;

def : AlignedVEXTq<v1i64, v2i64, DSubReg_f64_reg>;

def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;


// VEXT : Vector Extract


// All of these have a two-operand InstAlias.
5857 let TwoOperandAliasConstraint = "$Vn = $Vd" in { 5858 class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy> 5859 : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd), 5860 (ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm, 5861 IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "", 5862 [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn), 5863 (Ty DPR:$Vm), imm:$index)))]> { 5864 bits<3> index; 5865 let Inst{11} = 0b0; 5866 let Inst{10-8} = index{2-0}; 5867 } 5868 5869 class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy> 5870 : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd), 5871 (ins QPR:$Vn, QPR:$Vm, imm0_15:$index), NVExtFrm, 5872 IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "", 5873 [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn), 5874 (Ty QPR:$Vm), imm:$index)))]> { 5875 bits<4> index; 5876 let Inst{11-8} = index{3-0}; 5877 } 5878 } 5879 5880 def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> { 5881 let Inst{10-8} = index{2-0}; 5882 } 5883 def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> { 5884 let Inst{10-9} = index{1-0}; 5885 let Inst{8} = 0b0; 5886 } 5887 def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> { 5888 let Inst{10} = index{0}; 5889 let Inst{9-8} = 0b00; 5890 } 5891 def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn), 5892 (v2f32 DPR:$Vm), 5893 (i32 imm:$index))), 5894 (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>; 5895 5896 def VEXTq8 : VEXTq<"vext", "8", v16i8, imm0_15> { 5897 let Inst{11-8} = index{3-0}; 5898 } 5899 def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> { 5900 let Inst{11-9} = index{2-0}; 5901 let Inst{8} = 0b0; 5902 } 5903 def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> { 5904 let Inst{11-10} = index{1-0}; 5905 let Inst{9-8} = 0b00; 5906 } 5907 def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> { 5908 let Inst{11} = index{0}; 5909 let Inst{10-8} = 0b000; 5910 } 5911 def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn), 5912 (v4f32 QPR:$Vm), 5913 (i32 imm:$index))), 5914 (VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>; 5915 5916 // VTRN : Vector Transpose 5917 
5918 def VTRNd8 : N2VDShuffle<0b00, 0b00001, "vtrn", "8">; 5919 def VTRNd16 : N2VDShuffle<0b01, 0b00001, "vtrn", "16">; 5920 def VTRNd32 : N2VDShuffle<0b10, 0b00001, "vtrn", "32">; 5921 5922 def VTRNq8 : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">; 5923 def VTRNq16 : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">; 5924 def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">; 5925 5926 // VUZP : Vector Unzip (Deinterleave) 5927 5928 def VUZPd8 : N2VDShuffle<0b00, 0b00010, "vuzp", "8">; 5929 def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp", "16">; 5930 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 5931 def : NEONInstAlias<"vuzp${p}.32 $Dd, $Dm", 5932 (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>; 5933 5934 def VUZPq8 : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">; 5935 def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">; 5936 def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">; 5937 5938 // VZIP : Vector Zip (Interleave) 5939 5940 def VZIPd8 : N2VDShuffle<0b00, 0b00011, "vzip", "8">; 5941 def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip", "16">; 5942 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 5943 def : NEONInstAlias<"vzip${p}.32 $Dd, $Dm", 5944 (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>; 5945 5946 def VZIPq8 : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">; 5947 def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">; 5948 def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">; 5949 5950 // Vector Table Lookup and Table Extension. 
// VTBL : Vector Table Lookup
let DecoderMethod = "DecodeTBLInstruction" in {
def  VTBL1
  : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd),
        (ins VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
        "vtbl", "8", "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbl1 VecListOneD:$Vn, DPR:$Vm)))]>;
let hasExtraSrcRegAllocReq = 1 in {
def  VTBL2
  : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
        (ins VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def  VTBL3
  : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd),
        (ins VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB3,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def  VTBL4
  : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd),
        (ins VecListFourD:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTB4,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
} // hasExtraSrcRegAllocReq = 1

def  VTBL3Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>;
def  VTBL4Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB4, "", []>;

// VTBX : Vector Table Extension
def  VTBX1
  : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1,
        "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd",
        [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1
                               DPR:$orig, VecListOneD:$Vn, DPR:$Vm)))]>;
let hasExtraSrcRegAllocReq = 1 in {
def  VTBX2
  : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
        "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>;
def  VTBX3
  : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListThreeD:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTBX3,
        "vtbx", "8", "$Vd, $Vn, $Vm",
        "$orig = $Vd", []>;
def  VTBX4
  : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
        "vtbx", "8", "$Vd, $Vn, $Vm",
        "$orig = $Vd", []>;
} // hasExtraSrcRegAllocReq = 1

def  VTBX3Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
                IIC_VTBX3, "$orig = $dst", []>;
def  VTBX4Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
                IIC_VTBX4, "$orig = $dst", []>;
} // DecoderMethod = "DecodeTBLInstruction"

// VRINT : Vector Rounding
multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> {
  let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
    def D : N2VDIntnp<0b10, 0b100, 0, NoItinerary,
                      !strconcat("vrint", op), "f32",
                      v2f32, v2f32, Int>, Requires<[HasV8, HasNEON]> {
      let Inst{9-7} = op9_7;
    }
    def Q : N2VQIntnp<0b10, 0b100, 0, NoItinerary,
                      !strconcat("vrint", op), "f32",
                      v4f32, v4f32, Int>, Requires<[HasV8, HasNEON]> {
      let Inst{9-7} = op9_7;
    }
  }

  def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Dd, $Dm"),
                      (!cast<Instruction>(NAME#"D") DPR:$Dd, DPR:$Dm)>;
  def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Qd, $Qm"),
                      (!cast<Instruction>(NAME#"Q") QPR:$Qd, QPR:$Qm)>;
}

defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>;
defm VRINTXN : VRINT_FPI<"x", 0b001, int_arm_neon_vrintx>;
defm VRINTAN : VRINT_FPI<"a", 0b010, int_arm_neon_vrinta>;
defm VRINTZN : VRINT_FPI<"z", 0b011, int_arm_neon_vrintz>;
defm VRINTMN : VRINT_FPI<"m", 0b101, int_arm_neon_vrintm>;
defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>;

// Cryptography instructions
let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
    DecoderNamespace = "v8Crypto", hasSideEffects = 0 in {
  class AES<string op, bit op7, bit op6, SDPatternOperator Int>
    : N2VQIntXnp<0b00, 0b00, 0b011, op6, op7, NoItinerary,
                 !strconcat("aes", op), "8", v16i8, v16i8, Int>,
      Requires<[HasV8, HasCrypto]>;
  class AES2Op<string op, bit op7, bit op6, SDPatternOperator Int>
    : N2VQIntX2np<0b00, 0b00, 0b011, op6, op7, NoItinerary,
                  !strconcat("aes", op), "8", v16i8, v16i8, Int>,
      Requires<[HasV8, HasCrypto]>;
  class N2SHA<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
              SDPatternOperator Int>
    : N2VQIntXnp<0b10, op17_16, op10_8, op6, op7, NoItinerary,
                 !strconcat("sha", op), "32", v4i32, v4i32, Int>,
      Requires<[HasV8, HasCrypto]>;
  class N2SHA2Op<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
                 SDPatternOperator Int>
    : N2VQIntX2np<0b10, op17_16, op10_8, op6, op7, NoItinerary,
                  !strconcat("sha", op), "32", v4i32, v4i32, Int>,
      Requires<[HasV8, HasCrypto]>;
  class N3SHA3Op<string op, bits<5> op27_23, bits<2> op21_20, SDPatternOperator Int>
    : N3VQInt3np<op27_23, op21_20, 0b1100, 1, 0, N3RegFrm, NoItinerary,
                 !strconcat("sha", op), "32", v4i32, v4i32, Int, 0>,
      Requires<[HasV8, HasCrypto]>;
}

def AESD : AES2Op<"d", 0, 1, int_arm_neon_aesd>;
def AESE : AES2Op<"e", 0, 0, int_arm_neon_aese>;
def AESIMC : AES<"imc", 1, 1, int_arm_neon_aesimc>;
def AESMC : AES<"mc", 1, 0, int_arm_neon_aesmc>;

def SHA1H : N2SHA<"1h", 0b01, 0b010, 1, 1, null_frag>;
def SHA1SU1 : N2SHA2Op<"1su1", 0b10, 0b011, 1, 0, int_arm_neon_sha1su1>;
def SHA256SU0 : N2SHA2Op<"256su0", 0b10, 0b011, 1, 1, int_arm_neon_sha256su0>;
def SHA1C : N3SHA3Op<"1c", 0b00100, 0b00, null_frag>;
def SHA1M : N3SHA3Op<"1m", 0b00100, 0b10, null_frag>;
def SHA1P : N3SHA3Op<"1p", 0b00100, 0b01, null_frag>;
def SHA1SU0 : N3SHA3Op<"1su0", 0b00100, 0b11, int_arm_neon_sha1su0>;
def SHA256H : N3SHA3Op<"256h", 0b00110, 0b00, int_arm_neon_sha256h>;
def SHA256H2 : N3SHA3Op<"256h2", 0b00110, 0b01, int_arm_neon_sha256h2>;
def SHA256SU1 : N3SHA3Op<"256su1", 0b00110, 0b10, int_arm_neon_sha256su1>;

// The sha1h intrinsic takes/returns an i32 scalar; marshal it through the
// S-register file so the Q-register instruction can be used.
def : Pat<(i32 (int_arm_neon_sha1h i32:$Rn)),
          (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG
              (SHA1H (SUBREG_TO_REG (i64 0),
                                    (f32 (COPY_TO_REGCLASS i32:$Rn, SPR)),
                                    ssub_0)),
              ssub_0)), GPR)>;

def : Pat<(v4i32 (int_arm_neon_sha1c v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
          (SHA1C v4i32:$hash_abcd,
                 (SUBREG_TO_REG (i64 0),
                                (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
                                ssub_0),
                 v4i32:$wk)>;

def : Pat<(v4i32 (int_arm_neon_sha1m v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
          (SHA1M v4i32:$hash_abcd,
                 (SUBREG_TO_REG (i64 0),
                                (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
                                ssub_0),
                 v4i32:$wk)>;

def : Pat<(v4i32 (int_arm_neon_sha1p v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
          (SHA1P v4i32:$hash_abcd,
                 (SUBREG_TO_REG (i64 0),
                                (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
                                ssub_0),
                 v4i32:$wk)>;

//===----------------------------------------------------------------------===//
// NEON instructions for single-precision FP math
//===----------------------------------------------------------------------===//

class N2VSPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$a)),
              (EXTRACT_SUBREG
               (v2f32 (COPY_TO_REGCLASS (Inst
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$a, ssub_0)), DPR_VFP2)), ssub_0)>;

class N3VSPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
              (EXTRACT_SUBREG
               (v2f32 (COPY_TO_REGCLASS (Inst
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$a, ssub_0),
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;

class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
              (EXTRACT_SUBREG
               (v2f32 (COPY_TO_REGCLASS (Inst
                (INSERT_SUBREG
(v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)), 6141 SPR:$acc, ssub_0), 6142 (INSERT_SUBREG 6143 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)), 6144 SPR:$a, ssub_0), 6145 (INSERT_SUBREG 6146 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)), 6147 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>; 6148 6149 def : N3VSPat<fadd, VADDfd>; 6150 def : N3VSPat<fsub, VSUBfd>; 6151 def : N3VSPat<fmul, VMULfd>; 6152 def : N3VSMulOpPat<fmul, fadd, VMLAfd>, 6153 Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>; 6154 def : N3VSMulOpPat<fmul, fsub, VMLSfd>, 6155 Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>; 6156 def : N3VSMulOpPat<fmul, fadd, VFMAfd>, 6157 Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>; 6158 def : N3VSMulOpPat<fmul, fsub, VFMSfd>, 6159 Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>; 6160 def : N2VSPat<fabs, VABSfd>; 6161 def : N2VSPat<fneg, VNEGfd>; 6162 def : N3VSPat<NEONfmax, VMAXfd>; 6163 def : N3VSPat<NEONfmin, VMINfd>; 6164 def : N2VSPat<arm_ftosi, VCVTf2sd>; 6165 def : N2VSPat<arm_ftoui, VCVTf2ud>; 6166 def : N2VSPat<arm_sitof, VCVTs2fd>; 6167 def : N2VSPat<arm_uitof, VCVTu2fd>; 6168 6169 // Prefer VMOVDRR for i32 -> f32 bitcasts, it can write all DPR registers. 
def : Pat<(f32 (bitconvert GPR:$a)),
          (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>,
      Requires<[HasNEON, DontUseVMOVSR]>;

//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//

// bit_convert
// In little-endian mode (and for the few unconditional cases below) a
// bitconvert between 64-bit D-register types, or between 128-bit Q-register
// types, is free: the register is simply reinterpreted. Big-endian needs
// VREV lane reversals; see the IsBE block further down.
let Predicates = [IsLE] in {
  def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (v1i64 DPR:$src)>;
}
def : Pat<(v1i64 (bitconvert (f64 DPR:$src))), (v1i64 DPR:$src)>;
let Predicates = [IsLE] in {
  def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (v2i32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (v2i32 DPR:$src)>;
}
def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;
let Predicates = [IsLE] in {
  def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (v4i16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (v4i16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
  def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (v8i8 DPR:$src)>;
  def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (v8i8 DPR:$src)>;
  def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (v8i8 DPR:$src)>;
  def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (v8i8 DPR:$src)>;
  def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (v8i8 DPR:$src)>;
}
def : Pat<(f64 (bitconvert (v1i64 DPR:$src))), (f64 DPR:$src)>;
let Predicates = [IsLE] in {
  def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (f64 DPR:$src)>;
  def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (f64 DPR:$src)>;
  def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (f64 DPR:$src)>;
  def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (f64 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (v2f32 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
}
def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
let Predicates = [IsLE] in {
  def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (v2f32 DPR:$src)>;
}

// 128-bit (Q register) bitconverts.
let Predicates = [IsLE] in {
  def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
}
def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
let Predicates = [IsLE] in {
  def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
}
def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
let Predicates = [IsLE] in {
  def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
}
def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
let Predicates = [IsLE] in {
  def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
}
def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
let Predicates = [IsLE] in {
  def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
}

// Big-endian bitconverts: the lane order changes, so a VREV of the
// appropriate granularity (chosen from the two element sizes involved)
// performs the reinterpretation.
let Predicates = [IsBE] in {
  // 64 bit conversions
  def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (VREV64d8 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (VREV32d8 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (VREV16d8 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (VREV64d8 DPR:$src)>;
  def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (VREV32d8 DPR:$src)>;
  def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (VREV16d8 DPR:$src)>;
  def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (VREV64d8 DPR:$src)>;
  def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (VREV32d8 DPR:$src)>;
  def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (VREV64d8 DPR:$src)>;
  def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (VREV32d8 DPR:$src)>;

  // 128 bit conversions
  def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (VREV16q8 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (VREV64q8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (VREV32q8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (VREV16q8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (VREV64q8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (VREV32q8 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
}

// Fold extracting an element out of a v2i32 into a vfp register.
def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))),
          (f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;

// Vector lengthening move with load, matching extending loads.

// extload, zextload and sextload for a standard lengthening load.
// Example:
//   Lengthen_Single<"8", "i16", "8"> =
//     Pat<(v8i16 (extloadvi8 addrmode6:$addr))
//         (VMOVLuv8i16 (VLD1d8 addrmode6:$addr,
//                              (f64 (IMPLICIT_DEF)), (i32 0)))>;
//
// Each multiclass below emits three patterns: _Any (extload), _Z (zextload,
// via the unsigned VMOVLu) and _S (sextload, via the signed VMOVLs).
multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> {
  let AddedComplexity = 10 in {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("extloadvi" # SrcTy) addrmode6:$addr)),
                 (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
                    (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;

  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("zextloadvi" # SrcTy) addrmode6:$addr)),
               (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
                  (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;

  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("sextloadvi" # SrcTy) addrmode6:$addr)),
               (!cast<Instruction>("VMOVLsv" # DestLanes # DestTy)
                  (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;
  }
}

// extload, zextload and sextload for a lengthening load which only uses
// half the lanes available. Example:
//   Lengthen_HalfSingle<"4", "i16", "8", "i16", "i8"> =
//     Pat<(v4i16 (extloadvi8 addrmode6oneL32:$addr)),
//         (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
//                                                 (f64 (IMPLICIT_DEF)),
//                                                 (i32 0))),
//                         dsub_0)>;
multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy,
                               string InsnLanes, string InsnTy> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
         (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
         dsub_0)>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
         (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
         dsub_0)>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
         (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
         dsub_0)>;
}

// The following class definition is basically a copy of the
// Lengthen_HalfSingle definition above, however with an additional parameter
// "RevLanes" to select the correct VREV32dXX instruction. This is to convert
// data loaded by VLD1LN into proper vector format in big endian mode.
multiclass Lengthen_HalfSingle_Big_Endian<string DestLanes, string DestTy, string SrcTy,
                               string InsnLanes, string InsnTy, string RevLanes> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
         (!cast<Instruction>("VREV32d" # RevLanes)
           (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
         dsub_0)>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
         (!cast<Instruction>("VREV32d" # RevLanes)
           (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
         dsub_0)>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
         (!cast<Instruction>("VREV32d" # RevLanes)
           (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
         dsub_0)>;
}

// extload, zextload and sextload for a lengthening load followed by another
// lengthening load, to quadruple the initial length.
//
//   Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32"> =
//     Pat<(v4i32 (extloadvi8 addrmode6oneL32:$addr))
//         (EXTRACT_SUBREG (VMOVLuv4i32
//           (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
//                                                   (f64 (IMPLICIT_DEF)),
//                                                   (i32 0))),
//                           dsub_0)),
//           dsub_0)>;
multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
                           string Insn2Ty> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
         (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0))>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
         (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0))>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
         (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0))>;
}

// The following class definition is basically a copy of the
// Lengthen_Double definition above, however with an additional parameter
// "RevLanes" to select the correct VREV32dXX instruction. This is to convert
// data loaded by VLD1LN into proper vector format in big endian mode.
multiclass Lengthen_Double_Big_Endian<string DestLanes, string DestTy, string SrcTy,
                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
                           string Insn2Ty, string RevLanes> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
         (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
             (!cast<Instruction>("VREV32d" # RevLanes)
               (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
             dsub_0))>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
         (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
             (!cast<Instruction>("VREV32d" # RevLanes)
               (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
             dsub_0))>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
         (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
             (!cast<Instruction>("VREV32d" # RevLanes)
               (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
             dsub_0))>;
}

// extload, zextload and sextload for a lengthening load followed by another
// lengthening load, to quadruple the initial length, but which ends up only
// requiring half the available lanes (a 64-bit outcome instead of a 128-bit).
//
//   Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32"> =
//     Pat<(v2i32 (extloadvi8 addrmode6:$addr))
//         (EXTRACT_SUBREG (VMOVLuv4i32
//           (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd16 addrmode6:$addr,
//                                                   (f64 (IMPLICIT_DEF)),
//                                                   (i32 0))),
//                           dsub_0)),
//           dsub_0)>;
multiclass Lengthen_HalfDouble<string DestLanes, string DestTy, string SrcTy,
                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
                           string Insn2Ty> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0)),
           dsub_0)>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0)),
           dsub_0)>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0)),
           dsub_0)>;
}

// The following class definition is basically a copy of the
// Lengthen_HalfDouble definition above, however with an additional VREV16d8
// instruction to convert data loaded by VLD1LN into proper vector format
// in big endian mode.
multiclass Lengthen_HalfDouble_Big_Endian<string DestLanes, string DestTy, string SrcTy,
                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
                           string Insn2Ty> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
             (!cast<Instruction>("VREV16d8")
               (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
             dsub_0)),
           dsub_0)>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
             (!cast<Instruction>("VREV16d8")
               (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
             dsub_0)),
           dsub_0)>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
             (!cast<Instruction>("VREV16d8")
               (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
             dsub_0)),
           dsub_0)>;
}

defm : Lengthen_Single<"8", "i16", "8">; // v8i8 -> v8i16
defm : Lengthen_Single<"4", "i32", "16">; // v4i16 -> v4i32
defm : Lengthen_Single<"2", "i64", "32">; // v2i32 -> v2i64

let Predicates = [IsLE] in {
  defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16
  defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32

  // Double lengthening - v4i8 -> v4i16 -> v4i32
  defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">;
  // v2i8 -> v2i16 -> v2i32
  defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">;
  // v2i16 -> v2i32 -> v2i64
  defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">;
}

let Predicates = [IsBE] in {
  defm : Lengthen_HalfSingle_Big_Endian<"4", "i16", "i8", "8", "i16", "8">; // v4i8 -> v4i16
  defm : Lengthen_HalfSingle_Big_Endian<"2", "i32", "i16", "4", "i32", "16">; // v2i16 -> v2i32

  // Double lengthening - v4i8 -> v4i16 -> v4i32
  defm : Lengthen_Double_Big_Endian<"4", "i32", "i8", "8", "i16", "4", "i32", "8">;
  // v2i8 -> v2i16 -> v2i32
  defm : Lengthen_HalfDouble_Big_Endian<"2", "i32", "i8", "8", "i16", "4", "i32">;
  // v2i16 -> v2i32 -> v2i64
  defm : Lengthen_Double_Big_Endian<"2", "i64", "i16", "4", "i32", "2", "i64", "16">;
}

// Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64
let Predicates = [IsLE] in {
  def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
           (VLD1LNd16 addrmode6:$addr,
                      (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
  def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
           (VLD1LNd16 addrmode6:$addr,
                      (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
  def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
        (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
           (VLD1LNd16 addrmode6:$addr,
                      (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
}
// The following patterns are basically a copy of the patterns above,
// however with an additional VREV16d instruction to convert data
// loaded by VLD1LN into proper vector format in big endian mode.
6581 let Predicates = [IsBE] in { 6582 def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)), 6583 (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 6584 (!cast<Instruction>("VREV16d8") 6585 (VLD1LNd16 addrmode6:$addr, 6586 (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>; 6587 def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)), 6588 (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 6589 (!cast<Instruction>("VREV16d8") 6590 (VLD1LNd16 addrmode6:$addr, 6591 (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>; 6592 def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)), 6593 (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16 6594 (!cast<Instruction>("VREV16d8") 6595 (VLD1LNd16 addrmode6:$addr, 6596 (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>; 6597 } 6598 6599 //===----------------------------------------------------------------------===// 6600 // Assembler aliases 6601 // 6602 6603 def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn", 6604 (VSETLNi32 DPR:$Dd, GPR:$Rn, 1, pred:$p)>; 6605 def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn", 6606 (VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>; 6607 6608 // VAND/VBIC/VEOR/VORR accept but do not require a type suffix. 
6609 defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm", 6610 (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; 6611 defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm", 6612 (VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; 6613 defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm", 6614 (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; 6615 defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm", 6616 (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; 6617 defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm", 6618 (VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; 6619 defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm", 6620 (VEORq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; 6621 defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm", 6622 (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; 6623 defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm", 6624 (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; 6625 // ... two-operand aliases 6626 defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm", 6627 (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; 6628 defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm", 6629 (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; 6630 defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm", 6631 (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; 6632 defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm", 6633 (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; 6634 defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm", 6635 (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; 6636 defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm", 6637 (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; 6638 6639 // VLD1 single-lane pseudo-instructions. These need special handling for 6640 // the lane index that an InstAlias can't handle, so we use these instead. 
6641 def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr", 6642 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, 6643 pred:$p)>; 6644 def VLD1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr", 6645 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, 6646 pred:$p)>; 6647 def VLD1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr", 6648 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, 6649 pred:$p)>; 6650 6651 def VLD1LNdWB_fixed_Asm_8 : 6652 NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr!", 6653 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, 6654 pred:$p)>; 6655 def VLD1LNdWB_fixed_Asm_16 : 6656 NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr!", 6657 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, 6658 pred:$p)>; 6659 def VLD1LNdWB_fixed_Asm_32 : 6660 NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr!", 6661 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, 6662 pred:$p)>; 6663 def VLD1LNdWB_register_Asm_8 : 6664 NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr, $Rm", 6665 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, 6666 rGPR:$Rm, pred:$p)>; 6667 def VLD1LNdWB_register_Asm_16 : 6668 NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr, $Rm", 6669 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, 6670 rGPR:$Rm, pred:$p)>; 6671 def VLD1LNdWB_register_Asm_32 : 6672 NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr, $Rm", 6673 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, 6674 rGPR:$Rm, pred:$p)>; 6675 6676 6677 // VST1 single-lane pseudo-instructions. These need special handling for 6678 // the lane index that an InstAlias can't handle, so we use these instead. 
// Plain (no-writeback) forms; the list operand encodes the lane index.
def VST1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr",
                   (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                        pred:$p)>;
def VST1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr",
                    (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                         pred:$p)>;
def VST1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr",
                    (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                         pred:$p)>;

// Post-increment writeback forms ("$addr!").
def VST1LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr!",
                   (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                        pred:$p)>;
def VST1LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr!",
                   (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                        pred:$p)>;
def VST1LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr!",
                   (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                        pred:$p)>;
// Register-increment writeback forms ("$addr, $Rm").
def VST1LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr, $Rm",
                   (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                        rGPR:$Rm, pred:$p)>;
def VST1LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr, $Rm",
                   (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                        rGPR:$Rm, pred:$p)>;
def VST1LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr, $Rm",
                   (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                        rGPR:$Rm, pred:$p)>;

// VLD2 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// Plain (no-writeback) forms. The q-register variants (even/odd register
// spacing) exist only for the .16 and .32 element sizes.
def VLD2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr",
                   (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
                        pred:$p)>;
def VLD2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
                    (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
                         pred:$p)>;
def VLD2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
                    (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
                         pred:$p)>;
def VLD2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
                    (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
                         pred:$p)>;
def VLD2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
                    (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
                         pred:$p)>;

// Post-increment writeback forms ("$addr!").
def VLD2LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr!",
                   (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
                        pred:$p)>;
def VLD2LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
                   (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
                        pred:$p)>;
def VLD2LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
                   (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
                        pred:$p)>;
def VLD2LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
                   (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
                        pred:$p)>;
def VLD2LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
                   (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
                        pred:$p)>;
// Register-increment writeback forms ("$addr, $Rm").
def VLD2LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr, $Rm",
                   (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
                        rGPR:$Rm, pred:$p)>;
def VLD2LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
                   (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
                        rGPR:$Rm, pred:$p)>;
def VLD2LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
                   (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
                        rGPR:$Rm, pred:$p)>;
def VLD2LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
                   (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
                        rGPR:$Rm, pred:$p)>;
def VLD2LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
                   (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
                        rGPR:$Rm, pred:$p)>;


// VST2 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VST2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr",
                   (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
                        pred:$p)>;
def VST2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
                    (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
                         pred:$p)>;
def VST2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
                    (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
                         pred:$p)>;
def VST2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
                    (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
                         pred:$p)>;
def VST2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
                    (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
                         pred:$p)>;

// Post-increment writeback forms ("$addr!").
def VST2LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr!",
                   (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
                        pred:$p)>;
def VST2LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
                   (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
                        pred:$p)>;
def VST2LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
                   (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
                        pred:$p)>;
def VST2LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
                   (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
                        pred:$p)>;
def VST2LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
                   (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
                        pred:$p)>;
// Register-increment writeback forms ("$addr, $Rm").
def VST2LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr, $Rm",
                   (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
                        rGPR:$Rm, pred:$p)>;
def VST2LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr, $Rm",
                   (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
                        rGPR:$Rm, pred:$p)>;
def VST2LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
                   (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
                        rGPR:$Rm, pred:$p)>;
def VST2LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr, $Rm",
                   (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
                        rGPR:$Rm, pred:$p)>;
def VST2LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
                   (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
                        rGPR:$Rm, pred:$p)>;

// VLD3 all-lanes pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// Plain (no-writeback) all-lanes forms; all element sizes use the same
// unaligned addressing mode.
def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
                    (ins VecListThreeDAllLanes:$list,
                         addrmode6dupalignNone:$addr, pred:$p)>;
def VLD3DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
                    (ins VecListThreeDAllLanes:$list,
                         addrmode6dupalignNone:$addr, pred:$p)>;
def VLD3DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
                    (ins VecListThreeDAllLanes:$list,
                         addrmode6dupalignNone:$addr, pred:$p)>;
def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
                    (ins VecListThreeQAllLanes:$list,
                         addrmode6dupalignNone:$addr, pred:$p)>;
def VLD3DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
                    (ins VecListThreeQAllLanes:$list,
                         addrmode6dupalignNone:$addr, pred:$p)>;
def VLD3DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
                    (ins VecListThreeQAllLanes:$list,
                         addrmode6dupalignNone:$addr, pred:$p)>;

// Post-increment writeback forms ("$addr!").
def VLD3DUPdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
                   (ins VecListThreeDAllLanes:$list,
                        addrmode6dupalignNone:$addr, pred:$p)>;
def VLD3DUPdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
                   (ins VecListThreeDAllLanes:$list,
                        addrmode6dupalignNone:$addr, pred:$p)>;
def VLD3DUPdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
                   (ins VecListThreeDAllLanes:$list,
                        addrmode6dupalignNone:$addr, pred:$p)>;
def VLD3DUPqWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
                   (ins VecListThreeQAllLanes:$list,
                        addrmode6dupalignNone:$addr, pred:$p)>;
def VLD3DUPqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
                   (ins VecListThreeQAllLanes:$list,
                        addrmode6dupalignNone:$addr, pred:$p)>;
def VLD3DUPqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
                   (ins VecListThreeQAllLanes:$list,
                        addrmode6dupalignNone:$addr, pred:$p)>;
// Register-increment writeback forms ("$addr, $Rm").
def VLD3DUPdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
                   (ins VecListThreeDAllLanes:$list,
                        addrmode6dupalignNone:$addr, rGPR:$Rm, pred:$p)>;
def VLD3DUPdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
                   (ins VecListThreeDAllLanes:$list,
                        addrmode6dupalignNone:$addr, rGPR:$Rm, pred:$p)>;
def VLD3DUPdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
                   (ins VecListThreeDAllLanes:$list,
                        addrmode6dupalignNone:$addr, rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
                   (ins VecListThreeQAllLanes:$list,
                        addrmode6dupalignNone:$addr, rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
                   (ins VecListThreeQAllLanes:$list,
                        addrmode6dupalignNone:$addr, rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
                   (ins VecListThreeQAllLanes:$list,
                        addrmode6dupalignNone:$addr, rGPR:$Rm, pred:$p)>;


// VLD3 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// Plain (no-writeback) forms; no alignment hint is permitted for 3-element
// single-lane accesses, so all variants use addrmode6alignNone.
def VLD3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
                   (ins VecListThreeDByteIndexed:$list,
                        addrmode6alignNone:$addr, pred:$p)>;
def VLD3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
                    (ins VecListThreeDHWordIndexed:$list,
                         addrmode6alignNone:$addr, pred:$p)>;
def VLD3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
                    (ins VecListThreeDWordIndexed:$list,
                         addrmode6alignNone:$addr, pred:$p)>;
def VLD3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
                    (ins VecListThreeQHWordIndexed:$list,
                         addrmode6alignNone:$addr, pred:$p)>;
def VLD3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
                    (ins VecListThreeQWordIndexed:$list,
                         addrmode6alignNone:$addr, pred:$p)>;

// Post-increment writeback forms ("$addr!").
def VLD3LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
                   (ins VecListThreeDByteIndexed:$list,
                        addrmode6alignNone:$addr, pred:$p)>;
def VLD3LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
                   (ins VecListThreeDHWordIndexed:$list,
                        addrmode6alignNone:$addr, pred:$p)>;
def VLD3LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
                   (ins VecListThreeDWordIndexed:$list,
                        addrmode6alignNone:$addr, pred:$p)>;
def VLD3LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
                   (ins VecListThreeQHWordIndexed:$list,
                        addrmode6alignNone:$addr, pred:$p)>;
def VLD3LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
                   (ins VecListThreeQWordIndexed:$list,
                        addrmode6alignNone:$addr, pred:$p)>;
// Register-increment writeback forms ("$addr, $Rm").
def VLD3LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
                   (ins VecListThreeDByteIndexed:$list,
                        addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VLD3LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
                   (ins VecListThreeDHWordIndexed:$list,
                        addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VLD3LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
                   (ins VecListThreeDWordIndexed:$list,
                        addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VLD3LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
                   (ins VecListThreeQHWordIndexed:$list,
                        addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VLD3LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
                   (ins VecListThreeQWordIndexed:$list,
                        addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;

// VLD3 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
// Plain (no-writeback) forms; all variants accept 64-bit alignment.
def VLD3dAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
                 (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
                  (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
                  (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
                 (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
                  (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
                  (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;

// Post-increment writeback forms ("$addr!").
def VLD3dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
                (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
                (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
                (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
                (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
                (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
                (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
// Register-increment writeback forms ("$addr, $Rm").
def VLD3dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
                (ins VecListThreeD:$list, addrmode6align64:$addr,
                     rGPR:$Rm, pred:$p)>;
def VLD3dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
                (ins VecListThreeD:$list, addrmode6align64:$addr,
                     rGPR:$Rm, pred:$p)>;
def VLD3dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
                (ins VecListThreeD:$list, addrmode6align64:$addr,
                     rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
                (ins VecListThreeQ:$list, addrmode6align64:$addr,
                     rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
                (ins VecListThreeQ:$list, addrmode6align64:$addr,
                     rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
                (ins VecListThreeQ:$list, addrmode6align64:$addr,
                     rGPR:$Rm, pred:$p)>;

// VST3 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// Plain (no-writeback) forms; no alignment hint is permitted for 3-element
// single-lane accesses, so all variants use addrmode6alignNone.
def VST3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
                   (ins VecListThreeDByteIndexed:$list,
                        addrmode6alignNone:$addr, pred:$p)>;
def VST3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
                    (ins VecListThreeDHWordIndexed:$list,
                         addrmode6alignNone:$addr, pred:$p)>;
def VST3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
                    (ins VecListThreeDWordIndexed:$list,
                         addrmode6alignNone:$addr, pred:$p)>;
def VST3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
                    (ins VecListThreeQHWordIndexed:$list,
                         addrmode6alignNone:$addr, pred:$p)>;
def VST3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
                    (ins VecListThreeQWordIndexed:$list,
                         addrmode6alignNone:$addr, pred:$p)>;

// Post-increment writeback forms ("$addr!").
def VST3LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
                   (ins VecListThreeDByteIndexed:$list,
                        addrmode6alignNone:$addr, pred:$p)>;
def VST3LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
                   (ins VecListThreeDHWordIndexed:$list,
                        addrmode6alignNone:$addr, pred:$p)>;
def VST3LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
                   (ins VecListThreeDWordIndexed:$list,
                        addrmode6alignNone:$addr, pred:$p)>;
def VST3LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
                   (ins VecListThreeQHWordIndexed:$list,
                        addrmode6alignNone:$addr, pred:$p)>;
def VST3LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
                   (ins VecListThreeQWordIndexed:$list,
                        addrmode6alignNone:$addr, pred:$p)>;
// Register-increment writeback forms ("$addr, $Rm").
def VST3LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
                   (ins VecListThreeDByteIndexed:$list,
                        addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VST3LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
                   (ins VecListThreeDHWordIndexed:$list,
                        addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VST3LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
                   (ins VecListThreeDWordIndexed:$list,
                        addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VST3LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
                   (ins VecListThreeQHWordIndexed:$list,
                        addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VST3LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
                   (ins VecListThreeQWordIndexed:$list,
                        addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;


// VST3 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
// Plain (no-writeback) forms; all variants accept 64-bit alignment.
def VST3dAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
                 (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
                  (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
                  (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
                 (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
                  (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
                  (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;

// Post-increment writeback forms ("$addr!").
def VST3dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
                (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
                (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
                (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
                (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
                (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
                (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
// Register-increment writeback forms ("$addr, $Rm").
def VST3dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
                (ins VecListThreeD:$list, addrmode6align64:$addr,
                     rGPR:$Rm, pred:$p)>;
def VST3dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
                (ins VecListThreeD:$list, addrmode6align64:$addr,
                     rGPR:$Rm, pred:$p)>;
def VST3dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
                (ins VecListThreeD:$list, addrmode6align64:$addr,
                     rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
                (ins VecListThreeQ:$list, addrmode6align64:$addr,
                     rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
                (ins VecListThreeQ:$list, addrmode6align64:$addr,
                     rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
                (ins VecListThreeQ:$list, addrmode6align64:$addr,
                     rGPR:$Rm, pred:$p)>;

// VLD4 all-lanes pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// Plain (no-writeback) all-lanes forms; the permitted alignment grows with
// the element size (32-bit for .8, 64-bit for .16, 64-or-128-bit for .32).
def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
                    (ins VecListFourDAllLanes:$list,
                         addrmode6dupalign32:$addr, pred:$p)>;
def VLD4DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
                    (ins VecListFourDAllLanes:$list,
                         addrmode6dupalign64:$addr, pred:$p)>;
def VLD4DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
                    (ins VecListFourDAllLanes:$list,
                         addrmode6dupalign64or128:$addr, pred:$p)>;
def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
                    (ins VecListFourQAllLanes:$list,
                         addrmode6dupalign32:$addr, pred:$p)>;
def VLD4DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
                    (ins VecListFourQAllLanes:$list,
                         addrmode6dupalign64:$addr, pred:$p)>;
def VLD4DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
                    (ins VecListFourQAllLanes:$list,
                         addrmode6dupalign64or128:$addr, pred:$p)>;

// Post-increment writeback forms ("$addr!").
def VLD4DUPdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
                   (ins VecListFourDAllLanes:$list,
                        addrmode6dupalign32:$addr, pred:$p)>;
def VLD4DUPdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
                   (ins VecListFourDAllLanes:$list,
                        addrmode6dupalign64:$addr, pred:$p)>;
def VLD4DUPdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
                   (ins VecListFourDAllLanes:$list,
                        addrmode6dupalign64or128:$addr, pred:$p)>;
def VLD4DUPqWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
                   (ins VecListFourQAllLanes:$list,
                        addrmode6dupalign32:$addr, pred:$p)>;
def VLD4DUPqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
                   (ins VecListFourQAllLanes:$list,
                        addrmode6dupalign64:$addr, pred:$p)>;
def VLD4DUPqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
                   (ins VecListFourQAllLanes:$list,
                        addrmode6dupalign64or128:$addr, pred:$p)>;
// Register-increment writeback forms ("$addr, $Rm").
def VLD4DUPdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
                   (ins VecListFourDAllLanes:$list,
                        addrmode6dupalign32:$addr, rGPR:$Rm, pred:$p)>;
def VLD4DUPdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
                   (ins VecListFourDAllLanes:$list,
                        addrmode6dupalign64:$addr, rGPR:$Rm, pred:$p)>;
def VLD4DUPdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
                   (ins VecListFourDAllLanes:$list,
                        addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
                   (ins VecListFourQAllLanes:$list,
                        addrmode6dupalign32:$addr, rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
                   (ins VecListFourQAllLanes:$list,
                        addrmode6dupalign64:$addr, rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
                   (ins VecListFourQAllLanes:$list,
                        addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>;


// VLD4 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// Plain (no-writeback) forms; the permitted alignment grows with the element
// size (32-bit for .8, 64-bit for .16, 64-or-128-bit for .32).
def VLD4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
                   (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                        pred:$p)>;
def VLD4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
                    (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                         pred:$p)>;
def VLD4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
                    (ins VecListFourDWordIndexed:$list,
                         addrmode6align64or128:$addr, pred:$p)>;
def VLD4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
                    (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                         pred:$p)>;
def VLD4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
                    (ins VecListFourQWordIndexed:$list,
                         addrmode6align64or128:$addr, pred:$p)>;

// Post-increment writeback forms ("$addr!").
def VLD4LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
                   (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                        pred:$p)>;
def VLD4LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
                   (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                        pred:$p)>;
def VLD4LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
                   (ins VecListFourDWordIndexed:$list,
                        addrmode6align64or128:$addr, pred:$p)>;
def VLD4LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
                   (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                        pred:$p)>;
def VLD4LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
                   (ins VecListFourQWordIndexed:$list,
                        addrmode6align64or128:$addr, pred:$p)>;
// Register-increment writeback forms ("$addr, $Rm").
def VLD4LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
                   (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                        rGPR:$Rm, pred:$p)>;
def VLD4LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
                   (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                        rGPR:$Rm, pred:$p)>;
def VLD4LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
                   (ins VecListFourDWordIndexed:$list,
                        addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
def VLD4LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
                   (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                        rGPR:$Rm, pred:$p)>;
def VLD4LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
                   (ins VecListFourQWordIndexed:$list,
                        addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;



// VLD4 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
// Plain (no-writeback) forms; all variants accept 64-, 128-, or 256-bit
// alignment.
def VLD4dAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
                 (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                      pred:$p)>;
def VLD4dAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
                  (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                       pred:$p)>;
def VLD4dAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
                  (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                       pred:$p)>;
def VLD4qAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
                 (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                      pred:$p)>;
def VLD4qAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
                  (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                       pred:$p)>;
def VLD4qAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
                  (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                       pred:$p)>;

// Post-increment writeback forms ("$addr!").
def VLD4dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
                (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                     pred:$p)>;
def VLD4dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
                (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                     pred:$p)>;
def VLD4dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
                (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                     pred:$p)>;
def VLD4qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
                (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                     pred:$p)>;
def VLD4qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
                (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                     pred:$p)>;
def VLD4qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
                (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                     pred:$p)>;
// Register-increment writeback forms ("$addr, $Rm").
def VLD4dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
                (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                     rGPR:$Rm, pred:$p)>;
def VLD4dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
                (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                     rGPR:$Rm, pred:$p)>;
def VLD4dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
                (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                     rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
                (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                     rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
                (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                     rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
                (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                     rGPR:$Rm, pred:$p)>;

// VST4 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// Plain (no writeback) VST4 single-lane pseudos.  Note there is no "q" .8
// variant: byte lanes only exist in the D-register spaced form.
def VST4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
               (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                    pred:$p)>;
def VST4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VST4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;
def VST4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VST4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;

// Fixed-writeback forms ("$addr!").
def VST4LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
               (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                    pred:$p)>;
def VST4LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VST4LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;
def VST4LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VST4LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;
// Register-writeback forms ("$addr, $Rm").
def VST4LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST4LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST4LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListFourDWordIndexed:$list,
                       addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
def VST4LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST4LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListFourQWordIndexed:$list,
                       addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;


// VST4 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
// Plain (no writeback) VST4 multiple-structure pseudos.
def VST4dAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;

// Fixed-writeback forms ("$addr!").
def VST4dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
// Register-writeback forms ("$addr, $Rm").
def VST4dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;

// VMOV/VMVN takes an optional datatype suffix
// (a register-register vmov is encoded as VORR with identical source
// operands, so the alias maps onto VORRd/VORRq).
defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
                          (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
                          (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;

defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
                          (VMVNd DPR:$Vd, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
                          (VMVNq QPR:$Vd, QPR:$Vm, pred:$p)>;

// VCLE (register) is an assembler alias for VCGE w/ the operands reversed.
// D-register versions.
// Each alias swaps $Dn/$Dm (resp. $Qn/$Qm) so that "vcle a, b, c" becomes
// "vcge a, c, b".
def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm",
                    (VCGEsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s16 $Dd, $Dn, $Dm",
                    (VCGEsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s32 $Dd, $Dn, $Dm",
                    (VCGEsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u8 $Dd, $Dn, $Dm",
                    (VCGEuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u16 $Dd, $Dn, $Dm",
                    (VCGEuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm",
                    (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm",
                    (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// Q-register versions.
def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm",
                    (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s16 $Qd, $Qn, $Qm",
                    (VCGEsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s32 $Qd, $Qn, $Qm",
                    (VCGEsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u8 $Qd, $Qn, $Qm",
                    (VCGEuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u16 $Qd, $Qn, $Qm",
                    (VCGEuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm",
                    (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm",
                    (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;

// VCLT (register) is an assembler alias for VCGT w/ the operands reversed.
// D-register versions.
// Each alias swaps $Dn/$Dm (resp. $Qn/$Qm) so that "vclt a, b, c" becomes
// "vcgt a, c, b".
def : NEONInstAlias<"vclt${p}.s8 $Dd, $Dn, $Dm",
                    (VCGTsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s16 $Dd, $Dn, $Dm",
                    (VCGTsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s32 $Dd, $Dn, $Dm",
                    (VCGTsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u8 $Dd, $Dn, $Dm",
                    (VCGTuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u16 $Dd, $Dn, $Dm",
                    (VCGTuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm",
                    (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm",
                    (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// Q-register versions.
def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm",
                    (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s16 $Qd, $Qn, $Qm",
                    (VCGTsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s32 $Qd, $Qn, $Qm",
                    (VCGTsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u8 $Qd, $Qn, $Qm",
                    (VCGTuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u16 $Qd, $Qn, $Qm",
                    (VCGTuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm",
                    (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm",
                    (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;

// VSWP allows, but does not require, a type suffix.
defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
                          (VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
                          (VSWPq QPR:$Vd, QPR:$Vm, pred:$p)>;

// VBIF, VBIT, and VBSL allow, but do not require, a type suffix.
defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
                          (VBIFd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
                          (VBITd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
                          (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
                          (VBIFq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
                          (VBITq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
                          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;

// "vmov Rd, #-imm" can be handled via "vmvn" (and vice versa): the
// nImmVMOVI32Neg operand class matches immediates whose bitwise complement
// is encodable, so each mnemonic maps onto the complementary instruction.
def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
                    (VMVNv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
                    (VMVNv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
                    (VMOVv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
                    (VMOVv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;

// 'gas' compatibility aliases for quad-word instructions. Strictly speaking,
// these should restrict to just the Q register variants, but the register
// classes are enough to match correctly regardless, so we keep it simple
// and just use MnemonicAlias.
def : NEONMnemonicAlias<"vbicq", "vbic">;
def : NEONMnemonicAlias<"vandq", "vand">;
def : NEONMnemonicAlias<"veorq", "veor">;
def : NEONMnemonicAlias<"vorrq", "vorr">;

def : NEONMnemonicAlias<"vmovq", "vmov">;
def : NEONMnemonicAlias<"vmvnq", "vmvn">;
// Explicit versions for floating point so that the FPImm variants get
// handled early. The parser gets confused otherwise.
def : NEONMnemonicAlias<"vmovq.f32", "vmov.f32">;
def : NEONMnemonicAlias<"vmovq.f64", "vmov.f64">;

def : NEONMnemonicAlias<"vaddq", "vadd">;
def : NEONMnemonicAlias<"vsubq", "vsub">;

def : NEONMnemonicAlias<"vminq", "vmin">;
def : NEONMnemonicAlias<"vmaxq", "vmax">;

def : NEONMnemonicAlias<"vmulq", "vmul">;

def : NEONMnemonicAlias<"vabsq", "vabs">;

def : NEONMnemonicAlias<"vshlq", "vshl">;
def : NEONMnemonicAlias<"vshrq", "vshr">;

def : NEONMnemonicAlias<"vcvtq", "vcvt">;

def : NEONMnemonicAlias<"vcleq", "vcle">;
def : NEONMnemonicAlias<"vceqq", "vceq">;

def : NEONMnemonicAlias<"vzipq", "vzip">;
def : NEONMnemonicAlias<"vswpq", "vswp">;

def : NEONMnemonicAlias<"vrecpeq.f32", "vrecpe.f32">;
def : NEONMnemonicAlias<"vrecpeq.u32", "vrecpe.u32">;


// Alias for loading floating point immediates that aren't representable
// using the vmov.f32 encoding but the bitpattern is representable using
// the .i32 encoding.
def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
                    (VMOVv4i32 QPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
                    (VMOVv2i32 DPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;