1 //===-- ARMInstrNEON.td - NEON support for ARM -------------*- tablegen -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file describes the ARM NEON instruction set. 11 // 12 //===----------------------------------------------------------------------===// 13 14 15 //===----------------------------------------------------------------------===// 16 // NEON-specific Operands. 17 //===----------------------------------------------------------------------===// 18 def nModImm : Operand<i32> { 19 let PrintMethod = "printNEONModImmOperand"; 20 } 21 22 def nImmSplatI8AsmOperand : AsmOperandClass { let Name = "NEONi8splat"; } 23 def nImmSplatI8 : Operand<i32> { 24 let PrintMethod = "printNEONModImmOperand"; 25 let ParserMatchClass = nImmSplatI8AsmOperand; 26 } 27 def nImmSplatI16AsmOperand : AsmOperandClass { let Name = "NEONi16splat"; } 28 def nImmSplatI16 : Operand<i32> { 29 let PrintMethod = "printNEONModImmOperand"; 30 let ParserMatchClass = nImmSplatI16AsmOperand; 31 } 32 def nImmSplatI32AsmOperand : AsmOperandClass { let Name = "NEONi32splat"; } 33 def nImmSplatI32 : Operand<i32> { 34 let PrintMethod = "printNEONModImmOperand"; 35 let ParserMatchClass = nImmSplatI32AsmOperand; 36 } 37 def nImmSplatNotI16AsmOperand : AsmOperandClass { let Name = "NEONi16splatNot"; } 38 def nImmSplatNotI16 : Operand<i32> { 39 let ParserMatchClass = nImmSplatNotI16AsmOperand; 40 } 41 def nImmSplatNotI32AsmOperand : AsmOperandClass { let Name = "NEONi32splatNot"; } 42 def nImmSplatNotI32 : Operand<i32> { 43 let ParserMatchClass = nImmSplatNotI32AsmOperand; 44 } 45 def nImmVMOVI32AsmOperand : AsmOperandClass { let Name = "NEONi32vmov"; } 46 def nImmVMOVI32 : Operand<i32> { 47 let PrintMethod = "printNEONModImmOperand"; 48 let ParserMatchClass = nImmVMOVI32AsmOperand; 49 } 50 51 def nImmVMOVI16AsmOperandByteReplicate : 52 AsmOperandClass { 53 let Name = "NEONi16vmovByteReplicate"; 54 let PredicateMethod = "isNEONi16ByteReplicate"; 55 let RenderMethod = "addNEONvmovByteReplicateOperands"; 56 } 57 def nImmVMOVI32AsmOperandByteReplicate : 58 AsmOperandClass { 59 let Name = "NEONi32vmovByteReplicate"; 60 let PredicateMethod = "isNEONi32ByteReplicate"; 61 let RenderMethod = "addNEONvmovByteReplicateOperands"; 62 } 63 def nImmVMVNI16AsmOperandByteReplicate : 64 AsmOperandClass { 65 let Name = "NEONi16invByteReplicate"; 66 let PredicateMethod = "isNEONi16ByteReplicate"; 67 let RenderMethod = "addNEONinvByteReplicateOperands"; 68 } 69 def nImmVMVNI32AsmOperandByteReplicate : 70 AsmOperandClass { 71 let Name = "NEONi32invByteReplicate"; 72 let PredicateMethod = "isNEONi32ByteReplicate"; 73 let RenderMethod = "addNEONinvByteReplicateOperands"; 74 } 75 76 def nImmVMOVI16ByteReplicate : Operand<i32> { 77 let PrintMethod = "printNEONModImmOperand"; 78 let ParserMatchClass = nImmVMOVI16AsmOperandByteReplicate; 79 } 80 def nImmVMOVI32ByteReplicate : Operand<i32> { 81 let PrintMethod = "printNEONModImmOperand"; 82 let ParserMatchClass = nImmVMOVI32AsmOperandByteReplicate; 83 } 84 def nImmVMVNI16ByteReplicate : Operand<i32> { 85 let PrintMethod = "printNEONModImmOperand"; 86 let ParserMatchClass = nImmVMVNI16AsmOperandByteReplicate; 87 } 88 def nImmVMVNI32ByteReplicate : Operand<i32> { 89 let PrintMethod = "printNEONModImmOperand"; 90 let ParserMatchClass = nImmVMVNI32AsmOperandByteReplicate; 91 } 92 93 def nImmVMOVI32NegAsmOperand : AsmOperandClass { let Name = "NEONi32vmovNeg"; } 94 def nImmVMOVI32Neg : Operand<i32> { 95 let PrintMethod = "printNEONModImmOperand"; 96 let ParserMatchClass = nImmVMOVI32NegAsmOperand; 97 } 98 def nImmVMOVF32 : Operand<i32> { 99 let PrintMethod = "printFPImmOperand"; 100 let ParserMatchClass = FPImmOperand; 101 } 102 def nImmSplatI64AsmOperand : AsmOperandClass { let Name = "NEONi64splat"; } 103 def nImmSplatI64 : Operand<i32> { 104 let PrintMethod = "printNEONModImmOperand"; 105 let ParserMatchClass = nImmSplatI64AsmOperand; 106 } 107 108 def VectorIndex8Operand : AsmOperandClass { let Name = "VectorIndex8"; } 109 def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; } 110 def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; } 111 def VectorIndex8 : Operand<i32>, ImmLeaf<i32, [{ 112 return ((uint64_t)Imm) < 8; 113 }]> { 114 let ParserMatchClass = VectorIndex8Operand; 115 let PrintMethod = "printVectorIndex"; 116 let MIOperandInfo = (ops i32imm); 117 } 118 def VectorIndex16 : Operand<i32>, ImmLeaf<i32, [{ 119 return ((uint64_t)Imm) < 4; 120 }]> { 121 let ParserMatchClass = VectorIndex16Operand; 122 let PrintMethod = "printVectorIndex"; 123 let MIOperandInfo = (ops i32imm); 124 } 125 def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{ 126 return ((uint64_t)Imm) < 2; 127 }]> { 128 let ParserMatchClass = VectorIndex32Operand; 129 let PrintMethod = "printVectorIndex"; 130 let MIOperandInfo = (ops i32imm); 131 } 132 133 // Register list of one D register. 134 def VecListOneDAsmOperand : AsmOperandClass { 135 let Name = "VecListOneD"; 136 let ParserMethod = "parseVectorList"; 137 let RenderMethod = "addVecListOperands"; 138 } 139 def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> { 140 let ParserMatchClass = VecListOneDAsmOperand; 141 } 142 // Register list of two sequential D registers. 143 def VecListDPairAsmOperand : AsmOperandClass { 144 let Name = "VecListDPair"; 145 let ParserMethod = "parseVectorList"; 146 let RenderMethod = "addVecListOperands"; 147 } 148 def VecListDPair : RegisterOperand<DPair, "printVectorListTwo"> { 149 let ParserMatchClass = VecListDPairAsmOperand; 150 } 151 // Register list of three sequential D registers. 152 def VecListThreeDAsmOperand : AsmOperandClass { 153 let Name = "VecListThreeD"; 154 let ParserMethod = "parseVectorList"; 155 let RenderMethod = "addVecListOperands"; 156 } 157 def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> { 158 let ParserMatchClass = VecListThreeDAsmOperand; 159 } 160 // Register list of four sequential D registers. 161 def VecListFourDAsmOperand : AsmOperandClass { 162 let Name = "VecListFourD"; 163 let ParserMethod = "parseVectorList"; 164 let RenderMethod = "addVecListOperands"; 165 } 166 def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> { 167 let ParserMatchClass = VecListFourDAsmOperand; 168 } 169 // Register list of two D registers spaced by 2 (two sequential Q registers). 170 def VecListDPairSpacedAsmOperand : AsmOperandClass { 171 let Name = "VecListDPairSpaced"; 172 let ParserMethod = "parseVectorList"; 173 let RenderMethod = "addVecListOperands"; 174 } 175 def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListTwoSpaced"> { 176 let ParserMatchClass = VecListDPairSpacedAsmOperand; 177 } 178 // Register list of three D registers spaced by 2 (three Q registers). 179 def VecListThreeQAsmOperand : AsmOperandClass { 180 let Name = "VecListThreeQ"; 181 let ParserMethod = "parseVectorList"; 182 let RenderMethod = "addVecListOperands"; 183 } 184 def VecListThreeQ : RegisterOperand<DPR, "printVectorListThreeSpaced"> { 185 let ParserMatchClass = VecListThreeQAsmOperand; 186 } 187 // Register list of three D registers spaced by 2 (three Q registers). 188 def VecListFourQAsmOperand : AsmOperandClass { 189 let Name = "VecListFourQ"; 190 let ParserMethod = "parseVectorList"; 191 let RenderMethod = "addVecListOperands"; 192 } 193 def VecListFourQ : RegisterOperand<DPR, "printVectorListFourSpaced"> { 194 let ParserMatchClass = VecListFourQAsmOperand; 195 } 196 197 // Register list of one D register, with "all lanes" subscripting. 198 def VecListOneDAllLanesAsmOperand : AsmOperandClass { 199 let Name = "VecListOneDAllLanes"; 200 let ParserMethod = "parseVectorList"; 201 let RenderMethod = "addVecListOperands"; 202 } 203 def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> { 204 let ParserMatchClass = VecListOneDAllLanesAsmOperand; 205 } 206 // Register list of two D registers, with "all lanes" subscripting. 207 def VecListDPairAllLanesAsmOperand : AsmOperandClass { 208 let Name = "VecListDPairAllLanes"; 209 let ParserMethod = "parseVectorList"; 210 let RenderMethod = "addVecListOperands"; 211 } 212 def VecListDPairAllLanes : RegisterOperand<DPair, 213 "printVectorListTwoAllLanes"> { 214 let ParserMatchClass = VecListDPairAllLanesAsmOperand; 215 } 216 // Register list of two D registers spaced by 2 (two sequential Q registers). 217 def VecListDPairSpacedAllLanesAsmOperand : AsmOperandClass { 218 let Name = "VecListDPairSpacedAllLanes"; 219 let ParserMethod = "parseVectorList"; 220 let RenderMethod = "addVecListOperands"; 221 } 222 def VecListDPairSpacedAllLanes : RegisterOperand<DPair, 223 "printVectorListTwoSpacedAllLanes"> { 224 let ParserMatchClass = VecListDPairSpacedAllLanesAsmOperand; 225 } 226 // Register list of three D registers, with "all lanes" subscripting. 227 def VecListThreeDAllLanesAsmOperand : AsmOperandClass { 228 let Name = "VecListThreeDAllLanes"; 229 let ParserMethod = "parseVectorList"; 230 let RenderMethod = "addVecListOperands"; 231 } 232 def VecListThreeDAllLanes : RegisterOperand<DPR, 233 "printVectorListThreeAllLanes"> { 234 let ParserMatchClass = VecListThreeDAllLanesAsmOperand; 235 } 236 // Register list of three D registers spaced by 2 (three sequential Q regs). 237 def VecListThreeQAllLanesAsmOperand : AsmOperandClass { 238 let Name = "VecListThreeQAllLanes"; 239 let ParserMethod = "parseVectorList"; 240 let RenderMethod = "addVecListOperands"; 241 } 242 def VecListThreeQAllLanes : RegisterOperand<DPR, 243 "printVectorListThreeSpacedAllLanes"> { 244 let ParserMatchClass = VecListThreeQAllLanesAsmOperand; 245 } 246 // Register list of four D registers, with "all lanes" subscripting. 247 def VecListFourDAllLanesAsmOperand : AsmOperandClass { 248 let Name = "VecListFourDAllLanes"; 249 let ParserMethod = "parseVectorList"; 250 let RenderMethod = "addVecListOperands"; 251 } 252 def VecListFourDAllLanes : RegisterOperand<DPR, "printVectorListFourAllLanes"> { 253 let ParserMatchClass = VecListFourDAllLanesAsmOperand; 254 } 255 // Register list of four D registers spaced by 2 (four sequential Q regs). 256 def VecListFourQAllLanesAsmOperand : AsmOperandClass { 257 let Name = "VecListFourQAllLanes"; 258 let ParserMethod = "parseVectorList"; 259 let RenderMethod = "addVecListOperands"; 260 } 261 def VecListFourQAllLanes : RegisterOperand<DPR, 262 "printVectorListFourSpacedAllLanes"> { 263 let ParserMatchClass = VecListFourQAllLanesAsmOperand; 264 } 265 266 267 // Register list of one D register, with byte lane subscripting. 268 def VecListOneDByteIndexAsmOperand : AsmOperandClass { 269 let Name = "VecListOneDByteIndexed"; 270 let ParserMethod = "parseVectorList"; 271 let RenderMethod = "addVecListIndexedOperands"; 272 } 273 def VecListOneDByteIndexed : Operand<i32> { 274 let ParserMatchClass = VecListOneDByteIndexAsmOperand; 275 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); 276 } 277 // ...with half-word lane subscripting. 278 def VecListOneDHWordIndexAsmOperand : AsmOperandClass { 279 let Name = "VecListOneDHWordIndexed"; 280 let ParserMethod = "parseVectorList"; 281 let RenderMethod = "addVecListIndexedOperands"; 282 } 283 def VecListOneDHWordIndexed : Operand<i32> { 284 let ParserMatchClass = VecListOneDHWordIndexAsmOperand; 285 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); 286 } 287 // ...with word lane subscripting. 288 def VecListOneDWordIndexAsmOperand : AsmOperandClass { 289 let Name = "VecListOneDWordIndexed"; 290 let ParserMethod = "parseVectorList"; 291 let RenderMethod = "addVecListIndexedOperands"; 292 } 293 def VecListOneDWordIndexed : Operand<i32> { 294 let ParserMatchClass = VecListOneDWordIndexAsmOperand; 295 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); 296 } 297 298 // Register list of two D registers with byte lane subscripting. 299 def VecListTwoDByteIndexAsmOperand : AsmOperandClass { 300 let Name = "VecListTwoDByteIndexed"; 301 let ParserMethod = "parseVectorList"; 302 let RenderMethod = "addVecListIndexedOperands"; 303 } 304 def VecListTwoDByteIndexed : Operand<i32> { 305 let ParserMatchClass = VecListTwoDByteIndexAsmOperand; 306 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); 307 } 308 // ...with half-word lane subscripting. 309 def VecListTwoDHWordIndexAsmOperand : AsmOperandClass { 310 let Name = "VecListTwoDHWordIndexed"; 311 let ParserMethod = "parseVectorList"; 312 let RenderMethod = "addVecListIndexedOperands"; 313 } 314 def VecListTwoDHWordIndexed : Operand<i32> { 315 let ParserMatchClass = VecListTwoDHWordIndexAsmOperand; 316 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); 317 } 318 // ...with word lane subscripting. 319 def VecListTwoDWordIndexAsmOperand : AsmOperandClass { 320 let Name = "VecListTwoDWordIndexed"; 321 let ParserMethod = "parseVectorList"; 322 let RenderMethod = "addVecListIndexedOperands"; 323 } 324 def VecListTwoDWordIndexed : Operand<i32> { 325 let ParserMatchClass = VecListTwoDWordIndexAsmOperand; 326 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); 327 } 328 // Register list of two Q registers with half-word lane subscripting. 329 def VecListTwoQHWordIndexAsmOperand : AsmOperandClass { 330 let Name = "VecListTwoQHWordIndexed"; 331 let ParserMethod = "parseVectorList"; 332 let RenderMethod = "addVecListIndexedOperands"; 333 } 334 def VecListTwoQHWordIndexed : Operand<i32> { 335 let ParserMatchClass = VecListTwoQHWordIndexAsmOperand; 336 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); 337 } 338 // ...with word lane subscripting. 339 def VecListTwoQWordIndexAsmOperand : AsmOperandClass { 340 let Name = "VecListTwoQWordIndexed"; 341 let ParserMethod = "parseVectorList"; 342 let RenderMethod = "addVecListIndexedOperands"; 343 } 344 def VecListTwoQWordIndexed : Operand<i32> { 345 let ParserMatchClass = VecListTwoQWordIndexAsmOperand; 346 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); 347 } 348 349 350 // Register list of three D registers with byte lane subscripting. 351 def VecListThreeDByteIndexAsmOperand : AsmOperandClass { 352 let Name = "VecListThreeDByteIndexed"; 353 let ParserMethod = "parseVectorList"; 354 let RenderMethod = "addVecListIndexedOperands"; 355 } 356 def VecListThreeDByteIndexed : Operand<i32> { 357 let ParserMatchClass = VecListThreeDByteIndexAsmOperand; 358 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); 359 } 360 // ...with half-word lane subscripting. 361 def VecListThreeDHWordIndexAsmOperand : AsmOperandClass { 362 let Name = "VecListThreeDHWordIndexed"; 363 let ParserMethod = "parseVectorList"; 364 let RenderMethod = "addVecListIndexedOperands"; 365 } 366 def VecListThreeDHWordIndexed : Operand<i32> { 367 let ParserMatchClass = VecListThreeDHWordIndexAsmOperand; 368 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); 369 } 370 // ...with word lane subscripting. 371 def VecListThreeDWordIndexAsmOperand : AsmOperandClass { 372 let Name = "VecListThreeDWordIndexed"; 373 let ParserMethod = "parseVectorList"; 374 let RenderMethod = "addVecListIndexedOperands"; 375 } 376 def VecListThreeDWordIndexed : Operand<i32> { 377 let ParserMatchClass = VecListThreeDWordIndexAsmOperand; 378 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); 379 } 380 // Register list of three Q registers with half-word lane subscripting. 381 def VecListThreeQHWordIndexAsmOperand : AsmOperandClass { 382 let Name = "VecListThreeQHWordIndexed"; 383 let ParserMethod = "parseVectorList"; 384 let RenderMethod = "addVecListIndexedOperands"; 385 } 386 def VecListThreeQHWordIndexed : Operand<i32> { 387 let ParserMatchClass = VecListThreeQHWordIndexAsmOperand; 388 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); 389 } 390 // ...with word lane subscripting. 391 def VecListThreeQWordIndexAsmOperand : AsmOperandClass { 392 let Name = "VecListThreeQWordIndexed"; 393 let ParserMethod = "parseVectorList"; 394 let RenderMethod = "addVecListIndexedOperands"; 395 } 396 def VecListThreeQWordIndexed : Operand<i32> { 397 let ParserMatchClass = VecListThreeQWordIndexAsmOperand; 398 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); 399 } 400 401 // Register list of four D registers with byte lane subscripting. 402 def VecListFourDByteIndexAsmOperand : AsmOperandClass { 403 let Name = "VecListFourDByteIndexed"; 404 let ParserMethod = "parseVectorList"; 405 let RenderMethod = "addVecListIndexedOperands"; 406 } 407 def VecListFourDByteIndexed : Operand<i32> { 408 let ParserMatchClass = VecListFourDByteIndexAsmOperand; 409 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); 410 } 411 // ...with half-word lane subscripting. 412 def VecListFourDHWordIndexAsmOperand : AsmOperandClass { 413 let Name = "VecListFourDHWordIndexed"; 414 let ParserMethod = "parseVectorList"; 415 let RenderMethod = "addVecListIndexedOperands"; 416 } 417 def VecListFourDHWordIndexed : Operand<i32> { 418 let ParserMatchClass = VecListFourDHWordIndexAsmOperand; 419 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); 420 } 421 // ...with word lane subscripting. 422 def VecListFourDWordIndexAsmOperand : AsmOperandClass { 423 let Name = "VecListFourDWordIndexed"; 424 let ParserMethod = "parseVectorList"; 425 let RenderMethod = "addVecListIndexedOperands"; 426 } 427 def VecListFourDWordIndexed : Operand<i32> { 428 let ParserMatchClass = VecListFourDWordIndexAsmOperand; 429 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); 430 } 431 // Register list of four Q registers with half-word lane subscripting. 432 def VecListFourQHWordIndexAsmOperand : AsmOperandClass { 433 let Name = "VecListFourQHWordIndexed"; 434 let ParserMethod = "parseVectorList"; 435 let RenderMethod = "addVecListIndexedOperands"; 436 } 437 def VecListFourQHWordIndexed : Operand<i32> { 438 let ParserMatchClass = VecListFourQHWordIndexAsmOperand; 439 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); 440 } 441 // ...with word lane subscripting. 442 def VecListFourQWordIndexAsmOperand : AsmOperandClass { 443 let Name = "VecListFourQWordIndexed"; 444 let ParserMethod = "parseVectorList"; 445 let RenderMethod = "addVecListIndexedOperands"; 446 } 447 def VecListFourQWordIndexed : Operand<i32> { 448 let ParserMatchClass = VecListFourQWordIndexAsmOperand; 449 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); 450 } 451 452 def dword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ 453 return cast<LoadSDNode>(N)->getAlignment() >= 8; 454 }]>; 455 def dword_alignedstore : PatFrag<(ops node:$val, node:$ptr), 456 (store node:$val, node:$ptr), [{ 457 return cast<StoreSDNode>(N)->getAlignment() >= 8; 458 }]>; 459 def word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ 460 return cast<LoadSDNode>(N)->getAlignment() == 4; 461 }]>; 462 def word_alignedstore : PatFrag<(ops node:$val, node:$ptr), 463 (store node:$val, node:$ptr), [{ 464 return cast<StoreSDNode>(N)->getAlignment() == 4; 465 }]>; 466 def hword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ 467 return cast<LoadSDNode>(N)->getAlignment() == 2; 468 }]>; 469 def hword_alignedstore : PatFrag<(ops node:$val, node:$ptr), 470 (store node:$val, node:$ptr), [{ 471 return cast<StoreSDNode>(N)->getAlignment() == 2; 472 }]>; 473 def byte_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ 474 return cast<LoadSDNode>(N)->getAlignment() == 1; 475 }]>; 476 def byte_alignedstore : PatFrag<(ops node:$val, node:$ptr), 477 (store node:$val, node:$ptr), [{ 478 return cast<StoreSDNode>(N)->getAlignment() == 1; 479 }]>; 480 def non_word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ 481 return cast<LoadSDNode>(N)->getAlignment() < 4; 482 }]>; 483 def non_word_alignedstore : PatFrag<(ops node:$val, node:$ptr), 484 (store node:$val, node:$ptr), [{ 485 return cast<StoreSDNode>(N)->getAlignment() < 4; 486 }]>; 487 488 //===----------------------------------------------------------------------===// 489 // NEON-specific DAG Nodes. 490 //===----------------------------------------------------------------------===// 491 492 def SDTARMVCMP : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>; 493 def SDTARMVCMPZ : SDTypeProfile<1, 1, []>; 494 495 def NEONvceq : SDNode<"ARMISD::VCEQ", SDTARMVCMP>; 496 def NEONvceqz : SDNode<"ARMISD::VCEQZ", SDTARMVCMPZ>; 497 def NEONvcge : SDNode<"ARMISD::VCGE", SDTARMVCMP>; 498 def NEONvcgez : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>; 499 def NEONvclez : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>; 500 def NEONvcgeu : SDNode<"ARMISD::VCGEU", SDTARMVCMP>; 501 def NEONvcgt : SDNode<"ARMISD::VCGT", SDTARMVCMP>; 502 def NEONvcgtz : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>; 503 def NEONvcltz : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>; 504 def NEONvcgtu : SDNode<"ARMISD::VCGTU", SDTARMVCMP>; 505 def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVCMP>; 506 507 // Types for vector shift by immediates. The "SHX" version is for long and 508 // narrow operations where the source and destination vectors have different 509 // types. The "SHINS" version is for shift and insert operations. 510 def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>, 511 SDTCisVT<2, i32>]>; 512 def SDTARMVSHX : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>, 513 SDTCisVT<2, i32>]>; 514 def SDTARMVSHINS : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>, 515 SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>; 516 517 def NEONvshl : SDNode<"ARMISD::VSHL", SDTARMVSH>; 518 def NEONvshrs : SDNode<"ARMISD::VSHRs", SDTARMVSH>; 519 def NEONvshru : SDNode<"ARMISD::VSHRu", SDTARMVSH>; 520 def NEONvshrn : SDNode<"ARMISD::VSHRN", SDTARMVSHX>; 521 522 def NEONvrshrs : SDNode<"ARMISD::VRSHRs", SDTARMVSH>; 523 def NEONvrshru : SDNode<"ARMISD::VRSHRu", SDTARMVSH>; 524 def NEONvrshrn : SDNode<"ARMISD::VRSHRN", SDTARMVSHX>; 525 526 def NEONvqshls : SDNode<"ARMISD::VQSHLs", SDTARMVSH>; 527 def NEONvqshlu : SDNode<"ARMISD::VQSHLu", SDTARMVSH>; 528 def NEONvqshlsu : SDNode<"ARMISD::VQSHLsu", SDTARMVSH>; 529 def NEONvqshrns : SDNode<"ARMISD::VQSHRNs", SDTARMVSHX>; 530 def NEONvqshrnu : SDNode<"ARMISD::VQSHRNu", SDTARMVSHX>; 531 def NEONvqshrnsu : SDNode<"ARMISD::VQSHRNsu", SDTARMVSHX>; 532 533 def NEONvqrshrns : SDNode<"ARMISD::VQRSHRNs", SDTARMVSHX>; 534 def NEONvqrshrnu : SDNode<"ARMISD::VQRSHRNu", SDTARMVSHX>; 535 def NEONvqrshrnsu : SDNode<"ARMISD::VQRSHRNsu", SDTARMVSHX>; 536 537 def NEONvsli : SDNode<"ARMISD::VSLI", SDTARMVSHINS>; 538 def NEONvsri : SDNode<"ARMISD::VSRI", SDTARMVSHINS>; 539 540 def SDTARMVGETLN : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>, 541 SDTCisVT<2, i32>]>; 542 def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>; 543 def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>; 544 545 def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>; 546 def NEONvmovImm : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>; 547 def NEONvmvnImm : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>; 548 def NEONvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>; 549 550 def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, 551 SDTCisVT<2, i32>]>; 552 def NEONvorrImm : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>; 553 def NEONvbicImm : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>; 554 555 def NEONvbsl : SDNode<"ARMISD::VBSL", 556 SDTypeProfile<1, 3, [SDTCisVec<0>, 557 SDTCisSameAs<0, 1>, 558 SDTCisSameAs<0, 2>, 559 SDTCisSameAs<0, 3>]>>; 560 561 def NEONvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>; 562 563 // VDUPLANE can produce a quad-register result from a double-register source, 564 // so the result is not constrained to match the source. 565 def NEONvduplane : SDNode<"ARMISD::VDUPLANE", 566 SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, 567 SDTCisVT<2, i32>]>>; 568 569 def SDTARMVEXT : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>, 570 SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>; 571 def NEONvext : SDNode<"ARMISD::VEXT", SDTARMVEXT>; 572 573 def SDTARMVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>; 574 def NEONvrev64 : SDNode<"ARMISD::VREV64", SDTARMVSHUF>; 575 def NEONvrev32 : SDNode<"ARMISD::VREV32", SDTARMVSHUF>; 576 def NEONvrev16 : SDNode<"ARMISD::VREV16", SDTARMVSHUF>; 577 578 def SDTARMVSHUF2 : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, 579 SDTCisSameAs<0, 2>, 580 SDTCisSameAs<0, 3>]>; 581 def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>; 582 def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>; 583 def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>; 584 585 def SDTARMVMULL : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>, 586 SDTCisSameAs<1, 2>]>; 587 def NEONvmulls : SDNode<"ARMISD::VMULLs", SDTARMVMULL>; 588 def NEONvmullu : SDNode<"ARMISD::VMULLu", SDTARMVMULL>; 589 590 def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{ 591 ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0)); 592 unsigned EltBits = 0; 593 uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits); 594 return (EltBits == 32 && EltVal == 0); 595 }]>; 596 597 def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{ 598 ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0)); 599 unsigned EltBits = 0; 600 uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits); 601 return (EltBits == 8 && EltVal == 0xff); 602 }]>; 603 604 //===----------------------------------------------------------------------===// 605 // NEON load / store instructions 606 //===----------------------------------------------------------------------===// 607 608 // Use VLDM to load a Q register as a D register pair. 609 // This is a pseudo instruction that is expanded to VLDMD after reg alloc. 610 def VLDMQIA 611 : PseudoVFPLdStM<(outs DPair:$dst), (ins GPR:$Rn), 612 IIC_fpLoad_m, "", 613 [(set DPair:$dst, (v2f64 (load GPR:$Rn)))]>; 614 615 // Use VSTM to store a Q register as a D register pair. 616 // This is a pseudo instruction that is expanded to VSTMD after reg alloc. 617 def VSTMQIA 618 : PseudoVFPLdStM<(outs), (ins DPair:$src, GPR:$Rn), 619 IIC_fpStore_m, "", 620 [(store (v2f64 DPair:$src), GPR:$Rn)]>; 621 622 // Classes for VLD* pseudo-instructions with multi-register operands. 623 // These are expanded to real instructions after register allocation. 624 class VLDQPseudo<InstrItinClass itin> 625 : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">; 626 class VLDQWBPseudo<InstrItinClass itin> 627 : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), 628 (ins addrmode6:$addr, am6offset:$offset), itin, 629 "$addr.addr = $wb">; 630 class VLDQWBfixedPseudo<InstrItinClass itin> 631 : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), 632 (ins addrmode6:$addr), itin, 633 "$addr.addr = $wb">; 634 class VLDQWBregisterPseudo<InstrItinClass itin> 635 : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), 636 (ins addrmode6:$addr, rGPR:$offset), itin, 637 "$addr.addr = $wb">; 638 639 class VLDQQPseudo<InstrItinClass itin> 640 : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">; 641 class VLDQQWBPseudo<InstrItinClass itin> 642 : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), 643 (ins addrmode6:$addr, am6offset:$offset), itin, 644 "$addr.addr = $wb">; 645 class VLDQQWBfixedPseudo<InstrItinClass itin> 646 : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), 647 (ins addrmode6:$addr), itin, 648 "$addr.addr = $wb">; 649 class VLDQQWBregisterPseudo<InstrItinClass itin> 650 : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), 651 (ins addrmode6:$addr, rGPR:$offset), itin, 652 "$addr.addr = $wb">; 653 654 655 class VLDQQQQPseudo<InstrItinClass itin> 656 : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin, 657 "$src = $dst">; 658 class VLDQQQQWBPseudo<InstrItinClass itin> 659 : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb), 660 (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin, 661 "$addr.addr = $wb, $src = $dst">; 662 663 let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in { 664 665 // VLD1 : Vector Load (multiple single elements) 666 class VLD1D<bits<4> op7_4, string Dt, Operand AddrMode> 667 : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd), 668 (ins AddrMode:$Rn), IIC_VLD1, 669 "vld1", Dt, "$Vd, $Rn", "", []> { 670 let Rm = 0b1111; 671 let Inst{4} = Rn{4}; 672 let DecoderMethod = "DecodeVLDST1Instruction"; 673 } 674 class VLD1Q<bits<4> op7_4, string Dt, Operand AddrMode> 675 : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd), 676 (ins AddrMode:$Rn), IIC_VLD1x2, 677 "vld1", Dt, "$Vd, $Rn", "", []> { 678 let Rm = 0b1111; 679 let Inst{5-4} = Rn{5-4}; 680 let DecoderMethod = "DecodeVLDST1Instruction"; 681 } 682 683 def VLD1d8 : VLD1D<{0,0,0,?}, "8", addrmode6align64>; 684 def VLD1d16 : VLD1D<{0,1,0,?}, "16", addrmode6align64>; 685 def VLD1d32 : VLD1D<{1,0,0,?}, "32", addrmode6align64>; 686 def VLD1d64 : VLD1D<{1,1,0,?}, "64", addrmode6align64>; 687 688 def VLD1q8 : VLD1Q<{0,0,?,?}, "8", addrmode6align64or128>; 689 def VLD1q16 : VLD1Q<{0,1,?,?}, "16", addrmode6align64or128>; 690 def VLD1q32 : VLD1Q<{1,0,?,?}, "32", addrmode6align64or128>; 691 def VLD1q64 : VLD1Q<{1,1,?,?}, "64", addrmode6align64or128>; 692 693 // ...with address register writeback: 694 multiclass VLD1DWB<bits<4> op7_4, string Dt, Operand AddrMode> { 695 def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), 696 (ins AddrMode:$Rn), IIC_VLD1u, 697 "vld1", Dt, "$Vd, $Rn!", 698 "$Rn.addr = $wb", []> { 699 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 700 let Inst{4} = Rn{4}; 701 let DecoderMethod = "DecodeVLDST1Instruction"; 702 } 703 def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), 704 (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1u, 705 "vld1", Dt, "$Vd, $Rn, $Rm", 706 "$Rn.addr = $wb", []> { 707 let Inst{4} = Rn{4}; 708 let DecoderMethod = "DecodeVLDST1Instruction"; 709 } 710 } 711 multiclass VLD1QWB<bits<4> op7_4, string Dt, Operand AddrMode> { 712 def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb), 713 (ins AddrMode:$Rn), IIC_VLD1x2u, 714 "vld1", Dt, "$Vd, $Rn!", 715 "$Rn.addr = $wb", []> { 716 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 717 let Inst{5-4} = Rn{5-4}; 718 let DecoderMethod = "DecodeVLDST1Instruction"; 719 } 720 def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb), 721 (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u, 722 "vld1", Dt, "$Vd, $Rn, $Rm", 723 "$Rn.addr = $wb", []> { 724 let Inst{5-4} = Rn{5-4}; 725 let DecoderMethod = "DecodeVLDST1Instruction"; 726 } 727 } 728 729 defm VLD1d8wb : VLD1DWB<{0,0,0,?}, "8", addrmode6align64>; 730 defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16", addrmode6align64>; 731 defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32", addrmode6align64>; 732 defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64", addrmode6align64>; 733 defm VLD1q8wb : VLD1QWB<{0,0,?,?}, "8", addrmode6align64or128>; 734 defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16", addrmode6align64or128>; 735 defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32", addrmode6align64or128>; 736 defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64", addrmode6align64or128>; 737 738 // ...with 3 registers 739 class VLD1D3<bits<4> op7_4, string Dt, Operand AddrMode> 740 : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd), 741 (ins AddrMode:$Rn), IIC_VLD1x3, "vld1", Dt, 742 "$Vd, $Rn", "", []> { 743 let Rm = 0b1111; 744 let Inst{4} = Rn{4}; 745 let DecoderMethod = "DecodeVLDST1Instruction"; 746 } 747 multiclass VLD1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> { 748 def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb), 749 (ins AddrMode:$Rn), IIC_VLD1x2u, 750 "vld1", Dt, "$Vd, $Rn!", 751 "$Rn.addr = $wb", []> { 752 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 753 let Inst{4} = Rn{4}; 754 let DecoderMethod = "DecodeVLDST1Instruction"; 755 } 756 def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb), 757 (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u, 758 "vld1", Dt, "$Vd, $Rn, $Rm", 759 "$Rn.addr = $wb", []> { 760 let Inst{4} = Rn{4}; 761 let DecoderMethod = "DecodeVLDST1Instruction"; 762 } 763 } 764 765 def VLD1d8T : VLD1D3<{0,0,0,?}, "8", addrmode6align64>; 766 def VLD1d16T : VLD1D3<{0,1,0,?}, "16", addrmode6align64>; 767 def VLD1d32T : VLD1D3<{1,0,0,?}, "32", addrmode6align64>; 768 def VLD1d64T : VLD1D3<{1,1,0,?}, "64", addrmode6align64>; 769 770 defm VLD1d8Twb : VLD1D3WB<{0,0,0,?}, "8", addrmode6align64>; 771 defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16", addrmode6align64>; 772 defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32", addrmode6align64>; 773 defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64", addrmode6align64>; 774 775 def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>; 776 def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>; 777 def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>; 778 779 // ...with 4 registers 780 class VLD1D4<bits<4> op7_4, string Dt, Operand AddrMode> 781 : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd), 782 (ins AddrMode:$Rn), IIC_VLD1x4, "vld1", Dt, 783 "$Vd, $Rn", "", []> { 784 let Rm = 0b1111; 785 let Inst{5-4} = Rn{5-4}; 786 let DecoderMethod = "DecodeVLDST1Instruction"; 787 } 788 multiclass VLD1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> { 789 def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb), 790 (ins AddrMode:$Rn), IIC_VLD1x2u, 791 "vld1", Dt, "$Vd, $Rn!", 792 "$Rn.addr = $wb", []> { 793 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 794 let Inst{5-4} = Rn{5-4}; 795 let DecoderMethod = "DecodeVLDST1Instruction"; 796 } 797 def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb), 798 (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u, 799 "vld1", Dt, "$Vd, $Rn, $Rm", 800 "$Rn.addr = $wb", []> { 801 let Inst{5-4} = Rn{5-4}; 802 let DecoderMethod = "DecodeVLDST1Instruction"; 803 } 804 } 805 806 def VLD1d8Q : VLD1D4<{0,0,?,?}, "8", addrmode6align64or128or256>; 807 def VLD1d16Q : VLD1D4<{0,1,?,?}, "16", addrmode6align64or128or256>; 808 def VLD1d32Q : VLD1D4<{1,0,?,?}, "32", addrmode6align64or128or256>; 809 def VLD1d64Q : VLD1D4<{1,1,?,?}, "64", addrmode6align64or128or256>; 810 811 defm VLD1d8Qwb : VLD1D4WB<{0,0,?,?}, "8", addrmode6align64or128or256>; 812 defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>; 813 defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>; 814 defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>; 815 816 def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>; 817 def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>; 818 def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>; 819 820 // VLD2 : Vector Load (multiple 2-element structures) 821 class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, 822 InstrItinClass itin, Operand AddrMode> 823 : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd), 824 (ins AddrMode:$Rn), itin, 825 "vld2", Dt, "$Vd, $Rn", "", []> { 826 let Rm = 0b1111; 827 let Inst{5-4} = Rn{5-4}; 828 let DecoderMethod = "DecodeVLDST2Instruction"; 829 } 830 831 def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2, 832 addrmode6align64or128>; 833 def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2, 834 addrmode6align64or128>; 835 def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2, 836 addrmode6align64or128>; 837 838 def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2, 839 addrmode6align64or128or256>; 840 def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2, 841 addrmode6align64or128or256>; 842 def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2, 843 addrmode6align64or128or256>; 844 845 def VLD2q8Pseudo : VLDQQPseudo<IIC_VLD2x2>; 846 def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>; 847 def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>; 848 849 // ...with address register writeback: 850 multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt, 851 RegisterOperand VdTy, InstrItinClass itin, Operand AddrMode> { 852 def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb), 853 (ins AddrMode:$Rn), itin, 854 "vld2", Dt, "$Vd, $Rn!", 855 "$Rn.addr = $wb", []> { 856 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 857 let Inst{5-4} = Rn{5-4}; 858 let DecoderMethod = "DecodeVLDST2Instruction"; 859 } 860 def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb), 861 (ins AddrMode:$Rn, rGPR:$Rm), itin, 862 "vld2", Dt, "$Vd, $Rn, $Rm", 863 "$Rn.addr = $wb", []> { 864 let Inst{5-4} = Rn{5-4}; 865 let DecoderMethod = "DecodeVLDST2Instruction"; 866 } 867 } 868 869 defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u, 870 addrmode6align64or128>; 871 defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u, 872 addrmode6align64or128>; 873 defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u, 874 addrmode6align64or128>; 875 876 defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u, 877 addrmode6align64or128or256>; 878 defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u, 879 addrmode6align64or128or256>; 880 defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u, 881 addrmode6align64or128or256>; 882 883 def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>; 884 def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>; 885 def VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>; 886 def VLD2q8PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>; 887 def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>; 888 def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>; 889 890 // ...with double-spaced registers 891 def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2, 892 addrmode6align64or128>; 893 def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2, 894 addrmode6align64or128>; 895 def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2, 896 addrmode6align64or128>; 897 defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u, 898 addrmode6align64or128>; 899 defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u, 900 addrmode6align64or128>; 901 defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u, 902 addrmode6align64or128>; 903 904 // VLD3 : Vector Load (multiple 3-element structures) 905 class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt> 906 : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), 907 (ins addrmode6:$Rn), IIC_VLD3, 908 "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> { 909 let Rm = 0b1111; 910 let Inst{4} = Rn{4}; 911 let DecoderMethod = "DecodeVLDST3Instruction"; 912 } 913 914 def VLD3d8 : VLD3D<0b0100, {0,0,0,?}, "8">; 915 def VLD3d16 : VLD3D<0b0100, {0,1,0,?}, "16">; 916 def VLD3d32 : VLD3D<0b0100, {1,0,0,?}, "32">; 917 918 def VLD3d8Pseudo : VLDQQPseudo<IIC_VLD3>; 919 def VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>; 920 def VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>; 921 922 // ...with address register writeback: 923 class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt> 924 : NLdSt<0, 0b10, op11_8, op7_4, 925 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), 926 (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u, 927 "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm", 928 "$Rn.addr = $wb", []> { 929 let Inst{4} = Rn{4}; 930 let DecoderMethod = "DecodeVLDST3Instruction"; 931 } 932 933 def VLD3d8_UPD : VLD3DWB<0b0100, {0,0,0,?}, "8">; 934 def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">; 935 def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">; 936 937 def VLD3d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>; 938 def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>; 939 def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>; 940 941 // ...with double-spaced registers: 942 def VLD3q8 : VLD3D<0b0101, {0,0,0,?}, "8">; 943 def VLD3q16 : VLD3D<0b0101, {0,1,0,?}, "16">; 944 def VLD3q32 : VLD3D<0b0101, {1,0,0,?}, "32">; 945 def VLD3q8_UPD : VLD3DWB<0b0101, {0,0,0,?}, "8">; 946 def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">; 947 def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">; 948 949 def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>; 950 def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>; 951 def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>; 952 953 // ...alternate versions to be allocated odd register numbers: 954 def VLD3q8oddPseudo : VLDQQQQPseudo<IIC_VLD3>; 955 def VLD3q16oddPseudo : VLDQQQQPseudo<IIC_VLD3>; 956 def VLD3q32oddPseudo : VLDQQQQPseudo<IIC_VLD3>; 957 958 def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>; 959 def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>; 960 def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>; 961 962 // VLD4 : Vector Load (multiple 4-element structures) 963 class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt> 964 : NLdSt<0, 0b10, op11_8, op7_4, 965 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4), 966 (ins addrmode6:$Rn), IIC_VLD4, 967 "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> { 968 let Rm = 0b1111; 969 let Inst{5-4} = Rn{5-4}; 970 let DecoderMethod = "DecodeVLDST4Instruction"; 971 } 972 973 def VLD4d8 : VLD4D<0b0000, {0,0,?,?}, "8">; 974 def VLD4d16 : VLD4D<0b0000, {0,1,?,?}, "16">; 975 def VLD4d32 : VLD4D<0b0000, {1,0,?,?}, "32">; 976 977 def VLD4d8Pseudo : VLDQQPseudo<IIC_VLD4>; 978 def VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>; 979 def VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>; 980 981 // ...with address register writeback: 982 class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt> 983 : NLdSt<0, 0b10, op11_8, op7_4, 984 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), 985 (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u, 986 "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", 987 "$Rn.addr = $wb", []> { 988 let Inst{5-4} = Rn{5-4}; 989 let DecoderMethod = "DecodeVLDST4Instruction"; 990 } 991 992 def VLD4d8_UPD : VLD4DWB<0b0000, {0,0,?,?}, "8">; 993 def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">; 994 def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">; 995 996 def VLD4d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>; 997 def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>; 998 def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>; 999 1000 // ...with double-spaced registers: 1001 def VLD4q8 : VLD4D<0b0001, {0,0,?,?}, "8">; 1002 def VLD4q16 : VLD4D<0b0001, {0,1,?,?}, "16">; 1003 def VLD4q32 : VLD4D<0b0001, {1,0,?,?}, "32">; 1004 def VLD4q8_UPD : VLD4DWB<0b0001, {0,0,?,?}, "8">; 1005 def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">; 1006 def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">; 1007 1008 def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>; 1009 def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>; 1010 def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>; 1011 1012 // ...alternate versions to be allocated odd register numbers: 1013 def VLD4q8oddPseudo : VLDQQQQPseudo<IIC_VLD4>; 1014 def VLD4q16oddPseudo : VLDQQQQPseudo<IIC_VLD4>; 1015 def VLD4q32oddPseudo : VLDQQQQPseudo<IIC_VLD4>; 1016 1017 def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>; 1018 def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>; 1019 def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>; 1020 1021 } // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 1022 1023 // Classes for VLD*LN pseudo-instructions with multi-register operands. 1024 // These are expanded to real instructions after register allocation. 1025 class VLDQLNPseudo<InstrItinClass itin> 1026 : PseudoNLdSt<(outs QPR:$dst), 1027 (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane), 1028 itin, "$src = $dst">; 1029 class VLDQLNWBPseudo<InstrItinClass itin> 1030 : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), 1031 (ins addrmode6:$addr, am6offset:$offset, QPR:$src, 1032 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">; 1033 class VLDQQLNPseudo<InstrItinClass itin> 1034 : PseudoNLdSt<(outs QQPR:$dst), 1035 (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane), 1036 itin, "$src = $dst">; 1037 class VLDQQLNWBPseudo<InstrItinClass itin> 1038 : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), 1039 (ins addrmode6:$addr, am6offset:$offset, QQPR:$src, 1040 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">; 1041 class VLDQQQQLNPseudo<InstrItinClass itin> 1042 : PseudoNLdSt<(outs QQQQPR:$dst), 1043 (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane), 1044 itin, "$src = $dst">; 1045 class VLDQQQQLNWBPseudo<InstrItinClass itin> 1046 : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb), 1047 (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src, 1048 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">; 1049 1050 // VLD1LN : Vector Load (single element to one lane) 1051 class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, 1052 PatFrag LoadOp> 1053 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd), 1054 (ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane), 1055 IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn", 1056 "$src = $Vd", 1057 [(set DPR:$Vd, (vector_insert (Ty DPR:$src), 1058 (i32 (LoadOp addrmode6:$Rn)), 1059 imm:$lane))]> { 1060 let Rm = 0b1111; 1061 let DecoderMethod = "DecodeVLD1LN"; 1062 } 1063 class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, 1064 PatFrag LoadOp> 1065 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd), 1066 (ins addrmode6oneL32:$Rn, DPR:$src, nohash_imm:$lane), 1067 IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn", 1068 "$src = $Vd", 1069 [(set DPR:$Vd, (vector_insert (Ty DPR:$src), 1070 (i32 (LoadOp addrmode6oneL32:$Rn)), 1071 imm:$lane))]> { 1072 let Rm = 0b1111; 1073 let DecoderMethod = "DecodeVLD1LN"; 1074 } 1075 class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln> { 1076 let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src), 1077 (i32 (LoadOp addrmode6:$addr)), 1078 imm:$lane))]; 1079 } 1080 1081 def VLD1LNd8 : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> { 1082 let Inst{7-5} = lane{2-0}; 1083 } 1084 def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> { 1085 let Inst{7-6} = lane{1-0}; 1086 let Inst{5-4} = Rn{5-4}; 1087 } 1088 def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> { 1089 let Inst{7} = lane{0}; 1090 let Inst{5-4} = Rn{5-4}; 1091 } 1092 1093 def VLD1LNq8Pseudo : VLD1QLNPseudo<v16i8, extloadi8>; 1094 def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>; 1095 def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>; 1096 1097 def : Pat<(vector_insert (v2f32 DPR:$src), 1098 (f32 (load addrmode6:$addr)), imm:$lane), 1099 (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>; 1100 def : Pat<(vector_insert (v4f32 QPR:$src), 1101 (f32 (load addrmode6:$addr)), imm:$lane), 1102 (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>; 1103 1104 let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in { 1105 1106 // ...with address register writeback: 1107 class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 1108 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, GPR:$wb), 1109 (ins addrmode6:$Rn, am6offset:$Rm, 1110 DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt, 1111 "\\{$Vd[$lane]\\}, $Rn$Rm", 1112 "$src = $Vd, $Rn.addr = $wb", []> { 1113 let DecoderMethod = "DecodeVLD1LN"; 1114 } 1115 1116 def VLD1LNd8_UPD : VLD1LNWB<0b0000, {?,?,?,0}, "8"> { 1117 let Inst{7-5} = lane{2-0}; 1118 } 1119 def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> { 1120 let Inst{7-6} = lane{1-0}; 1121 let Inst{4} = Rn{4}; 1122 } 1123 def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> { 1124 let Inst{7} = lane{0}; 1125 let Inst{5} = Rn{4}; 1126 let Inst{4} = Rn{4}; 1127 } 1128 1129 def VLD1LNq8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>; 1130 def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>; 1131 def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>; 1132 1133 // VLD2LN : Vector Load (single 2-element structure to one lane) 1134 class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt> 1135 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2), 1136 (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane), 1137 IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn", 1138 "$src1 = $Vd, $src2 = $dst2", []> { 1139 let Rm = 0b1111; 1140 let Inst{4} = Rn{4}; 1141 let DecoderMethod = "DecodeVLD2LN"; 1142 } 1143 1144 def VLD2LNd8 : VLD2LN<0b0001, {?,?,?,?}, "8"> { 1145 let Inst{7-5} = lane{2-0}; 1146 } 1147 def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> { 1148 let Inst{7-6} = lane{1-0}; 1149 } 1150 def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> { 1151 let Inst{7} = lane{0}; 1152 } 1153 1154 def VLD2LNd8Pseudo : VLDQLNPseudo<IIC_VLD2ln>; 1155 def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>; 1156 def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>; 1157 1158 // ...with double-spaced registers: 1159 def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> { 1160 let Inst{7-6} = lane{1-0}; 1161 } 1162 def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> { 1163 let Inst{7} = lane{0}; 1164 } 1165 1166 def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>; 1167 def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>; 1168 1169 // ...with address register writeback: 1170 class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 1171 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb), 1172 (ins addrmode6:$Rn, am6offset:$Rm, 1173 DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2lnu, "vld2", Dt, 1174 "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm", 1175 "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> { 1176 let Inst{4} = Rn{4}; 1177 let DecoderMethod = "DecodeVLD2LN"; 1178 } 1179 1180 def VLD2LNd8_UPD : VLD2LNWB<0b0001, {?,?,?,?}, "8"> { 1181 let Inst{7-5} = lane{2-0}; 1182 } 1183 def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> { 1184 let Inst{7-6} = lane{1-0}; 1185 } 1186 def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> { 1187 let Inst{7} = lane{0}; 1188 } 1189 1190 def VLD2LNd8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>; 1191 def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>; 1192 def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>; 1193 1194 def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> { 1195 let Inst{7-6} = lane{1-0}; 1196 } 1197 def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> { 1198 let Inst{7} = lane{0}; 1199 } 1200 1201 def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>; 1202 def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>; 1203 1204 // VLD3LN : Vector Load (single 3-element structure to one lane) 1205 class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt> 1206 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), 1207 (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, 1208 nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt, 1209 "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn", 1210 "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []> { 1211 let Rm = 0b1111; 1212 let DecoderMethod = "DecodeVLD3LN"; 1213 } 1214 1215 def VLD3LNd8 : VLD3LN<0b0010, {?,?,?,0}, "8"> { 1216 let Inst{7-5} = lane{2-0}; 1217 } 1218 def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> { 1219 let Inst{7-6} = lane{1-0}; 1220 } 1221 def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> { 1222 let Inst{7} = lane{0}; 1223 } 1224 1225 def VLD3LNd8Pseudo : VLDQQLNPseudo<IIC_VLD3ln>; 1226 def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>; 1227 def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>; 1228 1229 // ...with double-spaced registers: 1230 def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> { 1231 let Inst{7-6} = lane{1-0}; 1232 } 1233 def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> { 1234 let Inst{7} = lane{0}; 1235 } 1236 1237 def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>; 1238 def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>; 1239 1240 // ...with address register writeback: 1241 class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 1242 : NLdStLn<1, 0b10, op11_8, op7_4, 1243 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), 1244 (ins addrmode6:$Rn, am6offset:$Rm, 1245 DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane), 1246 IIC_VLD3lnu, "vld3", Dt, 1247 "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm", 1248 "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb", 1249 []> { 1250 let DecoderMethod = "DecodeVLD3LN"; 1251 } 1252 1253 def VLD3LNd8_UPD : VLD3LNWB<0b0010, {?,?,?,0}, "8"> { 1254 let Inst{7-5} = lane{2-0}; 1255 } 1256 def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> { 1257 let Inst{7-6} = lane{1-0}; 1258 } 1259 def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> { 1260 let Inst{7} = lane{0}; 1261 } 1262 1263 def VLD3LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>; 1264 def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>; 1265 def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>; 1266 1267 def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> { 1268 let Inst{7-6} = lane{1-0}; 1269 } 1270 def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> { 1271 let Inst{7} = lane{0}; 1272 } 1273 1274 def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>; 1275 def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>; 1276 1277 // VLD4LN : Vector Load (single 4-element structure to one lane) 1278 class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt> 1279 : NLdStLn<1, 0b10, op11_8, op7_4, 1280 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4), 1281 (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, 1282 nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt, 1283 "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn", 1284 "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []> { 1285 let Rm = 0b1111; 1286 let Inst{4} = Rn{4}; 1287 let DecoderMethod = "DecodeVLD4LN"; 1288 } 1289 1290 def VLD4LNd8 : VLD4LN<0b0011, {?,?,?,?}, "8"> { 1291 let Inst{7-5} = lane{2-0}; 1292 } 1293 def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> { 1294 let Inst{7-6} = lane{1-0}; 1295 } 1296 def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> { 1297 let Inst{7} = lane{0}; 1298 let Inst{5} = Rn{5}; 1299 } 1300 1301 def VLD4LNd8Pseudo : VLDQQLNPseudo<IIC_VLD4ln>; 1302 def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>; 1303 def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>; 1304 1305 // ...with double-spaced registers: 1306 def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> { 1307 let Inst{7-6} = lane{1-0}; 1308 } 1309 def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> { 1310 let Inst{7} = lane{0}; 1311 let Inst{5} = Rn{5}; 1312 } 1313 1314 def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>; 1315 def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>; 1316 1317 // ...with address register writeback: 1318 class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 1319 : NLdStLn<1, 0b10, op11_8, op7_4, 1320 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), 1321 (ins addrmode6:$Rn, am6offset:$Rm, 1322 DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), 1323 IIC_VLD4lnu, "vld4", Dt, 1324 "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm", 1325 "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb", 1326 []> { 1327 let Inst{4} = Rn{4}; 1328 let DecoderMethod = "DecodeVLD4LN" ; 1329 } 1330 1331 def VLD4LNd8_UPD : VLD4LNWB<0b0011, {?,?,?,?}, "8"> { 1332 let Inst{7-5} = lane{2-0}; 1333 } 1334 def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> { 1335 let Inst{7-6} = lane{1-0}; 1336 } 1337 def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> { 1338 let Inst{7} = lane{0}; 1339 let Inst{5} = Rn{5}; 1340 } 1341 1342 def VLD4LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>; 1343 def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>; 1344 def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>; 1345 1346 def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> { 1347 let Inst{7-6} = lane{1-0}; 1348 } 1349 def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> { 1350 let Inst{7} = lane{0}; 1351 let Inst{5} = Rn{5}; 1352 } 1353 1354 def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>; 1355 def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>; 1356 1357 } // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 1358 1359 // VLD1DUP : Vector Load (single element to all lanes) 1360 class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp, 1361 Operand AddrMode> 1362 : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd), 1363 (ins AddrMode:$Rn), 1364 IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "", 1365 [(set VecListOneDAllLanes:$Vd, 1366 (Ty (NEONvdup (i32 (LoadOp AddrMode:$Rn)))))]> { 1367 let Rm = 0b1111; 1368 let Inst{4} = Rn{4}; 1369 let DecoderMethod = "DecodeVLD1DupInstruction"; 1370 } 1371 def VLD1DUPd8 : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8, 1372 addrmode6dupalignNone>; 1373 def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16, 1374 addrmode6dupalign16>; 1375 def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load, 1376 addrmode6dupalign32>; 1377 1378 def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))), 1379 (VLD1DUPd32 addrmode6:$addr)>; 1380 1381 class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp, 1382 Operand AddrMode> 1383 : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd), 1384 (ins AddrMode:$Rn), IIC_VLD1dup, 1385 "vld1", Dt, "$Vd, $Rn", "", 1386 [(set VecListDPairAllLanes:$Vd, 1387 (Ty (NEONvdup (i32 (LoadOp AddrMode:$Rn)))))]> { 1388 let Rm = 0b1111; 1389 let Inst{4} = Rn{4}; 1390 let DecoderMethod = "DecodeVLD1DupInstruction"; 1391 } 1392 1393 def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8, 1394 addrmode6dupalignNone>; 1395 def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16, 1396 addrmode6dupalign16>; 1397 def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load, 1398 addrmode6dupalign32>; 1399 1400 def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))), 1401 (VLD1DUPq32 addrmode6:$addr)>; 1402 1403 let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in { 1404 // ...with address register writeback: 1405 multiclass VLD1DUPWB<bits<4> op7_4, string Dt, Operand AddrMode> { 1406 def _fixed : NLdSt<1, 0b10, 0b1100, op7_4, 1407 (outs VecListOneDAllLanes:$Vd, GPR:$wb), 1408 (ins AddrMode:$Rn), IIC_VLD1dupu, 1409 "vld1", Dt, "$Vd, $Rn!", 1410 "$Rn.addr = $wb", []> { 1411 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 1412 let Inst{4} = Rn{4}; 1413 let DecoderMethod = "DecodeVLD1DupInstruction"; 1414 } 1415 def _register : NLdSt<1, 0b10, 0b1100, op7_4, 1416 (outs VecListOneDAllLanes:$Vd, GPR:$wb), 1417 (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu, 1418 "vld1", Dt, "$Vd, $Rn, $Rm", 1419 "$Rn.addr = $wb", []> { 1420 let Inst{4} = Rn{4}; 1421 let DecoderMethod = "DecodeVLD1DupInstruction"; 1422 } 1423 } 1424 multiclass VLD1QDUPWB<bits<4> op7_4, string Dt, Operand AddrMode> { 1425 def _fixed : NLdSt<1, 0b10, 0b1100, op7_4, 1426 (outs VecListDPairAllLanes:$Vd, GPR:$wb), 1427 (ins AddrMode:$Rn), IIC_VLD1dupu, 1428 "vld1", Dt, "$Vd, $Rn!", 1429 "$Rn.addr = $wb", []> { 1430 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 1431 let Inst{4} = Rn{4}; 1432 let DecoderMethod = "DecodeVLD1DupInstruction"; 1433 } 1434 def _register : NLdSt<1, 0b10, 0b1100, op7_4, 1435 (outs VecListDPairAllLanes:$Vd, GPR:$wb), 1436 (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu, 1437 "vld1", Dt, "$Vd, $Rn, $Rm", 1438 "$Rn.addr = $wb", []> { 1439 let Inst{4} = Rn{4}; 1440 let DecoderMethod = "DecodeVLD1DupInstruction"; 1441 } 1442 } 1443 1444 defm VLD1DUPd8wb : VLD1DUPWB<{0,0,0,0}, "8", addrmode6dupalignNone>; 1445 defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16", addrmode6dupalign16>; 1446 defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32", addrmode6dupalign32>; 1447 1448 defm VLD1DUPq8wb : VLD1QDUPWB<{0,0,1,0}, "8", addrmode6dupalignNone>; 1449 defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16", addrmode6dupalign16>; 1450 defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32", addrmode6dupalign32>; 1451 1452 // VLD2DUP : Vector Load (single 2-element structure to all lanes) 1453 class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy, Operand AddrMode> 1454 : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd), 1455 (ins AddrMode:$Rn), IIC_VLD2dup, 1456 "vld2", Dt, "$Vd, $Rn", "", []> { 1457 let Rm = 0b1111; 1458 let Inst{4} = Rn{4}; 1459 let DecoderMethod = "DecodeVLD2DupInstruction"; 1460 } 1461 1462 def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8", VecListDPairAllLanes, 1463 addrmode6dupalign16>; 1464 def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes, 1465 addrmode6dupalign32>; 1466 def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes, 1467 addrmode6dupalign64>; 1468 1469 // HACK this one, VLD2DUPd8x2 must be changed at the same time with VLD2b8 or 1470 // "vld2.8 {d0[], d2[]}, [r4:32]" will become "vld2.8 {d0, d2}, [r4:32]". 1471 // ...with double-spaced registers 1472 def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8", VecListDPairSpacedAllLanes, 1473 addrmode6dupalign16>; 1474 def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes, 1475 addrmode6dupalign32>; 1476 def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes, 1477 addrmode6dupalign64>; 1478 1479 // ...with address register writeback: 1480 multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy, 1481 Operand AddrMode> { 1482 def _fixed : NLdSt<1, 0b10, 0b1101, op7_4, 1483 (outs VdTy:$Vd, GPR:$wb), 1484 (ins AddrMode:$Rn), IIC_VLD2dupu, 1485 "vld2", Dt, "$Vd, $Rn!", 1486 "$Rn.addr = $wb", []> { 1487 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 1488 let Inst{4} = Rn{4}; 1489 let DecoderMethod = "DecodeVLD2DupInstruction"; 1490 } 1491 def _register : NLdSt<1, 0b10, 0b1101, op7_4, 1492 (outs VdTy:$Vd, GPR:$wb), 1493 (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD2dupu, 1494 "vld2", Dt, "$Vd, $Rn, $Rm", 1495 "$Rn.addr = $wb", []> { 1496 let Inst{4} = Rn{4}; 1497 let DecoderMethod = "DecodeVLD2DupInstruction"; 1498 } 1499 } 1500 1501 defm VLD2DUPd8wb : VLD2DUPWB<{0,0,0,0}, "8", VecListDPairAllLanes, 1502 addrmode6dupalign16>; 1503 defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes, 1504 addrmode6dupalign32>; 1505 defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes, 1506 addrmode6dupalign64>; 1507 1508 defm VLD2DUPd8x2wb : VLD2DUPWB<{0,0,1,0}, "8", VecListDPairSpacedAllLanes, 1509 addrmode6dupalign16>; 1510 defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes, 1511 addrmode6dupalign32>; 1512 defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes, 1513 addrmode6dupalign64>; 1514 1515 // VLD3DUP : Vector Load (single 3-element structure to all lanes) 1516 class VLD3DUP<bits<4> op7_4, string Dt> 1517 : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), 1518 (ins addrmode6dup:$Rn), IIC_VLD3dup, 1519 "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []> { 1520 let Rm = 0b1111; 1521 let Inst{4} = 0; 1522 let DecoderMethod = "DecodeVLD3DupInstruction"; 1523 } 1524 1525 def VLD3DUPd8 : VLD3DUP<{0,0,0,?}, "8">; 1526 def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">; 1527 def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">; 1528 1529 def VLD3DUPd8Pseudo : VLDQQPseudo<IIC_VLD3dup>; 1530 def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>; 1531 def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>; 1532 1533 // ...with double-spaced registers (not used for codegen): 1534 def VLD3DUPq8 : VLD3DUP<{0,0,1,?}, "8">; 1535 def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">; 1536 def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">; 1537 1538 // ...with address register writeback: 1539 class VLD3DUPWB<bits<4> op7_4, string Dt, Operand AddrMode> 1540 : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), 1541 (ins AddrMode:$Rn, am6offset:$Rm), IIC_VLD3dupu, 1542 "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm", 1543 "$Rn.addr = $wb", []> { 1544 let Inst{4} = 0; 1545 let DecoderMethod = "DecodeVLD3DupInstruction"; 1546 } 1547 1548 def VLD3DUPd8_UPD : VLD3DUPWB<{0,0,0,0}, "8", addrmode6dupalign64>; 1549 def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16", addrmode6dupalign64>; 1550 def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32", addrmode6dupalign64>; 1551 1552 def VLD3DUPq8_UPD : VLD3DUPWB<{0,0,1,0}, "8", addrmode6dupalign64>; 1553 def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16", addrmode6dupalign64>; 1554 def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32", addrmode6dupalign64>; 1555 1556 def VLD3DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>; 1557 def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>; 1558 def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>; 1559 1560 // VLD4DUP : Vector Load (single 4-element structure to all lanes) 1561 class VLD4DUP<bits<4> op7_4, string Dt> 1562 : NLdSt<1, 0b10, 0b1111, op7_4, 1563 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4), 1564 (ins addrmode6dup:$Rn), IIC_VLD4dup, 1565 "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> { 1566 let Rm = 0b1111; 1567 let Inst{4} = Rn{4}; 1568 let DecoderMethod = "DecodeVLD4DupInstruction"; 1569 } 1570 1571 def VLD4DUPd8 : VLD4DUP<{0,0,0,?}, "8">; 1572 def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">; 1573 def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; } 1574 1575 def VLD4DUPd8Pseudo : VLDQQPseudo<IIC_VLD4dup>; 1576 def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>; 1577 def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>; 1578 1579 // ...with double-spaced registers (not used for codegen): 1580 def VLD4DUPq8 : VLD4DUP<{0,0,1,?}, "8">; 1581 def VLD4DUPq16 : VLD4DUP<{0,1,1,?}, "16">; 1582 def VLD4DUPq32 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; } 1583 1584 // ...with address register writeback: 1585 class VLD4DUPWB<bits<4> op7_4, string Dt> 1586 : NLdSt<1, 0b10, 0b1111, op7_4, 1587 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), 1588 (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu, 1589 "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm", 1590 "$Rn.addr = $wb", []> { 1591 let Inst{4} = Rn{4}; 1592 let DecoderMethod = "DecodeVLD4DupInstruction"; 1593 } 1594 1595 def VLD4DUPd8_UPD : VLD4DUPWB<{0,0,0,0}, "8">; 1596 def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">; 1597 def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; } 1598 1599 def VLD4DUPq8_UPD : VLD4DUPWB<{0,0,1,0}, "8">; 1600 def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">; 1601 def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; } 1602 1603 def VLD4DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>; 1604 def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>; 1605 def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>; 1606 1607 } // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 1608 1609 let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in { 1610 1611 // Classes for VST* pseudo-instructions with multi-register operands. 1612 // These are expanded to real instructions after register allocation. 1613 class VSTQPseudo<InstrItinClass itin> 1614 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), itin, "">; 1615 class VSTQWBPseudo<InstrItinClass itin> 1616 : PseudoNLdSt<(outs GPR:$wb), 1617 (ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin, 1618 "$addr.addr = $wb">; 1619 class VSTQWBfixedPseudo<InstrItinClass itin> 1620 : PseudoNLdSt<(outs GPR:$wb), 1621 (ins addrmode6:$addr, QPR:$src), itin, 1622 "$addr.addr = $wb">; 1623 class VSTQWBregisterPseudo<InstrItinClass itin> 1624 : PseudoNLdSt<(outs GPR:$wb), 1625 (ins addrmode6:$addr, rGPR:$offset, QPR:$src), itin, 1626 "$addr.addr = $wb">; 1627 class VSTQQPseudo<InstrItinClass itin> 1628 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">; 1629 class VSTQQWBPseudo<InstrItinClass itin> 1630 : PseudoNLdSt<(outs GPR:$wb), 1631 (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin, 1632 "$addr.addr = $wb">; 1633 class VSTQQWBfixedPseudo<InstrItinClass itin> 1634 : PseudoNLdSt<(outs GPR:$wb), 1635 (ins addrmode6:$addr, QQPR:$src), itin, 1636 "$addr.addr = $wb">; 1637 class VSTQQWBregisterPseudo<InstrItinClass itin> 1638 : PseudoNLdSt<(outs GPR:$wb), 1639 (ins addrmode6:$addr, rGPR:$offset, QQPR:$src), itin, 1640 "$addr.addr = $wb">; 1641 1642 class VSTQQQQPseudo<InstrItinClass itin> 1643 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">; 1644 class VSTQQQQWBPseudo<InstrItinClass itin> 1645 : PseudoNLdSt<(outs GPR:$wb), 1646 (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin, 1647 "$addr.addr = $wb">; 1648 1649 // VST1 : Vector Store (multiple single elements) 1650 class VST1D<bits<4> op7_4, string Dt, Operand AddrMode> 1651 : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins AddrMode:$Rn, VecListOneD:$Vd), 1652 IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []> { 1653 let Rm = 0b1111; 1654 let Inst{4} = Rn{4}; 1655 let DecoderMethod = "DecodeVLDST1Instruction"; 1656 } 1657 class VST1Q<bits<4> op7_4, string Dt, Operand AddrMode> 1658 : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins AddrMode:$Rn, VecListDPair:$Vd), 1659 IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> { 1660 let Rm = 0b1111; 1661 let Inst{5-4} = Rn{5-4}; 1662 let DecoderMethod = "DecodeVLDST1Instruction"; 1663 } 1664 1665 def VST1d8 : VST1D<{0,0,0,?}, "8", addrmode6align64>; 1666 def VST1d16 : VST1D<{0,1,0,?}, "16", addrmode6align64>; 1667 def VST1d32 : VST1D<{1,0,0,?}, "32", addrmode6align64>; 1668 def VST1d64 : VST1D<{1,1,0,?}, "64", addrmode6align64>; 1669 1670 def VST1q8 : VST1Q<{0,0,?,?}, "8", addrmode6align64or128>; 1671 def VST1q16 : VST1Q<{0,1,?,?}, "16", addrmode6align64or128>; 1672 def VST1q32 : VST1Q<{1,0,?,?}, "32", addrmode6align64or128>; 1673 def VST1q64 : VST1Q<{1,1,?,?}, "64", addrmode6align64or128>; 1674 1675 // ...with address register writeback: 1676 multiclass VST1DWB<bits<4> op7_4, string Dt, Operand AddrMode> { 1677 def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb), 1678 (ins AddrMode:$Rn, VecListOneD:$Vd), IIC_VLD1u, 1679 "vst1", Dt, "$Vd, $Rn!", 1680 "$Rn.addr = $wb", []> { 1681 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 1682 let Inst{4} = Rn{4}; 1683 let DecoderMethod = "DecodeVLDST1Instruction"; 1684 } 1685 def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb), 1686 (ins AddrMode:$Rn, rGPR:$Rm, VecListOneD:$Vd), 1687 IIC_VLD1u, 1688 "vst1", Dt, "$Vd, $Rn, $Rm", 1689 "$Rn.addr = $wb", []> { 1690 let Inst{4} = Rn{4}; 1691 let DecoderMethod = "DecodeVLDST1Instruction"; 1692 } 1693 } 1694 multiclass VST1QWB<bits<4> op7_4, string Dt, Operand AddrMode> { 1695 def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb), 1696 (ins AddrMode:$Rn, VecListDPair:$Vd), IIC_VLD1x2u, 1697 "vst1", Dt, "$Vd, $Rn!", 1698 "$Rn.addr = $wb", []> { 1699 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 1700 let Inst{5-4} = Rn{5-4}; 1701 let DecoderMethod = "DecodeVLDST1Instruction"; 1702 } 1703 def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb), 1704 (ins AddrMode:$Rn, rGPR:$Rm, VecListDPair:$Vd), 1705 IIC_VLD1x2u, 1706 "vst1", Dt, "$Vd, $Rn, $Rm", 1707 "$Rn.addr = $wb", []> { 1708 let Inst{5-4} = Rn{5-4}; 1709 let DecoderMethod = "DecodeVLDST1Instruction"; 1710 } 1711 } 1712 1713 defm VST1d8wb : VST1DWB<{0,0,0,?}, "8", addrmode6align64>; 1714 defm VST1d16wb : VST1DWB<{0,1,0,?}, "16", addrmode6align64>; 1715 defm VST1d32wb : VST1DWB<{1,0,0,?}, "32", addrmode6align64>; 1716 defm VST1d64wb : VST1DWB<{1,1,0,?}, "64", addrmode6align64>; 1717 1718 defm VST1q8wb : VST1QWB<{0,0,?,?}, "8", addrmode6align64or128>; 1719 defm VST1q16wb : VST1QWB<{0,1,?,?}, "16", addrmode6align64or128>; 1720 defm VST1q32wb : VST1QWB<{1,0,?,?}, "32", addrmode6align64or128>; 1721 defm VST1q64wb : VST1QWB<{1,1,?,?}, "64", addrmode6align64or128>; 1722 1723 // ...with 3 registers 1724 class VST1D3<bits<4> op7_4, string Dt, Operand AddrMode> 1725 : NLdSt<0, 0b00, 0b0110, op7_4, (outs), 1726 (ins AddrMode:$Rn, VecListThreeD:$Vd), 1727 IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []> { 1728 let Rm = 0b1111; 1729 let Inst{4} = Rn{4}; 1730 let DecoderMethod = "DecodeVLDST1Instruction"; 1731 } 1732 multiclass VST1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> { 1733 def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb), 1734 (ins AddrMode:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u, 1735 "vst1", Dt, "$Vd, $Rn!", 1736 "$Rn.addr = $wb", []> { 1737 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 1738 let Inst{5-4} = Rn{5-4}; 1739 let DecoderMethod = "DecodeVLDST1Instruction"; 1740 } 1741 def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb), 1742 (ins AddrMode:$Rn, rGPR:$Rm, VecListThreeD:$Vd), 1743 IIC_VLD1x3u, 1744 "vst1", Dt, "$Vd, $Rn, $Rm", 1745 "$Rn.addr = $wb", []> { 1746 let Inst{5-4} = Rn{5-4}; 1747 let DecoderMethod = "DecodeVLDST1Instruction"; 1748 } 1749 } 1750 1751 def VST1d8T : VST1D3<{0,0,0,?}, "8", addrmode6align64>; 1752 def VST1d16T : VST1D3<{0,1,0,?}, "16", addrmode6align64>; 1753 def VST1d32T : VST1D3<{1,0,0,?}, "32", addrmode6align64>; 1754 def VST1d64T : VST1D3<{1,1,0,?}, "64", addrmode6align64>; 1755 1756 defm VST1d8Twb : VST1D3WB<{0,0,0,?}, "8", addrmode6align64>; 1757 defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16", addrmode6align64>; 1758 defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32", addrmode6align64>; 1759 defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64", addrmode6align64>; 1760 1761 def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>; 1762 def VST1d64TPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x3u>; 1763 def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>; 1764 1765 // ...with 4 registers 1766 class VST1D4<bits<4> op7_4, string Dt, Operand AddrMode> 1767 : NLdSt<0, 0b00, 0b0010, op7_4, (outs), 1768 (ins AddrMode:$Rn, VecListFourD:$Vd), 1769 IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "", 1770 []> { 1771 let Rm = 0b1111; 1772 let Inst{5-4} = Rn{5-4}; 1773 let DecoderMethod = "DecodeVLDST1Instruction"; 1774 } 1775 multiclass VST1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> { 1776 def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb), 1777 (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1x4u, 1778 "vst1", Dt, "$Vd, $Rn!", 1779 "$Rn.addr = $wb", []> { 1780 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 1781 let Inst{5-4} = Rn{5-4}; 1782 let DecoderMethod = "DecodeVLDST1Instruction"; 1783 } 1784 def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb), 1785 (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd), 1786 IIC_VLD1x4u, 1787 "vst1", Dt, "$Vd, $Rn, $Rm", 1788 "$Rn.addr = $wb", []> { 1789 let Inst{5-4} = Rn{5-4}; 1790 let DecoderMethod = "DecodeVLDST1Instruction"; 1791 } 1792 } 1793 1794 def VST1d8Q : VST1D4<{0,0,?,?}, "8", addrmode6align64or128or256>; 1795 def VST1d16Q : VST1D4<{0,1,?,?}, "16", addrmode6align64or128or256>; 1796 def VST1d32Q : VST1D4<{1,0,?,?}, "32", addrmode6align64or128or256>; 1797 def VST1d64Q : VST1D4<{1,1,?,?}, "64", addrmode6align64or128or256>; 1798 1799 defm VST1d8Qwb : VST1D4WB<{0,0,?,?}, "8", addrmode6align64or128or256>; 1800 defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>; 1801 defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>; 1802 defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>; 1803 1804 def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>; 1805 def VST1d64QPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x4u>; 1806 def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>; 1807 1808 // VST2 : Vector Store (multiple 2-element structures) 1809 class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, 1810 InstrItinClass itin, Operand AddrMode> 1811 : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins AddrMode:$Rn, VdTy:$Vd), 1812 itin, "vst2", Dt, "$Vd, $Rn", "", []> { 1813 let Rm = 0b1111; 1814 let Inst{5-4} = Rn{5-4}; 1815 let DecoderMethod = "DecodeVLDST2Instruction"; 1816 } 1817 1818 def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2, 1819 addrmode6align64or128>; 1820 def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2, 1821 addrmode6align64or128>; 1822 def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2, 1823 addrmode6align64or128>; 1824 1825 def VST2q8 : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2, 1826 addrmode6align64or128or256>; 1827 def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2, 1828 addrmode6align64or128or256>; 1829 def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2, 1830 addrmode6align64or128or256>; 1831 1832 def VST2q8Pseudo : VSTQQPseudo<IIC_VST2x2>; 1833 def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>; 1834 def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>; 1835 1836 // ...with address register writeback: 1837 multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt, 1838 RegisterOperand VdTy, Operand AddrMode> { 1839 def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), 1840 (ins AddrMode:$Rn, VdTy:$Vd), IIC_VLD1u, 1841 "vst2", Dt, "$Vd, $Rn!", 1842 "$Rn.addr = $wb", []> { 1843 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 1844 let Inst{5-4} = Rn{5-4}; 1845 let DecoderMethod = "DecodeVLDST2Instruction"; 1846 } 1847 def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), 1848 (ins AddrMode:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u, 1849 "vst2", Dt, "$Vd, $Rn, $Rm", 1850 "$Rn.addr = $wb", []> { 1851 let Inst{5-4} = Rn{5-4}; 1852 let DecoderMethod = "DecodeVLDST2Instruction"; 1853 } 1854 } 1855 multiclass VST2QWB<bits<4> op7_4, string Dt, Operand AddrMode> { 1856 def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), 1857 (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1u, 1858 "vst2", Dt, "$Vd, $Rn!", 1859 "$Rn.addr = $wb", []> { 1860 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 1861 let Inst{5-4} = Rn{5-4}; 1862 let DecoderMethod = "DecodeVLDST2Instruction"; 1863 } 1864 def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), 1865 (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd), 1866 IIC_VLD1u, 1867 "vst2", Dt, "$Vd, $Rn, $Rm", 1868 "$Rn.addr = $wb", []> { 1869 let Inst{5-4} = Rn{5-4}; 1870 let DecoderMethod = "DecodeVLDST2Instruction"; 1871 } 1872 } 1873 1874 defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListDPair, 1875 addrmode6align64or128>; 1876 defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair, 1877 addrmode6align64or128>; 1878 defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair, 1879 addrmode6align64or128>; 1880 1881 defm VST2q8wb : VST2QWB<{0,0,?,?}, "8", addrmode6align64or128or256>; 1882 defm VST2q16wb : VST2QWB<{0,1,?,?}, "16", addrmode6align64or128or256>; 1883 defm VST2q32wb : VST2QWB<{1,0,?,?}, "32", addrmode6align64or128or256>; 1884 1885 def VST2q8PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>; 1886 def VST2q16PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>; 1887 def VST2q32PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>; 1888 def VST2q8PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>; 1889 def VST2q16PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>; 1890 def VST2q32PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>; 1891 1892 // ...with double-spaced registers 1893 def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VST2, 1894 addrmode6align64or128>; 1895 def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2, 1896 addrmode6align64or128>; 1897 def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2, 1898 addrmode6align64or128>; 1899 defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, 1900 addrmode6align64or128>; 1901 defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, 1902 addrmode6align64or128>; 1903 defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, 1904 addrmode6align64or128>; 1905 1906 // VST3 : Vector Store (multiple 3-element structures) 1907 class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt> 1908 : NLdSt<0, 0b00, op11_8, op7_4, (outs), 1909 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3, 1910 "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> { 1911 let Rm = 0b1111; 1912 let Inst{4} = Rn{4}; 1913 let DecoderMethod = "DecodeVLDST3Instruction"; 1914 } 1915 1916 def VST3d8 : VST3D<0b0100, {0,0,0,?}, "8">; 1917 def VST3d16 : VST3D<0b0100, {0,1,0,?}, "16">; 1918 def VST3d32 : VST3D<0b0100, {1,0,0,?}, "32">; 1919 1920 def VST3d8Pseudo : VSTQQPseudo<IIC_VST3>; 1921 def VST3d16Pseudo : VSTQQPseudo<IIC_VST3>; 1922 def VST3d32Pseudo : VSTQQPseudo<IIC_VST3>; 1923 1924 // ...with address register writeback: 1925 class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt> 1926 : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), 1927 (ins addrmode6:$Rn, am6offset:$Rm, 1928 DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u, 1929 "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm", 1930 "$Rn.addr = $wb", []> { 1931 let Inst{4} = Rn{4}; 1932 let DecoderMethod = "DecodeVLDST3Instruction"; 1933 } 1934 1935 def VST3d8_UPD : VST3DWB<0b0100, {0,0,0,?}, "8">; 1936 def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">; 1937 def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">; 1938 1939 def VST3d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>; 1940 def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>; 1941 def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>; 1942 1943 // ...with double-spaced registers: 1944 def VST3q8 : VST3D<0b0101, {0,0,0,?}, "8">; 1945 def VST3q16 : VST3D<0b0101, {0,1,0,?}, "16">; 1946 def VST3q32 : VST3D<0b0101, {1,0,0,?}, "32">; 1947 def VST3q8_UPD : VST3DWB<0b0101, {0,0,0,?}, "8">; 1948 def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">; 1949 def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">; 1950 1951 def VST3q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>; 1952 def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>; 1953 def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>; 1954 1955 // ...alternate versions to be allocated odd register numbers: 1956 def VST3q8oddPseudo : VSTQQQQPseudo<IIC_VST3>; 1957 def VST3q16oddPseudo : VSTQQQQPseudo<IIC_VST3>; 1958 def VST3q32oddPseudo : VSTQQQQPseudo<IIC_VST3>; 1959 1960 def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>; 1961 def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>; 1962 def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>; 1963 1964 // VST4 : Vector Store (multiple 4-element structures) 1965 class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt> 1966 : NLdSt<0, 0b00, op11_8, op7_4, (outs), 1967 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), 1968 IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn", 1969 "", []> { 1970 let Rm = 0b1111; 1971 let Inst{5-4} = Rn{5-4}; 1972 let DecoderMethod = "DecodeVLDST4Instruction"; 1973 } 1974 1975 def VST4d8 : VST4D<0b0000, {0,0,?,?}, "8">; 1976 def VST4d16 : VST4D<0b0000, {0,1,?,?}, "16">; 1977 def VST4d32 : VST4D<0b0000, {1,0,?,?}, "32">; 1978 1979 def VST4d8Pseudo : VSTQQPseudo<IIC_VST4>; 1980 def VST4d16Pseudo : VSTQQPseudo<IIC_VST4>; 1981 def VST4d32Pseudo : VSTQQPseudo<IIC_VST4>; 1982 1983 // ...with address register writeback: 1984 class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt> 1985 : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), 1986 (ins addrmode6:$Rn, am6offset:$Rm, 1987 DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u, 1988 "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm", 1989 "$Rn.addr = $wb", []> { 1990 let Inst{5-4} = Rn{5-4}; 1991 let DecoderMethod = "DecodeVLDST4Instruction"; 1992 } 1993 1994 def VST4d8_UPD : VST4DWB<0b0000, {0,0,?,?}, "8">; 1995 def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">; 1996 def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">; 1997 1998 def VST4d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>; 1999 def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>; 2000 def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>; 2001 2002 // ...with double-spaced registers: 2003 def VST4q8 : VST4D<0b0001, {0,0,?,?}, "8">; 2004 def VST4q16 : VST4D<0b0001, {0,1,?,?}, "16">; 2005 def VST4q32 : VST4D<0b0001, {1,0,?,?}, "32">; 2006 def VST4q8_UPD : VST4DWB<0b0001, {0,0,?,?}, "8">; 2007 def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">; 2008 def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">; 2009 2010 def VST4q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>; 2011 def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>; 2012 def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>; 2013 2014 // ...alternate versions to be allocated odd register numbers: 2015 def VST4q8oddPseudo : VSTQQQQPseudo<IIC_VST4>; 2016 def VST4q16oddPseudo : VSTQQQQPseudo<IIC_VST4>; 2017 def VST4q32oddPseudo : VSTQQQQPseudo<IIC_VST4>; 2018 2019 def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>; 2020 def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>; 2021 def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>; 2022 2023 } // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 2024 2025 // Classes for VST*LN pseudo-instructions with multi-register operands. 2026 // These are expanded to real instructions after register allocation. 2027 class VSTQLNPseudo<InstrItinClass itin> 2028 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane), 2029 itin, "">; 2030 class VSTQLNWBPseudo<InstrItinClass itin> 2031 : PseudoNLdSt<(outs GPR:$wb), 2032 (ins addrmode6:$addr, am6offset:$offset, QPR:$src, 2033 nohash_imm:$lane), itin, "$addr.addr = $wb">; 2034 class VSTQQLNPseudo<InstrItinClass itin> 2035 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane), 2036 itin, "">; 2037 class VSTQQLNWBPseudo<InstrItinClass itin> 2038 : PseudoNLdSt<(outs GPR:$wb), 2039 (ins addrmode6:$addr, am6offset:$offset, QQPR:$src, 2040 nohash_imm:$lane), itin, "$addr.addr = $wb">; 2041 class VSTQQQQLNPseudo<InstrItinClass itin> 2042 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane), 2043 itin, "">; 2044 class VSTQQQQLNWBPseudo<InstrItinClass itin> 2045 : PseudoNLdSt<(outs GPR:$wb), 2046 (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src, 2047 nohash_imm:$lane), itin, "$addr.addr = $wb">; 2048 2049 // VST1LN : Vector Store (single element from one lane) 2050 class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, 2051 PatFrag StoreOp, SDNode ExtractOp, Operand AddrMode> 2052 : NLdStLn<1, 0b00, op11_8, op7_4, (outs), 2053 (ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane), 2054 IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "", 2055 [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]> { 2056 let Rm = 0b1111; 2057 let DecoderMethod = "DecodeVST1LN"; 2058 } 2059 class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp> 2060 : VSTQLNPseudo<IIC_VST1ln> { 2061 let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane), 2062 addrmode6:$addr)]; 2063 } 2064 2065 def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8, 2066 NEONvgetlaneu, addrmode6> { 2067 let Inst{7-5} = lane{2-0}; 2068 } 2069 def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16, 2070 NEONvgetlaneu, addrmode6> { 2071 let Inst{7-6} = lane{1-0}; 2072 let Inst{4} = Rn{4}; 2073 } 2074 2075 def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt, 2076 addrmode6oneL32> { 2077 let Inst{7} = lane{0}; 2078 let Inst{5-4} = Rn{5-4}; 2079 } 2080 2081 def VST1LNq8Pseudo : VST1QLNPseudo<v16i8, truncstorei8, NEONvgetlaneu>; 2082 def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, NEONvgetlaneu>; 2083 def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>; 2084 2085 def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr), 2086 (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>; 2087 def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr), 2088 (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>; 2089 2090 // ...with address register writeback: 2091 class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, 2092 PatFrag StoreOp, SDNode ExtractOp, Operand AdrMode> 2093 : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), 2094 (ins AdrMode:$Rn, am6offset:$Rm, 2095 DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt, 2096 "\\{$Vd[$lane]\\}, $Rn$Rm", 2097 "$Rn.addr = $wb", 2098 [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), 2099 AdrMode:$Rn, am6offset:$Rm))]> { 2100 let DecoderMethod = "DecodeVST1LN"; 2101 } 2102 class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp> 2103 : VSTQLNWBPseudo<IIC_VST1lnu> { 2104 let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane), 2105 addrmode6:$addr, am6offset:$offset))]; 2106 } 2107 2108 def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8, 2109 NEONvgetlaneu, addrmode6> { 2110 let Inst{7-5} = lane{2-0}; 2111 } 2112 def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16, 2113 NEONvgetlaneu, addrmode6> { 2114 let Inst{7-6} = lane{1-0}; 2115 let Inst{4} = Rn{4}; 2116 } 2117 def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store, 2118 extractelt, addrmode6oneL32> { 2119 let Inst{7} = lane{0}; 2120 let Inst{5-4} = Rn{5-4}; 2121 } 2122 2123 def VST1LNq8Pseudo_UPD : VST1QLNWBPseudo<v16i8, post_truncsti8, NEONvgetlaneu>; 2124 def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16,NEONvgetlaneu>; 2125 def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>; 2126 2127 let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in { 2128 2129 // VST2LN : Vector Store (single 2-element structure from one lane) 2130 class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt> 2131 : NLdStLn<1, 0b00, op11_8, op7_4, (outs), 2132 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane), 2133 IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn", 2134 "", []> { 2135 let Rm = 0b1111; 2136 let Inst{4} = Rn{4}; 2137 let DecoderMethod = "DecodeVST2LN"; 2138 } 2139 2140 def VST2LNd8 : VST2LN<0b0001, {?,?,?,?}, "8"> { 2141 let Inst{7-5} = lane{2-0}; 2142 } 2143 def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> { 2144 let Inst{7-6} = lane{1-0}; 2145 } 2146 def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> { 2147 let Inst{7} = lane{0}; 2148 } 2149 2150 def VST2LNd8Pseudo : VSTQLNPseudo<IIC_VST2ln>; 2151 def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>; 2152 def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>; 2153 2154 // ...with double-spaced registers: 2155 def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> { 2156 let Inst{7-6} = lane{1-0}; 2157 let Inst{4} = Rn{4}; 2158 } 2159 def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> { 2160 let Inst{7} = lane{0}; 2161 let Inst{4} = Rn{4}; 2162 } 2163 2164 def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>; 2165 def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>; 2166 2167 // ...with address register writeback: 2168 class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 2169 : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), 2170 (ins addrmode6:$Rn, am6offset:$Rm, 2171 DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt, 2172 "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm", 2173 "$Rn.addr = $wb", []> { 2174 let Inst{4} = Rn{4}; 2175 let DecoderMethod = "DecodeVST2LN"; 2176 } 2177 2178 def VST2LNd8_UPD : VST2LNWB<0b0001, {?,?,?,?}, "8"> { 2179 let Inst{7-5} = lane{2-0}; 2180 } 2181 def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> { 2182 let Inst{7-6} = lane{1-0}; 2183 } 2184 def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> { 2185 let Inst{7} = lane{0}; 2186 } 2187 2188 def VST2LNd8Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>; 2189 def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>; 2190 def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>; 2191 2192 def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> { 2193 let Inst{7-6} = lane{1-0}; 2194 } 2195 def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> { 2196 let Inst{7} = lane{0}; 2197 } 2198 2199 def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>; 2200 def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>; 2201 2202 // VST3LN : Vector Store (single 3-element structure from one lane) 2203 class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt> 2204 : NLdStLn<1, 0b00, op11_8, op7_4, (outs), 2205 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, 2206 nohash_imm:$lane), IIC_VST3ln, "vst3", Dt, 2207 "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []> { 2208 let Rm = 0b1111; 2209 let DecoderMethod = "DecodeVST3LN"; 2210 } 2211 2212 def VST3LNd8 : VST3LN<0b0010, {?,?,?,0}, "8"> { 2213 let Inst{7-5} = lane{2-0}; 2214 } 2215 def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16"> { 2216 let Inst{7-6} = lane{1-0}; 2217 } 2218 def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> { 2219 let Inst{7} = lane{0}; 2220 } 2221 2222 def VST3LNd8Pseudo : VSTQQLNPseudo<IIC_VST3ln>; 2223 def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>; 2224 def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>; 2225 2226 // ...with double-spaced registers: 2227 def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> { 2228 let Inst{7-6} = lane{1-0}; 2229 } 2230 def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> { 2231 let Inst{7} = lane{0}; 2232 } 2233 2234 def VST3LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>; 2235 def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>; 2236 2237 // ...with address register writeback: 2238 class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 2239 : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), 2240 (ins addrmode6:$Rn, am6offset:$Rm, 2241 DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane), 2242 IIC_VST3lnu, "vst3", Dt, 2243 "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm", 2244 "$Rn.addr = $wb", []> { 2245 let DecoderMethod = "DecodeVST3LN"; 2246 } 2247 2248 def VST3LNd8_UPD : VST3LNWB<0b0010, {?,?,?,0}, "8"> { 2249 let Inst{7-5} = lane{2-0}; 2250 } 2251 def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> { 2252 let Inst{7-6} = lane{1-0}; 2253 } 2254 def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> { 2255 let Inst{7} = lane{0}; 2256 } 2257 2258 def VST3LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>; 2259 def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>; 2260 def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>; 2261 2262 def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> { 2263 let Inst{7-6} = lane{1-0}; 2264 } 2265 def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> { 2266 let Inst{7} = lane{0}; 2267 } 2268 2269 def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>; 2270 def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>; 2271 2272 // VST4LN : Vector Store (single 4-element structure from one lane) 2273 class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt> 2274 : NLdStLn<1, 0b00, op11_8, op7_4, (outs), 2275 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, 2276 nohash_imm:$lane), IIC_VST4ln, "vst4", Dt, 2277 "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn", 2278 "", []> { 2279 let Rm = 0b1111; 2280 let Inst{4} = Rn{4}; 2281 let DecoderMethod = "DecodeVST4LN"; 2282 } 2283 2284 def VST4LNd8 : VST4LN<0b0011, {?,?,?,?}, "8"> { 2285 let Inst{7-5} = lane{2-0}; 2286 } 2287 def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> { 2288 let Inst{7-6} = lane{1-0}; 2289 } 2290 def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> { 2291 let Inst{7} = lane{0}; 2292 let Inst{5} = Rn{5}; 2293 } 2294 2295 def VST4LNd8Pseudo : VSTQQLNPseudo<IIC_VST4ln>; 2296 def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>; 2297 def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>; 2298 2299 // ...with double-spaced registers: 2300 def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> { 2301 let Inst{7-6} = lane{1-0}; 2302 } 2303 def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> { 2304 let Inst{7} = lane{0}; 2305 let Inst{5} = Rn{5}; 2306 } 2307 2308 def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>; 2309 def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>; 2310 2311 // ...with address register writeback: 2312 class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 2313 : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), 2314 (ins addrmode6:$Rn, am6offset:$Rm, 2315 DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), 2316 IIC_VST4lnu, "vst4", Dt, 2317 "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm", 2318 "$Rn.addr = $wb", []> { 2319 let Inst{4} = Rn{4}; 2320 let DecoderMethod = "DecodeVST4LN"; 2321 } 2322 2323 def VST4LNd8_UPD : VST4LNWB<0b0011, {?,?,?,?}, "8"> { 2324 let Inst{7-5} = lane{2-0}; 2325 } 2326 def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> { 2327 let Inst{7-6} = lane{1-0}; 2328 } 2329 def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> { 2330 let Inst{7} = lane{0}; 2331 let Inst{5} = Rn{5}; 2332 } 2333 2334 def VST4LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>; 2335 def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>; 2336 def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>; 2337 2338 def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> { 2339 let Inst{7-6} = lane{1-0}; 2340 } 2341 def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> { 2342 let Inst{7} = lane{0}; 2343 let Inst{5} = Rn{5}; 2344 } 2345 2346 def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>; 2347 def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>; 2348 2349 } // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 2350 2351 // Use vld1/vst1 for unaligned f64 load / store 2352 def : Pat<(f64 (hword_alignedload addrmode6:$addr)), 2353 (VLD1d16 addrmode6:$addr)>, Requires<[IsLE]>; 2354 def : Pat<(hword_alignedstore (f64 DPR:$value), addrmode6:$addr), 2355 (VST1d16 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>; 2356 def : Pat<(f64 (byte_alignedload addrmode6:$addr)), 2357 (VLD1d8 addrmode6:$addr)>, Requires<[IsLE]>; 2358 def : Pat<(byte_alignedstore (f64 DPR:$value), addrmode6:$addr), 2359 (VST1d8 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>; 2360 def : Pat<(f64 (non_word_alignedload addrmode6:$addr)), 2361 (VLD1d64 addrmode6:$addr)>, Requires<[IsBE]>; 2362 def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr), 2363 (VST1d64 addrmode6:$addr, DPR:$value)>, Requires<[IsBE]>; 2364 2365 // Use vld1/vst1 for Q and QQ. Also use them for unaligned v2f64 2366 // load / store if it's legal. 2367 def : Pat<(v2f64 (dword_alignedload addrmode6:$addr)), 2368 (VLD1q64 addrmode6:$addr)>; 2369 def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr), 2370 (VST1q64 addrmode6:$addr, QPR:$value)>; 2371 def : Pat<(v2f64 (word_alignedload addrmode6:$addr)), 2372 (VLD1q32 addrmode6:$addr)>, Requires<[IsLE]>; 2373 def : Pat<(word_alignedstore (v2f64 QPR:$value), addrmode6:$addr), 2374 (VST1q32 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>; 2375 def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)), 2376 (VLD1q16 addrmode6:$addr)>, Requires<[IsLE]>; 2377 def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr), 2378 (VST1q16 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>; 2379 def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)), 2380 (VLD1q8 addrmode6:$addr)>, Requires<[IsLE]>; 2381 def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr), 2382 (VST1q8 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>; 2383 2384 //===----------------------------------------------------------------------===// 2385 // NEON pattern fragments 2386 //===----------------------------------------------------------------------===// 2387 2388 // Extract D sub-registers of Q registers. 2389 def DSubReg_i8_reg : SDNodeXForm<imm, [{ 2390 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); 2391 return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/8, SDLoc(N), 2392 MVT::i32); 2393 }]>; 2394 def DSubReg_i16_reg : SDNodeXForm<imm, [{ 2395 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); 2396 return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/4, SDLoc(N), 2397 MVT::i32); 2398 }]>; 2399 def DSubReg_i32_reg : SDNodeXForm<imm, [{ 2400 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); 2401 return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/2, SDLoc(N), 2402 MVT::i32); 2403 }]>; 2404 def DSubReg_f64_reg : SDNodeXForm<imm, [{ 2405 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); 2406 return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue(), SDLoc(N), 2407 MVT::i32); 2408 }]>; 2409 2410 // Extract S sub-registers of Q/D registers. 2411 def SSubReg_f32_reg : SDNodeXForm<imm, [{ 2412 assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering"); 2413 return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue(), SDLoc(N), 2414 MVT::i32); 2415 }]>; 2416 2417 // Translate lane numbers from Q registers to D subregs. 2418 def SubReg_i8_lane : SDNodeXForm<imm, [{ 2419 return CurDAG->getTargetConstant(N->getZExtValue() & 7, SDLoc(N), MVT::i32); 2420 }]>; 2421 def SubReg_i16_lane : SDNodeXForm<imm, [{ 2422 return CurDAG->getTargetConstant(N->getZExtValue() & 3, SDLoc(N), MVT::i32); 2423 }]>; 2424 def SubReg_i32_lane : SDNodeXForm<imm, [{ 2425 return CurDAG->getTargetConstant(N->getZExtValue() & 1, SDLoc(N), MVT::i32); 2426 }]>; 2427 2428 //===----------------------------------------------------------------------===// 2429 // Instruction Classes 2430 //===----------------------------------------------------------------------===// 2431 2432 // Basic 2-register operations: double- and quad-register. 2433 class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2434 bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, 2435 string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> 2436 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd), 2437 (ins DPR:$Vm), IIC_VUNAD, OpcodeStr, Dt,"$Vd, $Vm", "", 2438 [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm))))]>; 2439 class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2440 bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, 2441 string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> 2442 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd), 2443 (ins QPR:$Vm), IIC_VUNAQ, OpcodeStr, Dt,"$Vd, $Vm", "", 2444 [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm))))]>; 2445 2446 // Basic 2-register intrinsics, both double- and quad-register. 2447 class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2448 bits<2> op17_16, bits<5> op11_7, bit op4, 2449 InstrItinClass itin, string OpcodeStr, string Dt, 2450 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 2451 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd), 2452 (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "", 2453 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>; 2454 class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2455 bits<2> op17_16, bits<5> op11_7, bit op4, 2456 InstrItinClass itin, string OpcodeStr, string Dt, 2457 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 2458 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd), 2459 (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "", 2460 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>; 2461 2462 // Same as above, but not predicated. 2463 class N2VDIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7, 2464 InstrItinClass itin, string OpcodeStr, string Dt, 2465 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 2466 : N2Vnp<op19_18, op17_16, op10_8, op7, 0, (outs DPR:$Vd), (ins DPR:$Vm), 2467 itin, OpcodeStr, Dt, 2468 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>; 2469 2470 class N2VQIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7, 2471 InstrItinClass itin, string OpcodeStr, string Dt, 2472 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 2473 : N2Vnp<op19_18, op17_16, op10_8, op7, 1, (outs QPR:$Vd), (ins QPR:$Vm), 2474 itin, OpcodeStr, Dt, 2475 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>; 2476 2477 // Similar to NV2VQIntnp with some more encoding bits exposed (crypto). 2478 class N2VQIntXnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6, 2479 bit op7, InstrItinClass itin, string OpcodeStr, string Dt, 2480 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 2481 : N2Vnp<op19_18, op17_16, op10_8, op7, op6, (outs QPR:$Vd), (ins QPR:$Vm), 2482 itin, OpcodeStr, Dt, 2483 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>; 2484 2485 // Same as N2VQIntXnp but with Vd as a src register. 2486 class N2VQIntX2np<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6, 2487 bit op7, InstrItinClass itin, string OpcodeStr, string Dt, 2488 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 2489 : N2Vnp<op19_18, op17_16, op10_8, op7, op6, 2490 (outs QPR:$Vd), (ins QPR:$src, QPR:$Vm), 2491 itin, OpcodeStr, Dt, 2492 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vm))))]> { 2493 let Constraints = "$src = $Vd"; 2494 } 2495 2496 // Narrow 2-register operations. 2497 class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2498 bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, 2499 InstrItinClass itin, string OpcodeStr, string Dt, 2500 ValueType TyD, ValueType TyQ, SDNode OpNode> 2501 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd), 2502 (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "", 2503 [(set DPR:$Vd, (TyD (OpNode (TyQ QPR:$Vm))))]>; 2504 2505 // Narrow 2-register intrinsics. 2506 class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2507 bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, 2508 InstrItinClass itin, string OpcodeStr, string Dt, 2509 ValueType TyD, ValueType TyQ, SDPatternOperator IntOp> 2510 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd), 2511 (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "", 2512 [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vm))))]>; 2513 2514 // Long 2-register operations (currently only used for VMOVL). 2515 class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2516 bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, 2517 InstrItinClass itin, string OpcodeStr, string Dt, 2518 ValueType TyQ, ValueType TyD, SDNode OpNode> 2519 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd), 2520 (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "", 2521 [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vm))))]>; 2522 2523 // Long 2-register intrinsics. 2524 class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2525 bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, 2526 InstrItinClass itin, string OpcodeStr, string Dt, 2527 ValueType TyQ, ValueType TyD, SDPatternOperator IntOp> 2528 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd), 2529 (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "", 2530 [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vm))))]>; 2531 2532 // 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register. 2533 class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt> 2534 : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$Vd, DPR:$Vm), 2535 (ins DPR:$src1, DPR:$src2), IIC_VPERMD, 2536 OpcodeStr, Dt, "$Vd, $Vm", 2537 "$src1 = $Vd, $src2 = $Vm", []>; 2538 class N2VQShuffle<bits<2> op19_18, bits<5> op11_7, 2539 InstrItinClass itin, string OpcodeStr, string Dt> 2540 : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$Vd, QPR:$Vm), 2541 (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$Vd, $Vm", 2542 "$src1 = $Vd, $src2 = $Vm", []>; 2543 2544 // Basic 3-register operations: double- and quad-register. 2545 class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 2546 InstrItinClass itin, string OpcodeStr, string Dt, 2547 ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> 2548 : N3V<op24, op23, op21_20, op11_8, 0, op4, 2549 (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, 2550 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "", 2551 [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> { 2552 // All of these have a two-operand InstAlias. 2553 let TwoOperandAliasConstraint = "$Vn = $Vd"; 2554 let isCommutable = Commutable; 2555 } 2556 // Same as N3VD but no data type. 2557 class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 2558 InstrItinClass itin, string OpcodeStr, 2559 ValueType ResTy, ValueType OpTy, 2560 SDNode OpNode, bit Commutable> 2561 : N3VX<op24, op23, op21_20, op11_8, 0, op4, 2562 (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, 2563 OpcodeStr, "$Vd, $Vn, $Vm", "", 2564 [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>{ 2565 // All of these have a two-operand InstAlias. 2566 let TwoOperandAliasConstraint = "$Vn = $Vd"; 2567 let isCommutable = Commutable; 2568 } 2569 2570 class N3VDSL<bits<2> op21_20, bits<4> op11_8, 2571 InstrItinClass itin, string OpcodeStr, string Dt, 2572 ValueType Ty, SDNode ShOp> 2573 : N3VLane32<0, 1, op21_20, op11_8, 1, 0, 2574 (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), 2575 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", 2576 [(set (Ty DPR:$Vd), 2577 (Ty (ShOp (Ty DPR:$Vn), 2578 (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> { 2579 // All of these have a two-operand InstAlias. 2580 let TwoOperandAliasConstraint = "$Vn = $Vd"; 2581 let isCommutable = 0; 2582 } 2583 class N3VDSL16<bits<2> op21_20, bits<4> op11_8, 2584 string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp> 2585 : N3VLane16<0, 1, op21_20, op11_8, 1, 0, 2586 (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), 2587 NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane","", 2588 [(set (Ty DPR:$Vd), 2589 (Ty (ShOp (Ty DPR:$Vn), 2590 (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> { 2591 // All of these have a two-operand InstAlias. 2592 let TwoOperandAliasConstraint = "$Vn = $Vd"; 2593 let isCommutable = 0; 2594 } 2595 2596 class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 2597 InstrItinClass itin, string OpcodeStr, string Dt, 2598 ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> 2599 : N3V<op24, op23, op21_20, op11_8, 1, op4, 2600 (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin, 2601 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "", 2602 [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> { 2603 // All of these have a two-operand InstAlias. 2604 let TwoOperandAliasConstraint = "$Vn = $Vd"; 2605 let isCommutable = Commutable; 2606 } 2607 class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 2608 InstrItinClass itin, string OpcodeStr, 2609 ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> 2610 : N3VX<op24, op23, op21_20, op11_8, 1, op4, 2611 (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin, 2612 OpcodeStr, "$Vd, $Vn, $Vm", "", 2613 [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>{ 2614 // All of these have a two-operand InstAlias. 2615 let TwoOperandAliasConstraint = "$Vn = $Vd"; 2616 let isCommutable = Commutable; 2617 } 2618 class N3VQSL<bits<2> op21_20, bits<4> op11_8, 2619 InstrItinClass itin, string OpcodeStr, string Dt, 2620 ValueType ResTy, ValueType OpTy, SDNode ShOp> 2621 : N3VLane32<1, 1, op21_20, op11_8, 1, 0, 2622 (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), 2623 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", 2624 [(set (ResTy QPR:$Vd), 2625 (ResTy (ShOp (ResTy QPR:$Vn), 2626 (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm), 2627 imm:$lane)))))]> { 2628 // All of these have a two-operand InstAlias. 2629 let TwoOperandAliasConstraint = "$Vn = $Vd"; 2630 let isCommutable = 0; 2631 } 2632 class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt, 2633 ValueType ResTy, ValueType OpTy, SDNode ShOp> 2634 : N3VLane16<1, 1, op21_20, op11_8, 1, 0, 2635 (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), 2636 NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane", "", 2637 [(set (ResTy QPR:$Vd), 2638 (ResTy (ShOp (ResTy QPR:$Vn), 2639 (ResTy (NEONvduplane (OpTy DPR_8:$Vm), 2640 imm:$lane)))))]> { 2641 // All of these have a two-operand InstAlias. 2642 let TwoOperandAliasConstraint = "$Vn = $Vd"; 2643 let isCommutable = 0; 2644 } 2645 2646 // Basic 3-register intrinsics, both double- and quad-register. 2647 class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 2648 Format f, InstrItinClass itin, string OpcodeStr, string Dt, 2649 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable> 2650 : N3V<op24, op23, op21_20, op11_8, 0, op4, 2651 (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin, 2652 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "", 2653 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> { 2654 // All of these have a two-operand InstAlias. 2655 let TwoOperandAliasConstraint = "$Vn = $Vd"; 2656 let isCommutable = Commutable; 2657 } 2658 2659 class N3VDIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, 2660 bit op4, Format f, InstrItinClass itin, string OpcodeStr, 2661 string Dt, ValueType ResTy, ValueType OpTy, 2662 SDPatternOperator IntOp, bit Commutable> 2663 : N3Vnp<op27_23, op21_20, op11_8, op6, op4, 2664 (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt, 2665 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>; 2666 2667 class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, 2668 string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp> 2669 : N3VLane32<0, 1, op21_20, op11_8, 1, 0, 2670 (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), 2671 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", 2672 [(set (Ty DPR:$Vd), 2673 (Ty (IntOp (Ty DPR:$Vn), 2674 (Ty (NEONvduplane (Ty DPR_VFP2:$Vm), 2675 imm:$lane)))))]> { 2676 let isCommutable = 0; 2677 } 2678 2679 class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, 2680 string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp> 2681 : N3VLane16<0, 1, op21_20, op11_8, 1, 0, 2682 (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), 2683 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", 2684 [(set (Ty DPR:$Vd), 2685 (Ty (IntOp (Ty DPR:$Vn), 2686 (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> { 2687 let isCommutable = 0; 2688 } 2689 class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 2690 Format f, InstrItinClass itin, string OpcodeStr, string Dt, 2691 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 2692 : N3V<op24, op23, op21_20, op11_8, 0, op4, 2693 (outs DPR:$Vd), (ins DPR:$Vm, DPR:$Vn), f, itin, 2694 OpcodeStr, Dt, "$Vd, $Vm, $Vn", "", 2695 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (OpTy DPR:$Vn))))]> { 2696 let TwoOperandAliasConstraint = "$Vm = $Vd"; 2697 let isCommutable = 0; 2698 } 2699 2700 class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 2701 Format f, InstrItinClass itin, string OpcodeStr, string Dt, 2702 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable> 2703 : N3V<op24, op23, op21_20, op11_8, 1, op4, 2704 (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin, 2705 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "", 2706 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> { 2707 // All of these have a two-operand InstAlias. 2708 let TwoOperandAliasConstraint = "$Vn = $Vd"; 2709 let isCommutable = Commutable; 2710 } 2711 2712 class N3VQIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, 2713 bit op4, Format f, InstrItinClass itin, string OpcodeStr, 2714 string Dt, ValueType ResTy, ValueType OpTy, 2715 SDPatternOperator IntOp, bit Commutable> 2716 : N3Vnp<op27_23, op21_20, op11_8, op6, op4, 2717 (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr, Dt, 2718 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>; 2719 2720 // Same as N3VQIntnp but with Vd as a src register. 2721 class N3VQInt3np<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, 2722 bit op4, Format f, InstrItinClass itin, string OpcodeStr, 2723 string Dt, ValueType ResTy, ValueType OpTy, 2724 SDPatternOperator IntOp, bit Commutable> 2725 : N3Vnp<op27_23, op21_20, op11_8, op6, op4, 2726 (outs QPR:$Vd), (ins QPR:$src, QPR:$Vn, QPR:$Vm), 2727 f, itin, OpcodeStr, Dt, 2728 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vn), 2729 (OpTy QPR:$Vm))))]> { 2730 let Constraints = "$src = $Vd"; 2731 } 2732 2733 class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, 2734 string OpcodeStr, string Dt, 2735 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 2736 : N3VLane32<1, 1, op21_20, op11_8, 1, 0, 2737 (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), 2738 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", 2739 [(set (ResTy QPR:$Vd), 2740 (ResTy (IntOp (ResTy QPR:$Vn), 2741 (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm), 2742 imm:$lane)))))]> { 2743 let isCommutable = 0; 2744 } 2745 class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, 2746 string OpcodeStr, string Dt, 2747 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 2748 : N3VLane16<1, 1, op21_20, op11_8, 1, 0, 2749 (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), 2750 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", 2751 [(set (ResTy QPR:$Vd), 2752 (ResTy (IntOp (ResTy QPR:$Vn), 2753 (ResTy (NEONvduplane (OpTy DPR_8:$Vm), 2754 imm:$lane)))))]> { 2755 let isCommutable = 0; 2756 } 2757 class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 2758 Format f, InstrItinClass itin, string OpcodeStr, string Dt, 2759 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 2760 : N3V<op24, op23, op21_20, op11_8, 1, op4, 2761 (outs QPR:$Vd), (ins QPR:$Vm, QPR:$Vn), f, itin, 2762 OpcodeStr, Dt, "$Vd, $Vm, $Vn", "", 2763 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (OpTy QPR:$Vn))))]> { 2764 let TwoOperandAliasConstraint = "$Vm = $Vd"; 2765 let isCommutable = 0; 2766 } 2767 2768 // Multiply-Add/Sub operations: double- and quad-register. 2769 class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 2770 InstrItinClass itin, string OpcodeStr, string Dt, 2771 ValueType Ty, SDPatternOperator MulOp, SDPatternOperator OpNode> 2772 : N3V<op24, op23, op21_20, op11_8, 0, op4, 2773 (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin, 2774 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd", 2775 [(set DPR:$Vd, (Ty (OpNode DPR:$src1, 2776 (Ty (MulOp DPR:$Vn, DPR:$Vm)))))]>; 2777 2778 class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, 2779 string OpcodeStr, string Dt, 2780 ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp> 2781 : N3VLane32<0, 1, op21_20, op11_8, 1, 0, 2782 (outs DPR:$Vd), 2783 (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), 2784 NVMulSLFrm, itin, 2785 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd", 2786 [(set (Ty DPR:$Vd), 2787 (Ty (ShOp (Ty DPR:$src1), 2788 (Ty (MulOp DPR:$Vn, 2789 (Ty (NEONvduplane (Ty DPR_VFP2:$Vm), 2790 imm:$lane)))))))]>; 2791 class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, 2792 string OpcodeStr, string Dt, 2793 ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp> 2794 : N3VLane16<0, 1, op21_20, op11_8, 1, 0, 2795 (outs DPR:$Vd), 2796 (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), 2797 NVMulSLFrm, itin, 2798 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd", 2799 [(set (Ty DPR:$Vd), 2800 (Ty (ShOp (Ty DPR:$src1), 2801 (Ty (MulOp DPR:$Vn, 2802 (Ty (NEONvduplane (Ty DPR_8:$Vm), 2803 imm:$lane)))))))]>; 2804 2805 class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 2806 InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, 2807 SDPatternOperator MulOp, SDPatternOperator OpNode> 2808 : N3V<op24, op23, op21_20, op11_8, 1, op4, 2809 (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin, 2810 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd", 2811 [(set QPR:$Vd, (Ty (OpNode QPR:$src1, 2812 (Ty (MulOp QPR:$Vn, QPR:$Vm)))))]>; 2813 class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, 2814 string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, 2815 SDPatternOperator MulOp, SDPatternOperator ShOp> 2816 : N3VLane32<1, 1, op21_20, op11_8, 1, 0, 2817 (outs QPR:$Vd), 2818 (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), 2819 NVMulSLFrm, itin, 2820 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd", 2821 [(set (ResTy QPR:$Vd), 2822 (ResTy (ShOp (ResTy QPR:$src1), 2823 (ResTy (MulOp QPR:$Vn, 2824 (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm), 2825 imm:$lane)))))))]>; 2826 class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, 2827 string OpcodeStr, string Dt, 2828 ValueType ResTy, ValueType OpTy, 2829 SDPatternOperator MulOp, SDPatternOperator ShOp> 2830 : N3VLane16<1, 1, op21_20, op11_8, 1, 0, 2831 (outs QPR:$Vd), 2832 (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), 2833 NVMulSLFrm, itin, 2834 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd", 2835 [(set (ResTy QPR:$Vd), 2836 (ResTy (ShOp (ResTy QPR:$src1), 2837 (ResTy (MulOp QPR:$Vn, 2838 (ResTy (NEONvduplane (OpTy DPR_8:$Vm), 2839 imm:$lane)))))))]>; 2840 2841 // Neon Intrinsic-Op instructions (VABA): double- and quad-register. 2842 class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 2843 InstrItinClass itin, string OpcodeStr, string Dt, 2844 ValueType Ty, SDPatternOperator IntOp, SDNode OpNode> 2845 : N3V<op24, op23, op21_20, op11_8, 0, op4, 2846 (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin, 2847 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd", 2848 [(set DPR:$Vd, (Ty (OpNode DPR:$src1, 2849 (Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>; 2850 class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 2851 InstrItinClass itin, string OpcodeStr, string Dt, 2852 ValueType Ty, SDPatternOperator IntOp, SDNode OpNode> 2853 : N3V<op24, op23, op21_20, op11_8, 1, op4, 2854 (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin, 2855 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd", 2856 [(set QPR:$Vd, (Ty (OpNode QPR:$src1, 2857 (Ty (IntOp (Ty QPR:$Vn), (Ty QPR:$Vm))))))]>; 2858 2859 // Neon 3-argument intrinsics, both double- and quad-register. 2860 // The destination register is also used as the first source operand register. 2861 class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 2862 InstrItinClass itin, string OpcodeStr, string Dt, 2863 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 2864 : N3V<op24, op23, op21_20, op11_8, 0, op4, 2865 (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin, 2866 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd", 2867 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$src1), 2868 (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>; 2869 class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 2870 InstrItinClass itin, string OpcodeStr, string Dt, 2871 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 2872 : N3V<op24, op23, op21_20, op11_8, 1, op4, 2873 (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin, 2874 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd", 2875 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src1), 2876 (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>; 2877 2878 // Long Multiply-Add/Sub operations. 2879 class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 2880 InstrItinClass itin, string OpcodeStr, string Dt, 2881 ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode> 2882 : N3V<op24, op23, op21_20, op11_8, 0, op4, 2883 (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin, 2884 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd", 2885 [(set QPR:$Vd, (OpNode (TyQ QPR:$src1), 2886 (TyQ (MulOp (TyD DPR:$Vn), 2887 (TyD DPR:$Vm)))))]>; 2888 class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8, 2889 InstrItinClass itin, string OpcodeStr, string Dt, 2890 ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode> 2891 : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), 2892 (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), 2893 NVMulSLFrm, itin, 2894 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd", 2895 [(set QPR:$Vd, 2896 (OpNode (TyQ QPR:$src1), 2897 (TyQ (MulOp (TyD DPR:$Vn), 2898 (TyD (NEONvduplane (TyD DPR_VFP2:$Vm), 2899 imm:$lane))))))]>; 2900 class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8, 2901 InstrItinClass itin, string OpcodeStr, string Dt, 2902 ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode> 2903 : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), 2904 (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), 2905 NVMulSLFrm, itin, 2906 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd", 2907 [(set QPR:$Vd, 2908 (OpNode (TyQ QPR:$src1), 2909 (TyQ (MulOp (TyD DPR:$Vn), 2910 (TyD (NEONvduplane (TyD DPR_8:$Vm), 2911 imm:$lane))))))]>; 2912 2913 // Long Intrinsic-Op vector operations with explicit extend (VABAL). 2914 class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 2915 InstrItinClass itin, string OpcodeStr, string Dt, 2916 ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp, 2917 SDNode OpNode> 2918 : N3V<op24, op23, op21_20, op11_8, 0, op4, 2919 (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin, 2920 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd", 2921 [(set QPR:$Vd, (OpNode (TyQ QPR:$src1), 2922 (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn), 2923 (TyD DPR:$Vm)))))))]>; 2924 2925 // Neon Long 3-argument intrinsic. The destination register is 2926 // a quad-register and is also used as the first source operand register. 2927 class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 2928 InstrItinClass itin, string OpcodeStr, string Dt, 2929 ValueType TyQ, ValueType TyD, SDPatternOperator IntOp> 2930 : N3V<op24, op23, op21_20, op11_8, 0, op4, 2931 (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin, 2932 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd", 2933 [(set QPR:$Vd, 2934 (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$Vn), (TyD DPR:$Vm))))]>; 2935 class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, 2936 string OpcodeStr, string Dt, 2937 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 2938 : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, 2939 (outs QPR:$Vd), 2940 (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), 2941 NVMulSLFrm, itin, 2942 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd", 2943 [(set (ResTy QPR:$Vd), 2944 (ResTy (IntOp (ResTy QPR:$src1), 2945 (OpTy DPR:$Vn), 2946 (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm), 2947 imm:$lane)))))]>; 2948 class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8, 2949 InstrItinClass itin, string OpcodeStr, string Dt, 2950 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 2951 : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, 2952 (outs QPR:$Vd), 2953 (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), 2954 NVMulSLFrm, itin, 2955 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd", 2956 [(set (ResTy QPR:$Vd), 2957 (ResTy (IntOp (ResTy QPR:$src1), 2958 (OpTy DPR:$Vn), 2959 (OpTy (NEONvduplane (OpTy DPR_8:$Vm), 2960 imm:$lane)))))]>; 2961 2962 // Narrowing 3-register intrinsics. 2963 class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 2964 string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ, 2965 SDPatternOperator IntOp, bit Commutable> 2966 : N3V<op24, op23, op21_20, op11_8, 0, op4, 2967 (outs DPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINi4D, 2968 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "", 2969 [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vn), (TyQ QPR:$Vm))))]> { 2970 let isCommutable = Commutable; 2971 } 2972 2973 // Long 3-register operations. 2974 class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 2975 InstrItinClass itin, string OpcodeStr, string Dt, 2976 ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable> 2977 : N3V<op24, op23, op21_20, op11_8, 0, op4, 2978 (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, 2979 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "", 2980 [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> { 2981 let isCommutable = Commutable; 2982 } 2983 2984 class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8, 2985 InstrItinClass itin, string OpcodeStr, string Dt, 2986 ValueType TyQ, ValueType TyD, SDNode OpNode> 2987 : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, 2988 (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), 2989 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", 2990 [(set QPR:$Vd, 2991 (TyQ (OpNode (TyD DPR:$Vn), 2992 (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),imm:$lane)))))]>; 2993 class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8, 2994 InstrItinClass itin, string OpcodeStr, string Dt, 2995 ValueType TyQ, ValueType TyD, SDNode OpNode> 2996 : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, 2997 (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), 2998 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", 2999 [(set QPR:$Vd, 3000 (TyQ (OpNode (TyD DPR:$Vn), 3001 (TyD (NEONvduplane (TyD DPR_8:$Vm), imm:$lane)))))]>; 3002 3003 // Long 3-register operations with explicitly extended operands. 3004 class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 3005 InstrItinClass itin, string OpcodeStr, string Dt, 3006 ValueType TyQ, ValueType TyD, SDNode OpNode, SDNode ExtOp, 3007 bit Commutable> 3008 : N3V<op24, op23, op21_20, op11_8, 0, op4, 3009 (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, 3010 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "", 3011 [(set QPR:$Vd, (OpNode (TyQ (ExtOp (TyD DPR:$Vn))), 3012 (TyQ (ExtOp (TyD DPR:$Vm)))))]> { 3013 let isCommutable = Commutable; 3014 } 3015 3016 // Long 3-register intrinsics with explicit extend (VABDL). 3017 class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 3018 InstrItinClass itin, string OpcodeStr, string Dt, 3019 ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp, 3020 bit Commutable> 3021 : N3V<op24, op23, op21_20, op11_8, 0, op4, 3022 (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, 3023 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "", 3024 [(set QPR:$Vd, (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn), 3025 (TyD DPR:$Vm))))))]> { 3026 let isCommutable = Commutable; 3027 } 3028 3029 // Long 3-register intrinsics. 3030 class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 3031 InstrItinClass itin, string OpcodeStr, string Dt, 3032 ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, bit Commutable> 3033 : N3V<op24, op23, op21_20, op11_8, 0, op4, 3034 (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, 3035 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "", 3036 [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> { 3037 let isCommutable = Commutable; 3038 } 3039 3040 // Same as above, but not predicated. 3041 class N3VLIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, 3042 bit op4, InstrItinClass itin, string OpcodeStr, 3043 string Dt, ValueType ResTy, ValueType OpTy, 3044 SDPatternOperator IntOp, bit Commutable> 3045 : N3Vnp<op27_23, op21_20, op11_8, op6, op4, 3046 (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt, 3047 [(set QPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>; 3048 3049 class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, 3050 string OpcodeStr, string Dt, 3051 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 3052 : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, 3053 (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), 3054 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", 3055 [(set (ResTy QPR:$Vd), 3056 (ResTy (IntOp (OpTy DPR:$Vn), 3057 (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm), 3058 imm:$lane)))))]>; 3059 class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8, 3060 InstrItinClass itin, string OpcodeStr, string Dt, 3061 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 3062 : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, 3063 (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), 3064 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", 3065 [(set (ResTy QPR:$Vd), 3066 (ResTy (IntOp (OpTy DPR:$Vn), 3067 (OpTy (NEONvduplane (OpTy DPR_8:$Vm), 3068 imm:$lane)))))]>; 3069 3070 // Wide 3-register operations. 3071 class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 3072 string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, 3073 SDNode OpNode, SDNode ExtOp, bit Commutable> 3074 : N3V<op24, op23, op21_20, op11_8, 0, op4, 3075 (outs QPR:$Vd), (ins QPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VSUBiD, 3076 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "", 3077 [(set QPR:$Vd, (OpNode (TyQ QPR:$Vn), 3078 (TyQ (ExtOp (TyD DPR:$Vm)))))]> { 3079 // All of these have a two-operand InstAlias. 3080 let TwoOperandAliasConstraint = "$Vn = $Vd"; 3081 let isCommutable = Commutable; 3082 } 3083 3084 // Pairwise long 2-register intrinsics, both double- and quad-register. 3085 class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 3086 bits<2> op17_16, bits<5> op11_7, bit op4, 3087 string OpcodeStr, string Dt, 3088 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 3089 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd), 3090 (ins DPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "", 3091 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>; 3092 class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 3093 bits<2> op17_16, bits<5> op11_7, bit op4, 3094 string OpcodeStr, string Dt, 3095 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 3096 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd), 3097 (ins QPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "", 3098 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>; 3099 3100 // Pairwise long 2-register accumulate intrinsics, 3101 // both double- and quad-register. 3102 // The destination register is also used as the first source operand register. 3103 class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 3104 bits<2> op17_16, bits<5> op11_7, bit op4, 3105 string OpcodeStr, string Dt, 3106 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 3107 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, 3108 (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vm), IIC_VPALiD, 3109 OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd", 3110 [(set DPR:$Vd, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$Vm))))]>; 3111 class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 3112 bits<2> op17_16, bits<5> op11_7, bit op4, 3113 string OpcodeStr, string Dt, 3114 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 3115 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, 3116 (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vm), IIC_VPALiQ, 3117 OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd", 3118 [(set QPR:$Vd, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$Vm))))]>; 3119 3120 // Shift by immediate, 3121 // both double- and quad-register. 3122 let TwoOperandAliasConstraint = "$Vm = $Vd" in { 3123 class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, 3124 Format f, InstrItinClass itin, Operand ImmTy, 3125 string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode> 3126 : N2VImm<op24, op23, op11_8, op7, 0, op4, 3127 (outs DPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), f, itin, 3128 OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "", 3129 [(set DPR:$Vd, (Ty (OpNode (Ty DPR:$Vm), (i32 imm:$SIMM))))]>; 3130 class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, 3131 Format f, InstrItinClass itin, Operand ImmTy, 3132 string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode> 3133 : N2VImm<op24, op23, op11_8, op7, 1, op4, 3134 (outs QPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), f, itin, 3135 OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "", 3136 [(set QPR:$Vd, (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>; 3137 } 3138 3139 // Long shift by immediate. 3140 class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4, 3141 string OpcodeStr, string Dt, 3142 ValueType ResTy, ValueType OpTy, Operand ImmTy, 3143 SDPatternOperator OpNode> 3144 : N2VImm<op24, op23, op11_8, op7, op6, op4, 3145 (outs QPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), N2RegVShLFrm, 3146 IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "", 3147 [(set QPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm), ImmTy:$SIMM)))]>; 3148 3149 // Narrow shift by immediate. 3150 class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4, 3151 InstrItinClass itin, string OpcodeStr, string Dt, 3152 ValueType ResTy, ValueType OpTy, Operand ImmTy, 3153 SDPatternOperator OpNode> 3154 : N2VImm<op24, op23, op11_8, op7, op6, op4, 3155 (outs DPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, itin, 3156 OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "", 3157 [(set DPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm), 3158 (i32 ImmTy:$SIMM))))]>; 3159 3160 // Shift right by immediate and accumulate, 3161 // both double- and quad-register. 3162 let TwoOperandAliasConstraint = "$Vm = $Vd" in { 3163 class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, 3164 Operand ImmTy, string OpcodeStr, string Dt, 3165 ValueType Ty, SDNode ShOp> 3166 : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd), 3167 (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD, 3168 OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd", 3169 [(set DPR:$Vd, (Ty (add DPR:$src1, 3170 (Ty (ShOp DPR:$Vm, (i32 imm:$SIMM))))))]>; 3171 class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, 3172 Operand ImmTy, string OpcodeStr, string Dt, 3173 ValueType Ty, SDNode ShOp> 3174 : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd), 3175 (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD, 3176 OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd", 3177 [(set QPR:$Vd, (Ty (add QPR:$src1, 3178 (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>; 3179 } 3180 3181 // Shift by immediate and insert, 3182 // both double- and quad-register. 3183 let TwoOperandAliasConstraint = "$Vm = $Vd" in { 3184 class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, 3185 Operand ImmTy, Format f, string OpcodeStr, string Dt, 3186 ValueType Ty,SDNode ShOp> 3187 : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd), 3188 (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiD, 3189 OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd", 3190 [(set DPR:$Vd, (Ty (ShOp DPR:$src1, DPR:$Vm, (i32 imm:$SIMM))))]>; 3191 class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, 3192 Operand ImmTy, Format f, string OpcodeStr, string Dt, 3193 ValueType Ty,SDNode ShOp> 3194 : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd), 3195 (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiQ, 3196 OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd", 3197 [(set QPR:$Vd, (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>; 3198 } 3199 3200 // Convert, with fractional bits immediate, 3201 // both double- and quad-register. 3202 class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, 3203 string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, 3204 SDPatternOperator IntOp> 3205 : N2VImm<op24, op23, op11_8, op7, 0, op4, 3206 (outs DPR:$Vd), (ins DPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm, 3207 IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "", 3208 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>; 3209 class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, 3210 string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, 3211 SDPatternOperator IntOp> 3212 : N2VImm<op24, op23, op11_8, op7, 1, op4, 3213 (outs QPR:$Vd), (ins QPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm, 3214 IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "", 3215 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>; 3216 3217 //===----------------------------------------------------------------------===// 3218 // Multiclasses 3219 //===----------------------------------------------------------------------===// 3220 3221 // Abbreviations used in multiclass suffixes: 3222 // Q = quarter int (8 bit) elements 3223 // H = half int (16 bit) elements 3224 // S = single int (32 bit) elements 3225 // D = double int (64 bit) elements 3226 3227 // Neon 2-register vector operations and intrinsics. 3228 3229 // Neon 2-register comparisons. 3230 // source operand element sizes of 8, 16 and 32 bits: 3231 multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, 3232 bits<5> op11_7, bit op4, string opc, string Dt, 3233 string asm, SDNode OpNode> { 3234 // 64-bit vector types. 3235 def v8i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4, 3236 (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary, 3237 opc, !strconcat(Dt, "8"), asm, "", 3238 [(set DPR:$Vd, (v8i8 (OpNode (v8i8 DPR:$Vm))))]>; 3239 def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4, 3240 (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary, 3241 opc, !strconcat(Dt, "16"), asm, "", 3242 [(set DPR:$Vd, (v4i16 (OpNode (v4i16 DPR:$Vm))))]>; 3243 def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4, 3244 (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary, 3245 opc, !strconcat(Dt, "32"), asm, "", 3246 [(set DPR:$Vd, (v2i32 (OpNode (v2i32 DPR:$Vm))))]>; 3247 def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4, 3248 (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary, 3249 opc, "f32", asm, "", 3250 [(set DPR:$Vd, (v2i32 (OpNode (v2f32 DPR:$Vm))))]> { 3251 let Inst{10} = 1; // overwrite F = 1 3252 } 3253 def v4f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4, 3254 (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary, 3255 opc, "f16", asm, "", 3256 [(set DPR:$Vd, (v4i16 (OpNode (v4f16 DPR:$Vm))))]>, 3257 Requires<[HasNEON,HasFullFP16]> { 3258 let Inst{10} = 1; // overwrite F = 1 3259 } 3260 3261 // 128-bit vector types. 3262 def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4, 3263 (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary, 3264 opc, !strconcat(Dt, "8"), asm, "", 3265 [(set QPR:$Vd, (v16i8 (OpNode (v16i8 QPR:$Vm))))]>; 3266 def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4, 3267 (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary, 3268 opc, !strconcat(Dt, "16"), asm, "", 3269 [(set QPR:$Vd, (v8i16 (OpNode (v8i16 QPR:$Vm))))]>; 3270 def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4, 3271 (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary, 3272 opc, !strconcat(Dt, "32"), asm, "", 3273 [(set QPR:$Vd, (v4i32 (OpNode (v4i32 QPR:$Vm))))]>; 3274 def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4, 3275 (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary, 3276 opc, "f32", asm, "", 3277 [(set QPR:$Vd, (v4i32 (OpNode (v4f32 QPR:$Vm))))]> { 3278 let Inst{10} = 1; // overwrite F = 1 3279 } 3280 def v8f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4, 3281 (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary, 3282 opc, "f16", asm, "", 3283 [(set QPR:$Vd, (v8i16 (OpNode (v8f16 QPR:$Vm))))]>, 3284 Requires<[HasNEON,HasFullFP16]> { 3285 let Inst{10} = 1; // overwrite F = 1 3286 } 3287 } 3288 3289 3290 // Neon 2-register vector intrinsics, 3291 // element sizes of 8, 16 and 32 bits: 3292 multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, 3293 bits<5> op11_7, bit op4, 3294 InstrItinClass itinD, InstrItinClass itinQ, 3295 string OpcodeStr, string Dt, SDPatternOperator IntOp> { 3296 // 64-bit vector types. 3297 def v8i8 : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4, 3298 itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>; 3299 def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4, 3300 itinD, OpcodeStr, !strconcat(Dt, "16"),v4i16,v4i16,IntOp>; 3301 def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4, 3302 itinD, OpcodeStr, !strconcat(Dt, "32"),v2i32,v2i32,IntOp>; 3303 3304 // 128-bit vector types. 3305 def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4, 3306 itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8,v16i8,IntOp>; 3307 def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4, 3308 itinQ, OpcodeStr, !strconcat(Dt, "16"),v8i16,v8i16,IntOp>; 3309 def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4, 3310 itinQ, OpcodeStr, !strconcat(Dt, "32"),v4i32,v4i32,IntOp>; 3311 } 3312 3313 3314 // Neon Narrowing 2-register vector operations, 3315 // source operand element sizes of 16, 32 and 64 bits: 3316 multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, 3317 bits<5> op11_7, bit op6, bit op4, 3318 InstrItinClass itin, string OpcodeStr, string Dt, 3319 SDNode OpNode> { 3320 def v8i8 : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4, 3321 itin, OpcodeStr, !strconcat(Dt, "16"), 3322 v8i8, v8i16, OpNode>; 3323 def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4, 3324 itin, OpcodeStr, !strconcat(Dt, "32"), 3325 v4i16, v4i32, OpNode>; 3326 def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4, 3327 itin, OpcodeStr, !strconcat(Dt, "64"), 3328 v2i32, v2i64, OpNode>; 3329 } 3330 3331 // Neon Narrowing 2-register vector intrinsics, 3332 // source operand element sizes of 16, 32 and 64 bits: 3333 multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, 3334 bits<5> op11_7, bit op6, bit op4, 3335 InstrItinClass itin, string OpcodeStr, string Dt, 3336 SDPatternOperator IntOp> { 3337 def v8i8 : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4, 3338 itin, OpcodeStr, !strconcat(Dt, "16"), 3339 v8i8, v8i16, IntOp>; 3340 def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4, 3341 itin, OpcodeStr, !strconcat(Dt, "32"), 3342 v4i16, v4i32, IntOp>; 3343 def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4, 3344 itin, OpcodeStr, !strconcat(Dt, "64"), 3345 v2i32, v2i64, IntOp>; 3346 } 3347 3348 3349 // Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL). 3350 // source operand element sizes of 16, 32 and 64 bits: 3351 multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4, 3352 string OpcodeStr, string Dt, SDNode OpNode> { 3353 def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD, 3354 OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>; 3355 def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD, 3356 OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>; 3357 def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD, 3358 OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>; 3359 } 3360 3361 3362 // Neon 3-register vector operations. 3363 3364 // First with only element sizes of 8, 16 and 32 bits: 3365 multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, 3366 InstrItinClass itinD16, InstrItinClass itinD32, 3367 InstrItinClass itinQ16, InstrItinClass itinQ32, 3368 string OpcodeStr, string Dt, 3369 SDNode OpNode, bit Commutable = 0> { 3370 // 64-bit vector types. 3371 def v8i8 : N3VD<op24, op23, 0b00, op11_8, op4, itinD16, 3372 OpcodeStr, !strconcat(Dt, "8"), 3373 v8i8, v8i8, OpNode, Commutable>; 3374 def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16, 3375 OpcodeStr, !strconcat(Dt, "16"), 3376 v4i16, v4i16, OpNode, Commutable>; 3377 def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32, 3378 OpcodeStr, !strconcat(Dt, "32"), 3379 v2i32, v2i32, OpNode, Commutable>; 3380 3381 // 128-bit vector types. 3382 def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16, 3383 OpcodeStr, !strconcat(Dt, "8"), 3384 v16i8, v16i8, OpNode, Commutable>; 3385 def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16, 3386 OpcodeStr, !strconcat(Dt, "16"), 3387 v8i16, v8i16, OpNode, Commutable>; 3388 def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32, 3389 OpcodeStr, !strconcat(Dt, "32"), 3390 v4i32, v4i32, OpNode, Commutable>; 3391 } 3392 3393 multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> { 3394 def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, "i16", v4i16, ShOp>; 3395 def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, "i32", v2i32, ShOp>; 3396 def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, "i16", v8i16, v4i16, ShOp>; 3397 def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, "i32", 3398 v4i32, v2i32, ShOp>; 3399 } 3400 3401 // ....then also with element size 64 bits: 3402 multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, 3403 InstrItinClass itinD, InstrItinClass itinQ, 3404 string OpcodeStr, string Dt, 3405 SDNode OpNode, bit Commutable = 0> 3406 : N3V_QHS<op24, op23, op11_8, op4, itinD, itinD, itinQ, itinQ, 3407 OpcodeStr, Dt, OpNode, Commutable> { 3408 def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD, 3409 OpcodeStr, !strconcat(Dt, "64"), 3410 v1i64, v1i64, OpNode, Commutable>; 3411 def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ, 3412 OpcodeStr, !strconcat(Dt, "64"), 3413 v2i64, v2i64, OpNode, Commutable>; 3414 } 3415 3416 3417 // Neon 3-register vector intrinsics. 3418 3419 // First with only element sizes of 16 and 32 bits: 3420 multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f, 3421 InstrItinClass itinD16, InstrItinClass itinD32, 3422 InstrItinClass itinQ16, InstrItinClass itinQ32, 3423 string OpcodeStr, string Dt, 3424 SDPatternOperator IntOp, bit Commutable = 0> { 3425 // 64-bit vector types. 3426 def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16, 3427 OpcodeStr, !strconcat(Dt, "16"), 3428 v4i16, v4i16, IntOp, Commutable>; 3429 def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, f, itinD32, 3430 OpcodeStr, !strconcat(Dt, "32"), 3431 v2i32, v2i32, IntOp, Commutable>; 3432 3433 // 128-bit vector types. 3434 def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, f, itinQ16, 3435 OpcodeStr, !strconcat(Dt, "16"), 3436 v8i16, v8i16, IntOp, Commutable>; 3437 def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, f, itinQ32, 3438 OpcodeStr, !strconcat(Dt, "32"), 3439 v4i32, v4i32, IntOp, Commutable>; 3440 } 3441 multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f, 3442 InstrItinClass itinD16, InstrItinClass itinD32, 3443 InstrItinClass itinQ16, InstrItinClass itinQ32, 3444 string OpcodeStr, string Dt, 3445 SDPatternOperator IntOp> { 3446 // 64-bit vector types. 3447 def v4i16 : N3VDIntSh<op24, op23, 0b01, op11_8, op4, f, itinD16, 3448 OpcodeStr, !strconcat(Dt, "16"), 3449 v4i16, v4i16, IntOp>; 3450 def v2i32 : N3VDIntSh<op24, op23, 0b10, op11_8, op4, f, itinD32, 3451 OpcodeStr, !strconcat(Dt, "32"), 3452 v2i32, v2i32, IntOp>; 3453 3454 // 128-bit vector types. 3455 def v8i16 : N3VQIntSh<op24, op23, 0b01, op11_8, op4, f, itinQ16, 3456 OpcodeStr, !strconcat(Dt, "16"), 3457 v8i16, v8i16, IntOp>; 3458 def v4i32 : N3VQIntSh<op24, op23, 0b10, op11_8, op4, f, itinQ32, 3459 OpcodeStr, !strconcat(Dt, "32"), 3460 v4i32, v4i32, IntOp>; 3461 } 3462 3463 multiclass N3VIntSL_HS<bits<4> op11_8, 3464 InstrItinClass itinD16, InstrItinClass itinD32, 3465 InstrItinClass itinQ16, InstrItinClass itinQ32, 3466 string OpcodeStr, string Dt, SDPatternOperator IntOp> { 3467 def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16, 3468 OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>; 3469 def v2i32 : N3VDIntSL<0b10, op11_8, itinD32, 3470 OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>; 3471 def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16, 3472 OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>; 3473 def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32, 3474 OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>; 3475 } 3476 3477 // ....then also with element size of 8 bits: 3478 multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f, 3479 InstrItinClass itinD16, InstrItinClass itinD32, 3480 InstrItinClass itinQ16, InstrItinClass itinQ32, 3481 string OpcodeStr, string Dt, 3482 SDPatternOperator IntOp, bit Commutable = 0> 3483 : N3VInt_HS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32, 3484 OpcodeStr, Dt, IntOp, Commutable> { 3485 def v8i8 : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16, 3486 OpcodeStr, !strconcat(Dt, "8"), 3487 v8i8, v8i8, IntOp, Commutable>; 3488 def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, f, itinQ16, 3489 OpcodeStr, !strconcat(Dt, "8"), 3490 v16i8, v16i8, IntOp, Commutable>; 3491 } 3492 multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f, 3493 InstrItinClass itinD16, InstrItinClass itinD32, 3494 InstrItinClass itinQ16, InstrItinClass itinQ32, 3495 string OpcodeStr, string Dt, 3496 SDPatternOperator IntOp> 3497 : N3VInt_HSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32, 3498 OpcodeStr, Dt, IntOp> { 3499 def v8i8 : N3VDIntSh<op24, op23, 0b00, op11_8, op4, f, itinD16, 3500 OpcodeStr, !strconcat(Dt, "8"), 3501 v8i8, v8i8, IntOp>; 3502 def v16i8 : N3VQIntSh<op24, op23, 0b00, op11_8, op4, f, itinQ16, 3503 OpcodeStr, !strconcat(Dt, "8"), 3504 v16i8, v16i8, IntOp>; 3505 } 3506 3507 3508 // ....then also with element size of 64 bits: 3509 multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f, 3510 InstrItinClass itinD16, InstrItinClass itinD32, 3511 InstrItinClass itinQ16, InstrItinClass itinQ32, 3512 string OpcodeStr, string Dt, 3513 SDPatternOperator IntOp, bit Commutable = 0> 3514 : N3VInt_QHS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32, 3515 OpcodeStr, Dt, IntOp, Commutable> { 3516 def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32, 3517 OpcodeStr, !strconcat(Dt, "64"), 3518 v1i64, v1i64, IntOp, Commutable>; 3519 def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, f, itinQ32, 3520 OpcodeStr, !strconcat(Dt, "64"), 3521 v2i64, v2i64, IntOp, Commutable>; 3522 } 3523 multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f, 3524 InstrItinClass itinD16, InstrItinClass itinD32, 3525 InstrItinClass itinQ16, InstrItinClass itinQ32, 3526 string OpcodeStr, string Dt, 3527 SDPatternOperator IntOp> 3528 : N3VInt_QHSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32, 3529 OpcodeStr, Dt, IntOp> { 3530 def v1i64 : N3VDIntSh<op24, op23, 0b11, op11_8, op4, f, itinD32, 3531 OpcodeStr, !strconcat(Dt, "64"), 3532 v1i64, v1i64, IntOp>; 3533 def v2i64 : N3VQIntSh<op24, op23, 0b11, op11_8, op4, f, itinQ32, 3534 OpcodeStr, !strconcat(Dt, "64"), 3535 v2i64, v2i64, IntOp>; 3536 } 3537 3538 // Neon Narrowing 3-register vector intrinsics, 3539 // source operand element sizes of 16, 32 and 64 bits: 3540 multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4, 3541 string OpcodeStr, string Dt, 3542 SDPatternOperator IntOp, bit Commutable = 0> { 3543 def v8i8 : N3VNInt<op24, op23, 0b00, op11_8, op4, 3544 OpcodeStr, !strconcat(Dt, "16"), 3545 v8i8, v8i16, IntOp, Commutable>; 3546 def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4, 3547 OpcodeStr, !strconcat(Dt, "32"), 3548 v4i16, v4i32, IntOp, Commutable>; 3549 def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4, 3550 OpcodeStr, !strconcat(Dt, "64"), 3551 v2i32, v2i64, IntOp, Commutable>; 3552 } 3553 3554 3555 // Neon Long 3-register vector operations. 3556 3557 multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, 3558 InstrItinClass itin16, InstrItinClass itin32, 3559 string OpcodeStr, string Dt, 3560 SDNode OpNode, bit Commutable = 0> { 3561 def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16, 3562 OpcodeStr, !strconcat(Dt, "8"), 3563 v8i16, v8i8, OpNode, Commutable>; 3564 def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16, 3565 OpcodeStr, !strconcat(Dt, "16"), 3566 v4i32, v4i16, OpNode, Commutable>; 3567 def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32, 3568 OpcodeStr, !strconcat(Dt, "32"), 3569 v2i64, v2i32, OpNode, Commutable>; 3570 } 3571 3572 multiclass N3VLSL_HS<bit op24, bits<4> op11_8, 3573 InstrItinClass itin, string OpcodeStr, string Dt, 3574 SDNode OpNode> { 3575 def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin, OpcodeStr, 3576 !strconcat(Dt, "16"), v4i32, v4i16, OpNode>; 3577 def v2i32 : N3VLSL<op24, 0b10, op11_8, itin, OpcodeStr, 3578 !strconcat(Dt, "32"), v2i64, v2i32, OpNode>; 3579 } 3580 3581 multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, 3582 InstrItinClass itin16, InstrItinClass itin32, 3583 string OpcodeStr, string Dt, 3584 SDNode OpNode, SDNode ExtOp, bit Commutable = 0> { 3585 def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16, 3586 OpcodeStr, !strconcat(Dt, "8"), 3587 v8i16, v8i8, OpNode, ExtOp, Commutable>; 3588 def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16, 3589 OpcodeStr, !strconcat(Dt, "16"), 3590 v4i32, v4i16, OpNode, ExtOp, Commutable>; 3591 def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32, 3592 OpcodeStr, !strconcat(Dt, "32"), 3593 v2i64, v2i32, OpNode, ExtOp, Commutable>; 3594 } 3595 3596 // Neon Long 3-register vector intrinsics. 3597 3598 // First with only element sizes of 16 and 32 bits: 3599 multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, 3600 InstrItinClass itin16, InstrItinClass itin32, 3601 string OpcodeStr, string Dt, 3602 SDPatternOperator IntOp, bit Commutable = 0> { 3603 def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16, 3604 OpcodeStr, !strconcat(Dt, "16"), 3605 v4i32, v4i16, IntOp, Commutable>; 3606 def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32, 3607 OpcodeStr, !strconcat(Dt, "32"), 3608 v2i64, v2i32, IntOp, Commutable>; 3609 } 3610 3611 multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8, 3612 InstrItinClass itin, string OpcodeStr, string Dt, 3613 SDPatternOperator IntOp> { 3614 def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin, 3615 OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>; 3616 def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin, 3617 OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>; 3618 } 3619 3620 // ....then also with element size of 8 bits: 3621 multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, 3622 InstrItinClass itin16, InstrItinClass itin32, 3623 string OpcodeStr, string Dt, 3624 SDPatternOperator IntOp, bit Commutable = 0> 3625 : N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, 3626 IntOp, Commutable> { 3627 def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16, 3628 OpcodeStr, !strconcat(Dt, "8"), 3629 v8i16, v8i8, IntOp, Commutable>; 3630 } 3631 3632 // ....with explicit extend (VABDL). 3633 multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, 3634 InstrItinClass itin, string OpcodeStr, string Dt, 3635 SDPatternOperator IntOp, SDNode ExtOp, bit Commutable = 0> { 3636 def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin, 3637 OpcodeStr, !strconcat(Dt, "8"), 3638 v8i16, v8i8, IntOp, ExtOp, Commutable>; 3639 def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin, 3640 OpcodeStr, !strconcat(Dt, "16"), 3641 v4i32, v4i16, IntOp, ExtOp, Commutable>; 3642 def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin, 3643 OpcodeStr, !strconcat(Dt, "32"), 3644 v2i64, v2i32, IntOp, ExtOp, Commutable>; 3645 } 3646 3647 3648 // Neon Wide 3-register vector intrinsics, 3649 // source operand element sizes of 8, 16 and 32 bits: 3650 multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, 3651 string OpcodeStr, string Dt, 3652 SDNode OpNode, SDNode ExtOp, bit Commutable = 0> { 3653 def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4, 3654 OpcodeStr, !strconcat(Dt, "8"), 3655 v8i16, v8i8, OpNode, ExtOp, Commutable>; 3656 def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4, 3657 OpcodeStr, !strconcat(Dt, "16"), 3658 v4i32, v4i16, OpNode, ExtOp, Commutable>; 3659 def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4, 3660 OpcodeStr, !strconcat(Dt, "32"), 3661 v2i64, v2i32, OpNode, ExtOp, Commutable>; 3662 } 3663 3664 3665 // Neon Multiply-Op vector operations, 3666 // element sizes of 8, 16 and 32 bits: 3667 multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, 3668 InstrItinClass itinD16, InstrItinClass itinD32, 3669 InstrItinClass itinQ16, InstrItinClass itinQ32, 3670 string OpcodeStr, string Dt, SDNode OpNode> { 3671 // 64-bit vector types. 3672 def v8i8 : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16, 3673 OpcodeStr, !strconcat(Dt, "8"), v8i8, mul, OpNode>; 3674 def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16, 3675 OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, OpNode>; 3676 def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32, 3677 OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, OpNode>; 3678 3679 // 128-bit vector types. 3680 def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16, 3681 OpcodeStr, !strconcat(Dt, "8"), v16i8, mul, OpNode>; 3682 def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16, 3683 OpcodeStr, !strconcat(Dt, "16"), v8i16, mul, OpNode>; 3684 def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32, 3685 OpcodeStr, !strconcat(Dt, "32"), v4i32, mul, OpNode>; 3686 } 3687 3688 multiclass N3VMulOpSL_HS<bits<4> op11_8, 3689 InstrItinClass itinD16, InstrItinClass itinD32, 3690 InstrItinClass itinQ16, InstrItinClass itinQ32, 3691 string OpcodeStr, string Dt, SDPatternOperator ShOp> { 3692 def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16, 3693 OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>; 3694 def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32, 3695 OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>; 3696 def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16, 3697 OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, 3698 mul, ShOp>; 3699 def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32, 3700 OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, 3701 mul, ShOp>; 3702 } 3703 3704 // Neon Intrinsic-Op vector operations, 3705 // element sizes of 8, 16 and 32 bits: 3706 multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, 3707 InstrItinClass itinD, InstrItinClass itinQ, 3708 string OpcodeStr, string Dt, SDPatternOperator IntOp, 3709 SDNode OpNode> { 3710 // 64-bit vector types. 3711 def v8i8 : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD, 3712 OpcodeStr, !strconcat(Dt, "8"), v8i8, IntOp, OpNode>; 3713 def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD, 3714 OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp, OpNode>; 3715 def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD, 3716 OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp, OpNode>; 3717 3718 // 128-bit vector types. 3719 def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ, 3720 OpcodeStr, !strconcat(Dt, "8"), v16i8, IntOp, OpNode>; 3721 def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ, 3722 OpcodeStr, !strconcat(Dt, "16"), v8i16, IntOp, OpNode>; 3723 def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ, 3724 OpcodeStr, !strconcat(Dt, "32"), v4i32, IntOp, OpNode>; 3725 } 3726 3727 // Neon 3-argument intrinsics, 3728 // element sizes of 16 and 32 bits: 3729 multiclass N3VInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4, 3730 InstrItinClass itinD16, InstrItinClass itinD32, 3731 InstrItinClass itinQ16, InstrItinClass itinQ32, 3732 string OpcodeStr, string Dt, SDPatternOperator IntOp> { 3733 // 64-bit vector types. 3734 def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD16, 3735 OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>; 3736 def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD32, 3737 OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>; 3738 3739 // 128-bit vector types. 3740 def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ16, 3741 OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>; 3742 def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ32, 3743 OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>; 3744 } 3745 3746 // element sizes of 8, 16 and 32 bits: 3747 multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, 3748 InstrItinClass itinD16, InstrItinClass itinD32, 3749 InstrItinClass itinQ16, InstrItinClass itinQ32, 3750 string OpcodeStr, string Dt, SDPatternOperator IntOp> 3751 :N3VInt3_HS <op24, op23, op11_8, op4, itinD16, itinD32, 3752 itinQ16, itinQ32, OpcodeStr, Dt, IntOp>{ 3753 // 64-bit vector types. 3754 def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD16, 3755 OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>; 3756 // 128-bit vector types. 3757 def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ16, 3758 OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>; 3759 } 3760 3761 // Neon Long Multiply-Op vector operations, 3762 // element sizes of 8, 16 and 32 bits: 3763 multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, 3764 InstrItinClass itin16, InstrItinClass itin32, 3765 string OpcodeStr, string Dt, SDNode MulOp, 3766 SDNode OpNode> { 3767 def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr, 3768 !strconcat(Dt, "8"), v8i16, v8i8, MulOp, OpNode>; 3769 def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr, 3770 !strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>; 3771 def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr, 3772 !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>; 3773 } 3774 3775 multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr, 3776 string Dt, SDNode MulOp, SDNode OpNode> { 3777 def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D, OpcodeStr, 3778 !strconcat(Dt,"16"), v4i32, v4i16, MulOp, OpNode>; 3779 def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D, OpcodeStr, 3780 !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>; 3781 } 3782 3783 3784 // Neon Long 3-argument intrinsics. 3785 3786 // First with only element sizes of 16 and 32 bits: 3787 multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4, 3788 InstrItinClass itin16, InstrItinClass itin32, 3789 string OpcodeStr, string Dt, SDPatternOperator IntOp> { 3790 def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16, 3791 OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>; 3792 def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32, 3793 OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>; 3794 } 3795 3796 multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8, 3797 string OpcodeStr, string Dt, SDPatternOperator IntOp> { 3798 def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D, 3799 OpcodeStr, !strconcat(Dt,"16"), v4i32, v4i16, IntOp>; 3800 def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D, 3801 OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>; 3802 } 3803 3804 // ....then also with element size of 8 bits: 3805 multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, 3806 InstrItinClass itin16, InstrItinClass itin32, 3807 string OpcodeStr, string Dt, SDPatternOperator IntOp> 3808 : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, IntOp> { 3809 def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16, 3810 OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>; 3811 } 3812 3813 // ....with explicit extend (VABAL). 3814 multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, 3815 InstrItinClass itin, string OpcodeStr, string Dt, 3816 SDPatternOperator IntOp, SDNode ExtOp, SDNode OpNode> { 3817 def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin, 3818 OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, 3819 IntOp, ExtOp, OpNode>; 3820 def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin, 3821 OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, 3822 IntOp, ExtOp, OpNode>; 3823 def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin, 3824 OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, 3825 IntOp, ExtOp, OpNode>; 3826 } 3827 3828 3829 // Neon Pairwise long 2-register intrinsics, 3830 // element sizes of 8, 16 and 32 bits: 3831 multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, 3832 bits<5> op11_7, bit op4, 3833 string OpcodeStr, string Dt, SDPatternOperator IntOp> { 3834 // 64-bit vector types. 3835 def v8i8 : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4, 3836 OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>; 3837 def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4, 3838 OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>; 3839 def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4, 3840 OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>; 3841 3842 // 128-bit vector types. 3843 def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4, 3844 OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>; 3845 def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4, 3846 OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>; 3847 def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4, 3848 OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>; 3849 } 3850 3851 3852 // Neon Pairwise long 2-register accumulate intrinsics, 3853 // element sizes of 8, 16 and 32 bits: 3854 multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, 3855 bits<5> op11_7, bit op4, 3856 string OpcodeStr, string Dt, SDPatternOperator IntOp> { 3857 // 64-bit vector types. 3858 def v8i8 : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4, 3859 OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>; 3860 def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4, 3861 OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>; 3862 def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4, 3863 OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>; 3864 3865 // 128-bit vector types. 3866 def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4, 3867 OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>; 3868 def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4, 3869 OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>; 3870 def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4, 3871 OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>; 3872 } 3873 3874 3875 // Neon 2-register vector shift by immediate, 3876 // with f of either N2RegVShLFrm or N2RegVShRFrm 3877 // element sizes of 8, 16, 32 and 64 bits: 3878 multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, 3879 InstrItinClass itin, string OpcodeStr, string Dt, 3880 SDNode OpNode> { 3881 // 64-bit vector types. 3882 def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm, 3883 OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> { 3884 let Inst{21-19} = 0b001; // imm6 = 001xxx 3885 } 3886 def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm, 3887 OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> { 3888 let Inst{21-20} = 0b01; // imm6 = 01xxxx 3889 } 3890 def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm, 3891 OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> { 3892 let Inst{21} = 0b1; // imm6 = 1xxxxx 3893 } 3894 def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm, 3895 OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>; 3896 // imm6 = xxxxxx 3897 3898 // 128-bit vector types. 3899 def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm, 3900 OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> { 3901 let Inst{21-19} = 0b001; // imm6 = 001xxx 3902 } 3903 def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm, 3904 OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> { 3905 let Inst{21-20} = 0b01; // imm6 = 01xxxx 3906 } 3907 def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm, 3908 OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> { 3909 let Inst{21} = 0b1; // imm6 = 1xxxxx 3910 } 3911 def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm, 3912 OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>; 3913 // imm6 = xxxxxx 3914 } 3915 multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, 3916 InstrItinClass itin, string OpcodeStr, string Dt, 3917 string baseOpc, SDNode OpNode> { 3918 // 64-bit vector types. 3919 def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8, 3920 OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> { 3921 let Inst{21-19} = 0b001; // imm6 = 001xxx 3922 } 3923 def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16, 3924 OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> { 3925 let Inst{21-20} = 0b01; // imm6 = 01xxxx 3926 } 3927 def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32, 3928 OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> { 3929 let Inst{21} = 0b1; // imm6 = 1xxxxx 3930 } 3931 def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64, 3932 OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>; 3933 // imm6 = xxxxxx 3934 3935 // 128-bit vector types. 3936 def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8, 3937 OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> { 3938 let Inst{21-19} = 0b001; // imm6 = 001xxx 3939 } 3940 def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16, 3941 OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> { 3942 let Inst{21-20} = 0b01; // imm6 = 01xxxx 3943 } 3944 def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32, 3945 OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> { 3946 let Inst{21} = 0b1; // imm6 = 1xxxxx 3947 } 3948 def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64, 3949 OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>; 3950 // imm6 = xxxxxx 3951 } 3952 3953 // Neon Shift-Accumulate vector operations, 3954 // element sizes of 8, 16, 32 and 64 bits: 3955 multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, 3956 string OpcodeStr, string Dt, SDNode ShOp> { 3957 // 64-bit vector types. 3958 def v8i8 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm8, 3959 OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> { 3960 let Inst{21-19} = 0b001; // imm6 = 001xxx 3961 } 3962 def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm16, 3963 OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> { 3964 let Inst{21-20} = 0b01; // imm6 = 01xxxx 3965 } 3966 def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm32, 3967 OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> { 3968 let Inst{21} = 0b1; // imm6 = 1xxxxx 3969 } 3970 def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, shr_imm64, 3971 OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>; 3972 // imm6 = xxxxxx 3973 3974 // 128-bit vector types. 3975 def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm8, 3976 OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> { 3977 let Inst{21-19} = 0b001; // imm6 = 001xxx 3978 } 3979 def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm16, 3980 OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> { 3981 let Inst{21-20} = 0b01; // imm6 = 01xxxx 3982 } 3983 def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm32, 3984 OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> { 3985 let Inst{21} = 0b1; // imm6 = 1xxxxx 3986 } 3987 def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, shr_imm64, 3988 OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>; 3989 // imm6 = xxxxxx 3990 } 3991 3992 // Neon Shift-Insert vector operations, 3993 // with f of either N2RegVShLFrm or N2RegVShRFrm 3994 // element sizes of 8, 16, 32 and 64 bits: 3995 multiclass N2VShInsL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, 3996 string OpcodeStr> { 3997 // 64-bit vector types. 3998 def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm, 3999 N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsli> { 4000 let Inst{21-19} = 0b001; // imm6 = 001xxx 4001 } 4002 def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm, 4003 N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsli> { 4004 let Inst{21-20} = 0b01; // imm6 = 01xxxx 4005 } 4006 def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm, 4007 N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsli> { 4008 let Inst{21} = 0b1; // imm6 = 1xxxxx 4009 } 4010 def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, i32imm, 4011 N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsli>; 4012 // imm6 = xxxxxx 4013 4014 // 128-bit vector types. 4015 def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm, 4016 N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsli> { 4017 let Inst{21-19} = 0b001; // imm6 = 001xxx 4018 } 4019 def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm, 4020 N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsli> { 4021 let Inst{21-20} = 0b01; // imm6 = 01xxxx 4022 } 4023 def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm, 4024 N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsli> { 4025 let Inst{21} = 0b1; // imm6 = 1xxxxx 4026 } 4027 def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, i32imm, 4028 N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsli>; 4029 // imm6 = xxxxxx 4030 } 4031 multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, 4032 string OpcodeStr> { 4033 // 64-bit vector types. 4034 def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm8, 4035 N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsri> { 4036 let Inst{21-19} = 0b001; // imm6 = 001xxx 4037 } 4038 def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm16, 4039 N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsri> { 4040 let Inst{21-20} = 0b01; // imm6 = 01xxxx 4041 } 4042 def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm32, 4043 N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsri> { 4044 let Inst{21} = 0b1; // imm6 = 1xxxxx 4045 } 4046 def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, shr_imm64, 4047 N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsri>; 4048 // imm6 = xxxxxx 4049 4050 // 128-bit vector types. 4051 def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm8, 4052 N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsri> { 4053 let Inst{21-19} = 0b001; // imm6 = 001xxx 4054 } 4055 def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm16, 4056 N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsri> { 4057 let Inst{21-20} = 0b01; // imm6 = 01xxxx 4058 } 4059 def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm32, 4060 N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsri> { 4061 let Inst{21} = 0b1; // imm6 = 1xxxxx 4062 } 4063 def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, shr_imm64, 4064 N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsri>; 4065 // imm6 = xxxxxx 4066 } 4067 4068 // Neon Shift Long operations, 4069 // element sizes of 8, 16, 32 bits: 4070 multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, 4071 bit op4, string OpcodeStr, string Dt, 4072 SDPatternOperator OpNode> { 4073 def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4, 4074 OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, imm1_7, OpNode> { 4075 let Inst{21-19} = 0b001; // imm6 = 001xxx 4076 } 4077 def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4, 4078 OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, imm1_15, OpNode> { 4079 let Inst{21-20} = 0b01; // imm6 = 01xxxx 4080 } 4081 def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4, 4082 OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, imm1_31, OpNode> { 4083 let Inst{21} = 0b1; // imm6 = 1xxxxx 4084 } 4085 } 4086 4087 // Neon Shift Narrow operations, 4088 // element sizes of 16, 32, 64 bits: 4089 multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, 4090 bit op4, InstrItinClass itin, string OpcodeStr, string Dt, 4091 SDPatternOperator OpNode> { 4092 def v8i8 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin, 4093 OpcodeStr, !strconcat(Dt, "16"), 4094 v8i8, v8i16, shr_imm8, OpNode> { 4095 let Inst{21-19} = 0b001; // imm6 = 001xxx 4096 } 4097 def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin, 4098 OpcodeStr, !strconcat(Dt, "32"), 4099 v4i16, v4i32, shr_imm16, OpNode> { 4100 let Inst{21-20} = 0b01; // imm6 = 01xxxx 4101 } 4102 def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin, 4103 OpcodeStr, !strconcat(Dt, "64"), 4104 v2i32, v2i64, shr_imm32, OpNode> { 4105 let Inst{21} = 0b1; // imm6 = 1xxxxx 4106 } 4107 } 4108 4109 //===----------------------------------------------------------------------===// 4110 // Instruction Definitions. 4111 //===----------------------------------------------------------------------===// 4112 4113 // Vector Add Operations. 4114 4115 // VADD : Vector Add (integer and floating-point) 4116 defm VADD : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i", 4117 add, 1>; 4118 def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32", 4119 v2f32, v2f32, fadd, 1>; 4120 def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32", 4121 v4f32, v4f32, fadd, 1>; 4122 def VADDhd : N3VD<0, 0, 0b01, 0b1101, 0, IIC_VBIND, "vadd", "f16", 4123 v4f16, v4f16, fadd, 1>, 4124 Requires<[HasNEON,HasFullFP16]>; 4125 def VADDhq : N3VQ<0, 0, 0b01, 0b1101, 0, IIC_VBINQ, "vadd", "f16", 4126 v8f16, v8f16, fadd, 1>, 4127 Requires<[HasNEON,HasFullFP16]>; 4128 // VADDL : Vector Add Long (Q = D + D) 4129 defm VADDLs : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD, 4130 "vaddl", "s", add, sext, 1>; 4131 defm VADDLu : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD, 4132 "vaddl", "u", add, zext, 1>; 4133 // VADDW : Vector Add Wide (Q = Q + D) 4134 defm VADDWs : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>; 4135 defm VADDWu : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zext, 0>; 4136 // VHADD : Vector Halving Add 4137 defm VHADDs : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm, 4138 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, 4139 "vhadd", "s", int_arm_neon_vhadds, 1>; 4140 defm VHADDu : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm, 4141 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, 4142 "vhadd", "u", int_arm_neon_vhaddu, 1>; 4143 // VRHADD : Vector Rounding Halving Add 4144 defm VRHADDs : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm, 4145 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, 4146 "vrhadd", "s", int_arm_neon_vrhadds, 1>; 4147 defm VRHADDu : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm, 4148 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, 4149 "vrhadd", "u", int_arm_neon_vrhaddu, 1>; 4150 // VQADD : Vector Saturating Add 4151 defm VQADDs : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm, 4152 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, 4153 "vqadd", "s", int_arm_neon_vqadds, 1>; 4154 defm VQADDu : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm, 4155 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, 4156 "vqadd", "u", int_arm_neon_vqaddu, 1>; 4157 // VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q) 4158 defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", null_frag, 1>; 4159 // VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q) 4160 defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i", 4161 int_arm_neon_vraddhn, 1>; 4162 4163 def : Pat<(v8i8 (trunc (NEONvshru (add (v8i16 QPR:$Vn), QPR:$Vm), 8))), 4164 (VADDHNv8i8 QPR:$Vn, QPR:$Vm)>; 4165 def : Pat<(v4i16 (trunc (NEONvshru (add (v4i32 QPR:$Vn), QPR:$Vm), 16))), 4166 (VADDHNv4i16 QPR:$Vn, QPR:$Vm)>; 4167 def : Pat<(v2i32 (trunc (NEONvshru (add (v2i64 QPR:$Vn), QPR:$Vm), 32))), 4168 (VADDHNv2i32 QPR:$Vn, QPR:$Vm)>; 4169 4170 // Vector Multiply Operations. 4171 4172 // VMUL : Vector Multiply (integer, polynomial and floating-point) 4173 defm VMUL : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D, 4174 IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>; 4175 def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul", 4176 "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>; 4177 def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul", 4178 "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>; 4179 def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32", 4180 v2f32, v2f32, fmul, 1>; 4181 def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32", 4182 v4f32, v4f32, fmul, 1>; 4183 def VMULhd : N3VD<1, 0, 0b01, 0b1101, 1, IIC_VFMULD, "vmul", "f16", 4184 v4f16, v4f16, fmul, 1>, 4185 Requires<[HasNEON,HasFullFP16]>; 4186 def VMULhq : N3VQ<1, 0, 0b01, 0b1101, 1, IIC_VFMULQ, "vmul", "f16", 4187 v8f16, v8f16, fmul, 1>, 4188 Requires<[HasNEON,HasFullFP16]>; 4189 defm VMULsl : N3VSL_HS<0b1000, "vmul", mul>; 4190 def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>; 4191 def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32, 4192 v2f32, fmul>; 4193 def VMULslhd : N3VDSL16<0b01, 0b1001, "vmul", "f16", v4f16, fmul>, 4194 Requires<[HasNEON,HasFullFP16]>; 4195 def VMULslhq : N3VQSL16<0b01, 0b1001, "vmul", "f16", v8f16, 4196 v4f16, fmul>, 4197 Requires<[HasNEON,HasFullFP16]>; 4198 4199 def : Pat<(v8i16 (mul (v8i16 QPR:$src1), 4200 (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))), 4201 (v8i16 (VMULslv8i16 (v8i16 QPR:$src1), 4202 (v4i16 (EXTRACT_SUBREG QPR:$src2, 4203 (DSubReg_i16_reg imm:$lane))), 4204 (SubReg_i16_lane imm:$lane)))>; 4205 def : Pat<(v4i32 (mul (v4i32 QPR:$src1), 4206 (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))), 4207 (v4i32 (VMULslv4i32 (v4i32 QPR:$src1), 4208 (v2i32 (EXTRACT_SUBREG QPR:$src2, 4209 (DSubReg_i32_reg imm:$lane))), 4210 (SubReg_i32_lane imm:$lane)))>; 4211 def : Pat<(v4f32 (fmul (v4f32 QPR:$src1), 4212 (v4f32 (NEONvduplane (v4f32 QPR:$src2), imm:$lane)))), 4213 (v4f32 (VMULslfq (v4f32 QPR:$src1), 4214 (v2f32 (EXTRACT_SUBREG QPR:$src2, 4215 (DSubReg_i32_reg imm:$lane))), 4216 (SubReg_i32_lane imm:$lane)))>; 4217 4218 4219 def : Pat<(v2f32 (fmul DPR:$Rn, (NEONvdup (f32 SPR:$Rm)))), 4220 (VMULslfd DPR:$Rn, 4221 (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0), 4222 (i32 0))>; 4223 def : Pat<(v4f32 (fmul QPR:$Rn, (NEONvdup (f32 SPR:$Rm)))), 4224 (VMULslfq QPR:$Rn, 4225 (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0), 4226 (i32 0))>; 4227 4228 4229 // VQDMULH : Vector Saturating Doubling Multiply Returning High Half 4230 defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D, 4231 IIC_VMULi16Q, IIC_VMULi32Q, 4232 "vqdmulh", "s", int_arm_neon_vqdmulh, 1>; 4233 defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D, 4234 IIC_VMULi16Q, IIC_VMULi32Q, 4235 "vqdmulh", "s", int_arm_neon_vqdmulh>; 4236 def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1), 4237 (v8i16 (NEONvduplane (v8i16 QPR:$src2), 4238 imm:$lane)))), 4239 (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1), 4240 (v4i16 (EXTRACT_SUBREG QPR:$src2, 4241 (DSubReg_i16_reg imm:$lane))), 4242 (SubReg_i16_lane imm:$lane)))>; 4243 def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1), 4244 (v4i32 (NEONvduplane (v4i32 QPR:$src2), 4245 imm:$lane)))), 4246 (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1), 4247 (v2i32 (EXTRACT_SUBREG QPR:$src2, 4248 (DSubReg_i32_reg imm:$lane))), 4249 (SubReg_i32_lane imm:$lane)))>; 4250 4251 // VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half 4252 defm VQRDMULH : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm, 4253 IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q, 4254 "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>; 4255 defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D, 4256 IIC_VMULi16Q, IIC_VMULi32Q, 4257 "vqrdmulh", "s", int_arm_neon_vqrdmulh>; 4258 def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1), 4259 (v8i16 (NEONvduplane (v8i16 QPR:$src2), 4260 imm:$lane)))), 4261 (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1), 4262 (v4i16 (EXTRACT_SUBREG QPR:$src2, 4263 (DSubReg_i16_reg imm:$lane))), 4264 (SubReg_i16_lane imm:$lane)))>; 4265 def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1), 4266 (v4i32 (NEONvduplane (v4i32 QPR:$src2), 4267 imm:$lane)))), 4268 (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1), 4269 (v2i32 (EXTRACT_SUBREG QPR:$src2, 4270 (DSubReg_i32_reg imm:$lane))), 4271 (SubReg_i32_lane imm:$lane)))>; 4272 4273 // VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D) 4274 let PostEncoderMethod = "NEONThumb2DataIPostEncoder", 4275 DecoderNamespace = "NEONData" in { 4276 defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, 4277 "vmull", "s", NEONvmulls, 1>; 4278 defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, 4279 "vmull", "u", NEONvmullu, 1>; 4280 def VMULLp8 : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8", 4281 v8i16, v8i8, int_arm_neon_vmullp, 1>; 4282 def VMULLp64 : N3VLIntnp<0b00101, 0b10, 0b1110, 0, 0, NoItinerary, 4283 "vmull", "p64", v2i64, v1i64, int_arm_neon_vmullp, 1>, 4284 Requires<[HasV8, HasCrypto]>; 4285 } 4286 defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>; 4287 defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>; 4288 4289 // VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D) 4290 defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D, 4291 "vqdmull", "s", int_arm_neon_vqdmull, 1>; 4292 defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D, 4293 "vqdmull", "s", int_arm_neon_vqdmull>; 4294 4295 // Vector Multiply-Accumulate and Multiply-Subtract Operations. 4296 4297 // VMLA : Vector Multiply Accumulate (integer and floating-point) 4298 defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, 4299 IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>; 4300 def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32", 4301 v2f32, fmul_su, fadd_mlx>, 4302 Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; 4303 def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32", 4304 v4f32, fmul_su, fadd_mlx>, 4305 Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; 4306 def VMLAhd : N3VDMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACD, "vmla", "f16", 4307 v4f16, fmul_su, fadd_mlx>, 4308 Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>; 4309 def VMLAhq : N3VQMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACQ, "vmla", "f16", 4310 v8f16, fmul_su, fadd_mlx>, 4311 Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>; 4312 defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D, 4313 IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>; 4314 def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32", 4315 v2f32, fmul_su, fadd_mlx>, 4316 Requires<[HasNEON, UseFPVMLx]>; 4317 def VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32", 4318 v4f32, v2f32, fmul_su, fadd_mlx>, 4319 Requires<[HasNEON, UseFPVMLx]>; 4320 def VMLAslhd : N3VDMulOpSL16<0b01, 0b0001, IIC_VMACD, "vmla", "f16", 4321 v4f16, fmul, fadd>, 4322 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>; 4323 def VMLAslhq : N3VQMulOpSL16<0b01, 0b0001, IIC_VMACQ, "vmla", "f16", 4324 v8f16, v4f16, fmul, fadd>, 4325 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>; 4326 4327 def : Pat<(v8i16 (add (v8i16 QPR:$src1), 4328 (mul (v8i16 QPR:$src2), 4329 (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))), 4330 (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2), 4331 (v4i16 (EXTRACT_SUBREG QPR:$src3, 4332 (DSubReg_i16_reg imm:$lane))), 4333 (SubReg_i16_lane imm:$lane)))>; 4334 4335 def : Pat<(v4i32 (add (v4i32 QPR:$src1), 4336 (mul (v4i32 QPR:$src2), 4337 (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))), 4338 (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2), 4339 (v2i32 (EXTRACT_SUBREG QPR:$src3, 4340 (DSubReg_i32_reg imm:$lane))), 4341 (SubReg_i32_lane imm:$lane)))>; 4342 4343 def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1), 4344 (fmul_su (v4f32 QPR:$src2), 4345 (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))), 4346 (v4f32 (VMLAslfq (v4f32 QPR:$src1), 4347 (v4f32 QPR:$src2), 4348 (v2f32 (EXTRACT_SUBREG QPR:$src3, 4349 (DSubReg_i32_reg imm:$lane))), 4350 (SubReg_i32_lane imm:$lane)))>, 4351 Requires<[HasNEON, UseFPVMLx]>; 4352 4353 // VMLAL : Vector Multiply Accumulate Long (Q += D * D) 4354 defm VMLALs : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D, 4355 "vmlal", "s", NEONvmulls, add>; 4356 defm VMLALu : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D, 4357 "vmlal", "u", NEONvmullu, add>; 4358 4359 defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>; 4360 defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>; 4361 4362 let Predicates = [HasNEON, HasV8_1a] in { 4363 // v8.1a Neon Rounding Double Multiply-Op vector operations, 4364 // VQRDMLAH : Vector Saturating Rounding Doubling Multiply Accumulate Long 4365 // (Q += D * D) 4366 defm VQRDMLAH : N3VInt3_HS<1, 0, 0b1011, 1, IIC_VMACi16D, IIC_VMACi32D, 4367 IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s", 4368 null_frag>; 4369 def : Pat<(v4i16 (int_arm_neon_vqadds 4370 (v4i16 DPR:$src1), 4371 (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn), 4372 (v4i16 DPR:$Vm))))), 4373 (v4i16 (VQRDMLAHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>; 4374 def : Pat<(v2i32 (int_arm_neon_vqadds 4375 (v2i32 DPR:$src1), 4376 (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn), 4377 (v2i32 DPR:$Vm))))), 4378 (v2i32 (VQRDMLAHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>; 4379 def : Pat<(v8i16 (int_arm_neon_vqadds 4380 (v8i16 QPR:$src1), 4381 (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn), 4382 (v8i16 QPR:$Vm))))), 4383 (v8i16 (VQRDMLAHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>; 4384 def : Pat<(v4i32 (int_arm_neon_vqadds 4385 (v4i32 QPR:$src1), 4386 (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn), 4387 (v4i32 QPR:$Vm))))), 4388 (v4i32 (VQRDMLAHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>; 4389 4390 defm VQRDMLAHsl : N3VMulOpSL_HS<0b1110, IIC_VMACi16D, IIC_VMACi32D, 4391 IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s", 4392 null_frag>; 4393 def : Pat<(v4i16 (int_arm_neon_vqadds 4394 (v4i16 DPR:$src1), 4395 (v4i16 (int_arm_neon_vqrdmulh 4396 (v4i16 DPR:$Vn), 4397 (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm), 4398 imm:$lane)))))), 4399 (v4i16 (VQRDMLAHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, 4400 imm:$lane))>; 4401 def : Pat<(v2i32 (int_arm_neon_vqadds 4402 (v2i32 DPR:$src1), 4403 (v2i32 (int_arm_neon_vqrdmulh 4404 (v2i32 DPR:$Vn), 4405 (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm), 4406 imm:$lane)))))), 4407 (v2i32 (VQRDMLAHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, 4408 imm:$lane))>; 4409 def : Pat<(v8i16 (int_arm_neon_vqadds 4410 (v8i16 QPR:$src1), 4411 (v8i16 (int_arm_neon_vqrdmulh 4412 (v8i16 QPR:$src2), 4413 (v8i16 (NEONvduplane (v8i16 QPR:$src3), 4414 imm:$lane)))))), 4415 (v8i16 (VQRDMLAHslv8i16 (v8i16 QPR:$src1), 4416 (v8i16 QPR:$src2), 4417 (v4i16 (EXTRACT_SUBREG 4418 QPR:$src3, 4419 (DSubReg_i16_reg imm:$lane))), 4420 (SubReg_i16_lane imm:$lane)))>; 4421 def : Pat<(v4i32 (int_arm_neon_vqadds 4422 (v4i32 QPR:$src1), 4423 (v4i32 (int_arm_neon_vqrdmulh 4424 (v4i32 QPR:$src2), 4425 (v4i32 (NEONvduplane (v4i32 QPR:$src3), 4426 imm:$lane)))))), 4427 (v4i32 (VQRDMLAHslv4i32 (v4i32 QPR:$src1), 4428 (v4i32 QPR:$src2), 4429 (v2i32 (EXTRACT_SUBREG 4430 QPR:$src3, 4431 (DSubReg_i32_reg imm:$lane))), 4432 (SubReg_i32_lane imm:$lane)))>; 4433 4434 // VQRDMLSH : Vector Saturating Rounding Doubling Multiply Subtract Long 4435 // (Q -= D * D) 4436 defm VQRDMLSH : N3VInt3_HS<1, 0, 0b1100, 1, IIC_VMACi16D, IIC_VMACi32D, 4437 IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s", 4438 null_frag>; 4439 def : Pat<(v4i16 (int_arm_neon_vqsubs 4440 (v4i16 DPR:$src1), 4441 (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn), 4442 (v4i16 DPR:$Vm))))), 4443 (v4i16 (VQRDMLSHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>; 4444 def : Pat<(v2i32 (int_arm_neon_vqsubs 4445 (v2i32 DPR:$src1), 4446 (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn), 4447 (v2i32 DPR:$Vm))))), 4448 (v2i32 (VQRDMLSHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>; 4449 def : Pat<(v8i16 (int_arm_neon_vqsubs 4450 (v8i16 QPR:$src1), 4451 (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn), 4452 (v8i16 QPR:$Vm))))), 4453 (v8i16 (VQRDMLSHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>; 4454 def : Pat<(v4i32 (int_arm_neon_vqsubs 4455 (v4i32 QPR:$src1), 4456 (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn), 4457 (v4i32 QPR:$Vm))))), 4458 (v4i32 (VQRDMLSHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>; 4459 4460 defm VQRDMLSHsl : N3VMulOpSL_HS<0b1111, IIC_VMACi16D, IIC_VMACi32D, 4461 IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s", 4462 null_frag>; 4463 def : Pat<(v4i16 (int_arm_neon_vqsubs 4464 (v4i16 DPR:$src1), 4465 (v4i16 (int_arm_neon_vqrdmulh 4466 (v4i16 DPR:$Vn), 4467 (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm), 4468 imm:$lane)))))), 4469 (v4i16 (VQRDMLSHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane))>; 4470 def : Pat<(v2i32 (int_arm_neon_vqsubs 4471 (v2i32 DPR:$src1), 4472 (v2i32 (int_arm_neon_vqrdmulh 4473 (v2i32 DPR:$Vn), 4474 (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm), 4475 imm:$lane)))))), 4476 (v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, 4477 imm:$lane))>; 4478 def : Pat<(v8i16 (int_arm_neon_vqsubs 4479 (v8i16 QPR:$src1), 4480 (v8i16 (int_arm_neon_vqrdmulh 4481 (v8i16 QPR:$src2), 4482 (v8i16 (NEONvduplane (v8i16 QPR:$src3), 4483 imm:$lane)))))), 4484 (v8i16 (VQRDMLSHslv8i16 (v8i16 QPR:$src1), 4485 (v8i16 QPR:$src2), 4486 (v4i16 (EXTRACT_SUBREG 4487 QPR:$src3, 4488 (DSubReg_i16_reg imm:$lane))), 4489 (SubReg_i16_lane imm:$lane)))>; 4490 def : Pat<(v4i32 (int_arm_neon_vqsubs 4491 (v4i32 QPR:$src1), 4492 (v4i32 (int_arm_neon_vqrdmulh 4493 (v4i32 QPR:$src2), 4494 (v4i32 (NEONvduplane (v4i32 QPR:$src3), 4495 imm:$lane)))))), 4496 (v4i32 (VQRDMLSHslv4i32 (v4i32 QPR:$src1), 4497 (v4i32 QPR:$src2), 4498 (v2i32 (EXTRACT_SUBREG 4499 QPR:$src3, 4500 (DSubReg_i32_reg imm:$lane))), 4501 (SubReg_i32_lane imm:$lane)))>; 4502 } 4503 // VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D) 4504 defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, 4505 "vqdmlal", "s", null_frag>; 4506 defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", null_frag>; 4507 4508 def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1), 4509 (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), 4510 (v4i16 DPR:$Vm))))), 4511 (VQDMLALv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>; 4512 def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1), 4513 (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), 4514 (v2i32 DPR:$Vm))))), 4515 (VQDMLALv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>; 4516 def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1), 4517 (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), 4518 (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm), 4519 imm:$lane)))))), 4520 (VQDMLALslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>; 4521 def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1), 4522 (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), 4523 (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm), 4524 imm:$lane)))))), 4525 (VQDMLALslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>; 4526 4527 // VMLS : Vector Multiply Subtract (integer and floating-point) 4528 defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, 4529 IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>; 4530 def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32", 4531 v2f32, fmul_su, fsub_mlx>, 4532 Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; 4533 def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32", 4534 v4f32, fmul_su, fsub_mlx>, 4535 Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; 4536 def VMLShd : N3VDMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACD, "vmls", "f16", 4537 v4f16, fmul, fsub>, 4538 Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>; 4539 def VMLShq : N3VQMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACQ, "vmls", "f16", 4540 v8f16, fmul, fsub>, 4541 Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>; 4542 defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D, 4543 IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>; 4544 def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32", 4545 v2f32, fmul_su, fsub_mlx>, 4546 Requires<[HasNEON, UseFPVMLx]>; 4547 def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32", 4548 v4f32, v2f32, fmul_su, fsub_mlx>, 4549 Requires<[HasNEON, UseFPVMLx]>; 4550 def VMLSslhd : N3VDMulOpSL16<0b01, 0b0101, IIC_VMACD, "vmls", "f16", 4551 v4f16, fmul, fsub>, 4552 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>; 4553 def VMLSslhq : N3VQMulOpSL16<0b01, 0b0101, IIC_VMACQ, "vmls", "f16", 4554 v8f16, v4f16, fmul, fsub>, 4555 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>; 4556 4557 def : Pat<(v8i16 (sub (v8i16 QPR:$src1), 4558 (mul (v8i16 QPR:$src2), 4559 (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))), 4560 (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2), 4561 (v4i16 (EXTRACT_SUBREG QPR:$src3, 4562 (DSubReg_i16_reg imm:$lane))), 4563 (SubReg_i16_lane imm:$lane)))>; 4564 4565 def : Pat<(v4i32 (sub (v4i32 QPR:$src1), 4566 (mul (v4i32 QPR:$src2), 4567 (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))), 4568 (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2), 4569 (v2i32 (EXTRACT_SUBREG QPR:$src3, 4570 (DSubReg_i32_reg imm:$lane))), 4571 (SubReg_i32_lane imm:$lane)))>; 4572 4573 def : Pat<(v4f32 (fsub_mlx (v4f32 QPR:$src1), 4574 (fmul_su (v4f32 QPR:$src2), 4575 (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))), 4576 (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2), 4577 (v2f32 (EXTRACT_SUBREG QPR:$src3, 4578 (DSubReg_i32_reg imm:$lane))), 4579 (SubReg_i32_lane imm:$lane)))>, 4580 Requires<[HasNEON, UseFPVMLx]>; 4581 4582 // VMLSL : Vector Multiply Subtract Long (Q -= D * D) 4583 defm VMLSLs : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D, 4584 "vmlsl", "s", NEONvmulls, sub>; 4585 defm VMLSLu : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D, 4586 "vmlsl", "u", NEONvmullu, sub>; 4587 4588 defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", NEONvmulls, sub>; 4589 defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>; 4590 4591 // VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D) 4592 defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D, 4593 "vqdmlsl", "s", null_frag>; 4594 defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111, "vqdmlsl", "s", null_frag>; 4595 4596 def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1), 4597 (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), 4598 (v4i16 DPR:$Vm))))), 4599 (VQDMLSLv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>; 4600 def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1), 4601 (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), 4602 (v2i32 DPR:$Vm))))), 4603 (VQDMLSLv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>; 4604 def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1), 4605 (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), 4606 (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm), 4607 imm:$lane)))))), 4608 (VQDMLSLslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>; 4609 def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1), 4610 (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), 4611 (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm), 4612 imm:$lane)))))), 4613 (VQDMLSLslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>; 4614 4615 // Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations. 4616 def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32", 4617 v2f32, fmul_su, fadd_mlx>, 4618 Requires<[HasNEON,HasVFP4,UseFusedMAC]>; 4619 4620 def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32", 4621 v4f32, fmul_su, fadd_mlx>, 4622 Requires<[HasNEON,HasVFP4,UseFusedMAC]>; 4623 def VFMAhd : N3VDMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACD, "vfma", "f16", 4624 v4f16, fmul, fadd>, 4625 Requires<[HasNEON,HasFullFP16,UseFusedMAC]>; 4626 4627 def VFMAhq : N3VQMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACQ, "vfma", "f16", 4628 v8f16, fmul, fadd>, 4629 Requires<[HasNEON,HasFullFP16,UseFusedMAC]>; 4630 4631 // Fused Vector Multiply Subtract (floating-point) 4632 def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32", 4633 v2f32, fmul_su, fsub_mlx>, 4634 Requires<[HasNEON,HasVFP4,UseFusedMAC]>; 4635 def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32", 4636 v4f32, fmul_su, fsub_mlx>, 4637 Requires<[HasNEON,HasVFP4,UseFusedMAC]>; 4638 def VFMShd : N3VDMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACD, "vfms", "f16", 4639 v4f16, fmul, fsub>, 4640 Requires<[HasNEON,HasFullFP16,UseFusedMAC]>; 4641 def VFMShq : N3VQMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACQ, "vfms", "f16", 4642 v8f16, fmul, fsub>, 4643 Requires<[HasNEON,HasFullFP16,UseFusedMAC]>; 4644 4645 // Match @llvm.fma.* intrinsics 4646 def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)), 4647 (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>, 4648 Requires<[HasVFP4]>; 4649 def : Pat<(v4f32 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)), 4650 (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>, 4651 Requires<[HasVFP4]>; 4652 def : Pat<(v2f32 (fma (fneg DPR:$Vn), DPR:$Vm, DPR:$src1)), 4653 (VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>, 4654 Requires<[HasVFP4]>; 4655 def : Pat<(v4f32 (fma (fneg QPR:$Vn), QPR:$Vm, QPR:$src1)), 4656 (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>, 4657 Requires<[HasVFP4]>; 4658 4659 // Vector Subtract Operations. 4660 4661 // VSUB : Vector Subtract (integer and floating-point) 4662 defm VSUB : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ, 4663 "vsub", "i", sub, 0>; 4664 def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32", 4665 v2f32, v2f32, fsub, 0>; 4666 def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32", 4667 v4f32, v4f32, fsub, 0>; 4668 def VSUBhd : N3VD<0, 0, 0b11, 0b1101, 0, IIC_VBIND, "vsub", "f16", 4669 v4f16, v4f16, fsub, 0>, 4670 Requires<[HasNEON,HasFullFP16]>; 4671 def VSUBhq : N3VQ<0, 0, 0b11, 0b1101, 0, IIC_VBINQ, "vsub", "f16", 4672 v8f16, v8f16, fsub, 0>, 4673 Requires<[HasNEON,HasFullFP16]>; 4674 // VSUBL : Vector Subtract Long (Q = D - D) 4675 defm VSUBLs : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD, 4676 "vsubl", "s", sub, sext, 0>; 4677 defm VSUBLu : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD, 4678 "vsubl", "u", sub, zext, 0>; 4679 // VSUBW : Vector Subtract Wide (Q = Q - D) 4680 defm VSUBWs : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>; 4681 defm VSUBWu : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>; 4682 // VHSUB : Vector Halving Subtract 4683 defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm, 4684 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, 4685 "vhsub", "s", int_arm_neon_vhsubs, 0>; 4686 defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm, 4687 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, 4688 "vhsub", "u", int_arm_neon_vhsubu, 0>; 4689 // VQSUB : Vector Saturing Subtract 4690 defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm, 4691 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, 4692 "vqsub", "s", int_arm_neon_vqsubs, 0>; 4693 defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm, 4694 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, 4695 "vqsub", "u", int_arm_neon_vqsubu, 0>; 4696 // VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q) 4697 defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", null_frag, 0>; 4698 // VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q) 4699 defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i", 4700 int_arm_neon_vrsubhn, 0>; 4701 4702 def : Pat<(v8i8 (trunc (NEONvshru (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))), 4703 (VSUBHNv8i8 QPR:$Vn, QPR:$Vm)>; 4704 def : Pat<(v4i16 (trunc (NEONvshru (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))), 4705 (VSUBHNv4i16 QPR:$Vn, QPR:$Vm)>; 4706 def : Pat<(v2i32 (trunc (NEONvshru (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))), 4707 (VSUBHNv2i32 QPR:$Vn, QPR:$Vm)>; 4708 4709 // Vector Comparisons. 4710 4711 // VCEQ : Vector Compare Equal 4712 defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, 4713 IIC_VSUBi4Q, "vceq", "i", NEONvceq, 1>; 4714 def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32, 4715 NEONvceq, 1>; 4716 def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32, 4717 NEONvceq, 1>; 4718 def VCEQhd : N3VD<0,0,0b01,0b1110,0, IIC_VBIND, "vceq", "f16", v4i16, v4f16, 4719 NEONvceq, 1>, 4720 Requires<[HasNEON, HasFullFP16]>; 4721 def VCEQhq : N3VQ<0,0,0b01,0b1110,0, IIC_VBINQ, "vceq", "f16", v8i16, v8f16, 4722 NEONvceq, 1>, 4723 Requires<[HasNEON, HasFullFP16]>; 4724 4725 let TwoOperandAliasConstraint = "$Vm = $Vd" in 4726 defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i", 4727 "$Vd, $Vm, #0", NEONvceqz>; 4728 4729 // VCGE : Vector Compare Greater Than or Equal 4730 defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, 4731 IIC_VSUBi4Q, "vcge", "s", NEONvcge, 0>; 4732 defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, 4733 IIC_VSUBi4Q, "vcge", "u", NEONvcgeu, 0>; 4734 def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32, 4735 NEONvcge, 0>; 4736 def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32, 4737 NEONvcge, 0>; 4738 def VCGEhd : N3VD<1,0,0b01,0b1110,0, IIC_VBIND, "vcge", "f16", v4i16, v4f16, 4739 NEONvcge, 0>, 4740 Requires<[HasNEON, HasFullFP16]>; 4741 def VCGEhq : N3VQ<1,0,0b01,0b1110,0, IIC_VBINQ, "vcge", "f16", v8i16, v8f16, 4742 NEONvcge, 0>, 4743 Requires<[HasNEON, HasFullFP16]>; 4744 4745 let TwoOperandAliasConstraint = "$Vm = $Vd" in { 4746 defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s", 4747 "$Vd, $Vm, #0", NEONvcgez>; 4748 defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s", 4749 "$Vd, $Vm, #0", NEONvclez>; 4750 } 4751 4752 // VCGT : Vector Compare Greater Than 4753 defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, 4754 IIC_VSUBi4Q, "vcgt", "s", NEONvcgt, 0>; 4755 defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, 4756 IIC_VSUBi4Q, "vcgt", "u", NEONvcgtu, 0>; 4757 def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32, 4758 NEONvcgt, 0>; 4759 def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32, 4760 NEONvcgt, 0>; 4761 def VCGThd : N3VD<1,0,0b11,0b1110,0, IIC_VBIND, "vcgt", "f16", v4i16, v4f16, 4762 NEONvcgt, 0>, 4763 Requires<[HasNEON, HasFullFP16]>; 4764 def VCGThq : N3VQ<1,0,0b11,0b1110,0, IIC_VBINQ, "vcgt", "f16", v8i16, v8f16, 4765 NEONvcgt, 0>, 4766 Requires<[HasNEON, HasFullFP16]>; 4767 4768 let TwoOperandAliasConstraint = "$Vm = $Vd" in { 4769 defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s", 4770 "$Vd, $Vm, #0", NEONvcgtz>; 4771 defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s", 4772 "$Vd, $Vm, #0", NEONvcltz>; 4773 } 4774 4775 // VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE) 4776 def VACGEfd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge", 4777 "f32", v2i32, v2f32, int_arm_neon_vacge, 0>; 4778 def VACGEfq : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge", 4779 "f32", v4i32, v4f32, int_arm_neon_vacge, 0>; 4780 def VACGEhd : N3VDInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge", 4781 "f16", v4i16, v4f16, int_arm_neon_vacge, 0>, 4782 Requires<[HasNEON, HasFullFP16]>; 4783 def VACGEhq : N3VQInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge", 4784 "f16", v8i16, v8f16, int_arm_neon_vacge, 0>, 4785 Requires<[HasNEON, HasFullFP16]>; 4786 // VACGT : Vector Absolute Compare Greater Than (aka VCAGT) 4787 def VACGTfd : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt", 4788 "f32", v2i32, v2f32, int_arm_neon_vacgt, 0>; 4789 def VACGTfq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt", 4790 "f32", v4i32, v4f32, int_arm_neon_vacgt, 0>; 4791 def VACGThd : N3VDInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt", 4792 "f16", v4i16, v4f16, int_arm_neon_vacgt, 0>, 4793 Requires<[HasNEON, HasFullFP16]>; 4794 def VACGThq : N3VQInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt", 4795 "f16", v8f16, v8f16, int_arm_neon_vacgt, 0>, 4796 Requires<[HasNEON, HasFullFP16]>; 4797 // VTST : Vector Test Bits 4798 defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, 4799 IIC_VBINi4Q, "vtst", "", NEONvtst, 1>; 4800 4801 def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm", 4802 (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>; 4803 def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm", 4804 (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>; 4805 def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm", 4806 (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>; 4807 def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm", 4808 (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>; 4809 let Predicates = [HasNEON, HasFullFP16] in { 4810 def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm", 4811 (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>; 4812 def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm", 4813 (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>; 4814 def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm", 4815 (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>; 4816 def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm", 4817 (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>; 4818 } 4819 4820 def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm", 4821 (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>; 4822 def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm", 4823 (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>; 4824 def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm", 4825 (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>; 4826 def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm", 4827 (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>; 4828 let Predicates = [HasNEON, HasFullFP16] in { 4829 def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm", 4830 (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>; 4831 def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm", 4832 (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>; 4833 def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm", 4834 (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>; 4835 def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm", 4836 (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>; 4837 } 4838 4839 // Vector Bitwise Operations. 4840 4841 def vnotd : PatFrag<(ops node:$in), 4842 (xor node:$in, (bitconvert (v8i8 NEONimmAllOnesV)))>; 4843 def vnotq : PatFrag<(ops node:$in), 4844 (xor node:$in, (bitconvert (v16i8 NEONimmAllOnesV)))>; 4845 4846 4847 // VAND : Vector Bitwise AND 4848 def VANDd : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand", 4849 v2i32, v2i32, and, 1>; 4850 def VANDq : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand", 4851 v4i32, v4i32, and, 1>; 4852 4853 // VEOR : Vector Bitwise Exclusive OR 4854 def VEORd : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor", 4855 v2i32, v2i32, xor, 1>; 4856 def VEORq : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor", 4857 v4i32, v4i32, xor, 1>; 4858 4859 // VORR : Vector Bitwise OR 4860 def VORRd : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr", 4861 v2i32, v2i32, or, 1>; 4862 def VORRq : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr", 4863 v4i32, v4i32, or, 1>; 4864 4865 def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1, 4866 (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src), 4867 IIC_VMOVImm, 4868 "vorr", "i16", "$Vd, $SIMM", "$src = $Vd", 4869 [(set DPR:$Vd, 4870 (v4i16 (NEONvorrImm DPR:$src, timm:$SIMM)))]> { 4871 let Inst{9} = SIMM{9}; 4872 } 4873 4874 def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1, 4875 (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src), 4876 IIC_VMOVImm, 4877 "vorr", "i32", "$Vd, $SIMM", "$src = $Vd", 4878 [(set DPR:$Vd, 4879 (v2i32 (NEONvorrImm DPR:$src, timm:$SIMM)))]> { 4880 let Inst{10-9} = SIMM{10-9}; 4881 } 4882 4883 def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1, 4884 (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src), 4885 IIC_VMOVImm, 4886 "vorr", "i16", "$Vd, $SIMM", "$src = $Vd", 4887 [(set QPR:$Vd, 4888 (v8i16 (NEONvorrImm QPR:$src, timm:$SIMM)))]> { 4889 let Inst{9} = SIMM{9}; 4890 } 4891 4892 def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1, 4893 (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src), 4894 IIC_VMOVImm, 4895 "vorr", "i32", "$Vd, $SIMM", "$src = $Vd", 4896 [(set QPR:$Vd, 4897 (v4i32 (NEONvorrImm QPR:$src, timm:$SIMM)))]> { 4898 let Inst{10-9} = SIMM{10-9}; 4899 } 4900 4901 4902 // VBIC : Vector Bitwise Bit Clear (AND NOT) 4903 let TwoOperandAliasConstraint = "$Vn = $Vd" in { 4904 def VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd), 4905 (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD, 4906 "vbic", "$Vd, $Vn, $Vm", "", 4907 [(set DPR:$Vd, (v2i32 (and DPR:$Vn, 4908 (vnotd DPR:$Vm))))]>; 4909 def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd), 4910 (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ, 4911 "vbic", "$Vd, $Vn, $Vm", "", 4912 [(set QPR:$Vd, (v4i32 (and QPR:$Vn, 4913 (vnotq QPR:$Vm))))]>; 4914 } 4915 4916 def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1, 4917 (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src), 4918 IIC_VMOVImm, 4919 "vbic", "i16", "$Vd, $SIMM", "$src = $Vd", 4920 [(set DPR:$Vd, 4921 (v4i16 (NEONvbicImm DPR:$src, timm:$SIMM)))]> { 4922 let Inst{9} = SIMM{9}; 4923 } 4924 4925 def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1, 4926 (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src), 4927 IIC_VMOVImm, 4928 "vbic", "i32", "$Vd, $SIMM", "$src = $Vd", 4929 [(set DPR:$Vd, 4930 (v2i32 (NEONvbicImm DPR:$src, timm:$SIMM)))]> { 4931 let Inst{10-9} = SIMM{10-9}; 4932 } 4933 4934 def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1, 4935 (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src), 4936 IIC_VMOVImm, 4937 "vbic", "i16", "$Vd, $SIMM", "$src = $Vd", 4938 [(set QPR:$Vd, 4939 (v8i16 (NEONvbicImm QPR:$src, timm:$SIMM)))]> { 4940 let Inst{9} = SIMM{9}; 4941 } 4942 4943 def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1, 4944 (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src), 4945 IIC_VMOVImm, 4946 "vbic", "i32", "$Vd, $SIMM", "$src = $Vd", 4947 [(set QPR:$Vd, 4948 (v4i32 (NEONvbicImm QPR:$src, timm:$SIMM)))]> { 4949 let Inst{10-9} = SIMM{10-9}; 4950 } 4951 4952 // VORN : Vector Bitwise OR NOT 4953 def VORNd : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$Vd), 4954 (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD, 4955 "vorn", "$Vd, $Vn, $Vm", "", 4956 [(set DPR:$Vd, (v2i32 (or DPR:$Vn, 4957 (vnotd DPR:$Vm))))]>; 4958 def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$Vd), 4959 (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ, 4960 "vorn", "$Vd, $Vn, $Vm", "", 4961 [(set QPR:$Vd, (v4i32 (or QPR:$Vn, 4962 (vnotq QPR:$Vm))))]>; 4963 4964 // VMVN : Vector Bitwise NOT (Immediate) 4965 4966 let isReMaterializable = 1 in { 4967 4968 def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$Vd), 4969 (ins nImmSplatI16:$SIMM), IIC_VMOVImm, 4970 "vmvn", "i16", "$Vd, $SIMM", "", 4971 [(set DPR:$Vd, (v4i16 (NEONvmvnImm timm:$SIMM)))]> { 4972 let Inst{9} = SIMM{9}; 4973 } 4974 4975 def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$Vd), 4976 (ins nImmSplatI16:$SIMM), IIC_VMOVImm, 4977 "vmvn", "i16", "$Vd, $SIMM", "", 4978 [(set QPR:$Vd, (v8i16 (NEONvmvnImm timm:$SIMM)))]> { 4979 let Inst{9} = SIMM{9}; 4980 } 4981 4982 def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$Vd), 4983 (ins nImmVMOVI32:$SIMM), IIC_VMOVImm, 4984 "vmvn", "i32", "$Vd, $SIMM", "", 4985 [(set DPR:$Vd, (v2i32 (NEONvmvnImm timm:$SIMM)))]> { 4986 let Inst{11-8} = SIMM{11-8}; 4987 } 4988 4989 def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$Vd), 4990 (ins nImmVMOVI32:$SIMM), IIC_VMOVImm, 4991 "vmvn", "i32", "$Vd, $SIMM", "", 4992 [(set QPR:$Vd, (v4i32 (NEONvmvnImm timm:$SIMM)))]> { 4993 let Inst{11-8} = SIMM{11-8}; 4994 } 4995 } 4996 4997 // VMVN : Vector Bitwise NOT 4998 def VMVNd : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0, 4999 (outs DPR:$Vd), (ins DPR:$Vm), IIC_VSUBiD, 5000 "vmvn", "$Vd, $Vm", "", 5001 [(set DPR:$Vd, (v2i32 (vnotd DPR:$Vm)))]>; 5002 def VMVNq : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0, 5003 (outs QPR:$Vd), (ins QPR:$Vm), IIC_VSUBiD, 5004 "vmvn", "$Vd, $Vm", "", 5005 [(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>; 5006 def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>; 5007 def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>; 5008 5009 // VBSL : Vector Bitwise Select 5010 def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd), 5011 (ins DPR:$src1, DPR:$Vn, DPR:$Vm), 5012 N3RegFrm, IIC_VCNTiD, 5013 "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd", 5014 [(set DPR:$Vd, 5015 (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>; 5016 def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1), 5017 (v8i8 DPR:$Vn), (v8i8 DPR:$Vm))), 5018 (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>, 5019 Requires<[HasNEON]>; 5020 def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1), 5021 (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))), 5022 (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>, 5023 Requires<[HasNEON]>; 5024 def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1), 5025 (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))), 5026 (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>, 5027 Requires<[HasNEON]>; 5028 def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1), 5029 (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))), 5030 (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>, 5031 Requires<[HasNEON]>; 5032 def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1), 5033 (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))), 5034 (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>, 5035 Requires<[HasNEON]>; 5036 5037 def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd), 5038 (and DPR:$Vm, (vnotd DPR:$Vd)))), 5039 (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>, 5040 Requires<[HasNEON]>; 5041 5042 def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd), 5043 (and DPR:$Vm, (vnotd DPR:$Vd)))), 5044 (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>, 5045 Requires<[HasNEON]>; 5046 5047 def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd), 5048 (ins QPR:$src1, QPR:$Vn, QPR:$Vm), 5049 N3RegFrm, IIC_VCNTiQ, 5050 "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd", 5051 [(set QPR:$Vd, 5052 (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>; 5053 5054 def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1), 5055 (v16i8 QPR:$Vn), (v16i8 QPR:$Vm))), 5056 (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>, 5057 Requires<[HasNEON]>; 5058 def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1), 5059 (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))), 5060 (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>, 5061 Requires<[HasNEON]>; 5062 def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1), 5063 (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))), 5064 (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>, 5065 Requires<[HasNEON]>; 5066 def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1), 5067 (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))), 5068 (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>, 5069 Requires<[HasNEON]>; 5070 def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1), 5071 (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))), 5072 (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>, 5073 Requires<[HasNEON]>; 5074 5075 def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd), 5076 (and QPR:$Vm, (vnotq QPR:$Vd)))), 5077 (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>, 5078 Requires<[HasNEON]>; 5079 def : Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd), 5080 (and QPR:$Vm, (vnotq QPR:$Vd)))), 5081 (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>, 5082 Requires<[HasNEON]>; 5083 5084 // VBIF : Vector Bitwise Insert if False 5085 // like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst", 5086 // FIXME: This instruction's encoding MAY NOT BE correct. 5087 def VBIFd : N3VX<1, 0, 0b11, 0b0001, 0, 1, 5088 (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), 5089 N3RegFrm, IIC_VBINiD, 5090 "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd", 5091 []>; 5092 def VBIFq : N3VX<1, 0, 0b11, 0b0001, 1, 1, 5093 (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), 5094 N3RegFrm, IIC_VBINiQ, 5095 "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd", 5096 []>; 5097 5098 // VBIT : Vector Bitwise Insert if True 5099 // like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst", 5100 // FIXME: This instruction's encoding MAY NOT BE correct. 5101 def VBITd : N3VX<1, 0, 0b10, 0b0001, 0, 1, 5102 (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), 5103 N3RegFrm, IIC_VBINiD, 5104 "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd", 5105 []>; 5106 def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1, 5107 (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), 5108 N3RegFrm, IIC_VBINiQ, 5109 "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd", 5110 []>; 5111 5112 // VBIT/VBIF are not yet implemented. The TwoAddress pass will not go looking 5113 // for equivalent operations with different register constraints; it just 5114 // inserts copies. 5115 5116 // Vector Absolute Differences. 5117 5118 // VABD : Vector Absolute Difference 5119 defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm, 5120 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, 5121 "vabd", "s", int_arm_neon_vabds, 1>; 5122 defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm, 5123 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, 5124 "vabd", "u", int_arm_neon_vabdu, 1>; 5125 def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND, 5126 "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>; 5127 def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ, 5128 "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>; 5129 def VABDhd : N3VDInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBIND, 5130 "vabd", "f16", v4f16, v4f16, int_arm_neon_vabds, 1>, 5131 Requires<[HasNEON, HasFullFP16]>; 5132 def VABDhq : N3VQInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBINQ, 5133 "vabd", "f16", v8f16, v8f16, int_arm_neon_vabds, 1>, 5134 Requires<[HasNEON, HasFullFP16]>; 5135 5136 // VABDL : Vector Absolute Difference Long (Q = | D - D |) 5137 defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q, 5138 "vabdl", "s", int_arm_neon_vabds, zext, 1>; 5139 defm VABDLu : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q, 5140 "vabdl", "u", int_arm_neon_vabdu, zext, 1>; 5141 5142 def abd_shr : 5143 PatFrag<(ops node:$in1, node:$in2, node:$shift), 5144 (NEONvshrs (sub (zext node:$in1), 5145 (zext node:$in2)), (i32 $shift))>; 5146 5147 def : Pat<(xor (v4i32 (bitconvert (v8i16 (abd_shr (v8i8 DPR:$opA), (v8i8 DPR:$opB), 15)))), 5148 (v4i32 (bitconvert (v8i16 (add (sub (zext (v8i8 DPR:$opA)), 5149 (zext (v8i8 DPR:$opB))), 5150 (v8i16 (abd_shr (v8i8 DPR:$opA), (v8i8 DPR:$opB), 15))))))), 5151 (VABDLuv8i16 DPR:$opA, DPR:$opB)>; 5152 5153 def : Pat<(xor (v4i32 (abd_shr (v4i16 DPR:$opA), (v4i16 DPR:$opB), 31)), 5154 (v4i32 (add (sub (zext (v4i16 DPR:$opA)), 5155 (zext (v4i16 DPR:$opB))), 5156 (abd_shr (v4i16 DPR:$opA), (v4i16 DPR:$opB), 31)))), 5157 (VABDLuv4i32 DPR:$opA, DPR:$opB)>; 5158 5159 def : Pat<(xor (v4i32 (bitconvert (v2i64 (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))), 5160 (v4i32 (bitconvert (v2i64 (add (sub (zext (v2i32 DPR:$opA)), 5161 (zext (v2i32 DPR:$opB))), 5162 (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))))), 5163 (VABDLuv2i64 DPR:$opA, DPR:$opB)>; 5164 5165 // VABA : Vector Absolute Difference and Accumulate 5166 defm VABAs : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ, 5167 "vaba", "s", int_arm_neon_vabds, add>; 5168 defm VABAu : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ, 5169 "vaba", "u", int_arm_neon_vabdu, add>; 5170 5171 // VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |) 5172 defm VABALs : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD, 5173 "vabal", "s", int_arm_neon_vabds, zext, add>; 5174 defm VABALu : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD, 5175 "vabal", "u", int_arm_neon_vabdu, zext, add>; 5176 5177 // Vector Maximum and Minimum. 5178 5179 // VMAX : Vector Maximum 5180 defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm, 5181 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, 5182 "vmax", "s", smax, 1>; 5183 defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm, 5184 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, 5185 "vmax", "u", umax, 1>; 5186 def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND, 5187 "vmax", "f32", 5188 v2f32, v2f32, fmaxnan, 1>; 5189 def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ, 5190 "vmax", "f32", 5191 v4f32, v4f32, fmaxnan, 1>; 5192 def VMAXhd : N3VDInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBIND, 5193 "vmax", "f16", 5194 v4f16, v4f16, fmaxnan, 1>, 5195 Requires<[HasNEON, HasFullFP16]>; 5196 def VMAXhq : N3VQInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBINQ, 5197 "vmax", "f16", 5198 v8f16, v8f16, fmaxnan, 1>, 5199 Requires<[HasNEON, HasFullFP16]>; 5200 5201 // VMAXNM 5202 let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in { 5203 def VMAXNMNDf : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1, 5204 N3RegFrm, NoItinerary, "vmaxnm", "f32", 5205 v2f32, v2f32, fmaxnum, 1>, 5206 Requires<[HasV8, HasNEON]>; 5207 def VMAXNMNQf : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1, 5208 N3RegFrm, NoItinerary, "vmaxnm", "f32", 5209 v4f32, v4f32, fmaxnum, 1>, 5210 Requires<[HasV8, HasNEON]>; 5211 def VMAXNMNDh : N3VDIntnp<0b00110, 0b01, 0b1111, 0, 1, 5212 N3RegFrm, NoItinerary, "vmaxnm", "f16", 5213 v4f16, v4f16, fmaxnum, 1>, 5214 Requires<[HasV8, HasNEON, HasFullFP16]>; 5215 def VMAXNMNQh : N3VQIntnp<0b00110, 0b01, 0b1111, 1, 1, 5216 N3RegFrm, NoItinerary, "vmaxnm", "f16", 5217 v8f16, v8f16, fmaxnum, 1>, 5218 Requires<[HasV8, HasNEON, HasFullFP16]>; 5219 } 5220 5221 // VMIN : Vector Minimum 5222 defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm, 5223 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, 5224 "vmin", "s", smin, 1>; 5225 defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm, 5226 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, 5227 "vmin", "u", umin, 1>; 5228 def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND, 5229 "vmin", "f32", 5230 v2f32, v2f32, fminnan, 1>; 5231 def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ, 5232 "vmin", "f32", 5233 v4f32, v4f32, fminnan, 1>; 5234 def VMINhd : N3VDInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBIND, 5235 "vmin", "f16", 5236 v4f16, v4f16, fminnan, 1>, 5237 Requires<[HasNEON, HasFullFP16]>; 5238 def VMINhq : N3VQInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBINQ, 5239 "vmin", "f16", 5240 v8f16, v8f16, fminnan, 1>, 5241 Requires<[HasNEON, HasFullFP16]>; 5242 5243 // VMINNM 5244 let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in { 5245 def VMINNMNDf : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1, 5246 N3RegFrm, NoItinerary, "vminnm", "f32", 5247 v2f32, v2f32, fminnum, 1>, 5248 Requires<[HasV8, HasNEON]>; 5249 def VMINNMNQf : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1, 5250 N3RegFrm, NoItinerary, "vminnm", "f32", 5251 v4f32, v4f32, fminnum, 1>, 5252 Requires<[HasV8, HasNEON]>; 5253 def VMINNMNDh : N3VDIntnp<0b00110, 0b11, 0b1111, 0, 1, 5254 N3RegFrm, NoItinerary, "vminnm", "f16", 5255 v4f16, v4f16, fminnum, 1>, 5256 Requires<[HasV8, HasNEON, HasFullFP16]>; 5257 def VMINNMNQh : N3VQIntnp<0b00110, 0b11, 0b1111, 1, 1, 5258 N3RegFrm, NoItinerary, "vminnm", "f16", 5259 v8f16, v8f16, fminnum, 1>, 5260 Requires<[HasV8, HasNEON, HasFullFP16]>; 5261 } 5262 5263 // Vector Pairwise Operations. 5264 5265 // VPADD : Vector Pairwise Add 5266 def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD, 5267 "vpadd", "i8", 5268 v8i8, v8i8, int_arm_neon_vpadd, 0>; 5269 def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD, 5270 "vpadd", "i16", 5271 v4i16, v4i16, int_arm_neon_vpadd, 0>; 5272 def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD, 5273 "vpadd", "i32", 5274 v2i32, v2i32, int_arm_neon_vpadd, 0>; 5275 def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm, 5276 IIC_VPBIND, "vpadd", "f32", 5277 v2f32, v2f32, int_arm_neon_vpadd, 0>; 5278 def VPADDh : N3VDInt<1, 0, 0b01, 0b1101, 0, N3RegFrm, 5279 IIC_VPBIND, "vpadd", "f16", 5280 v4f16, v4f16, int_arm_neon_vpadd, 0>, 5281 Requires<[HasNEON, HasFullFP16]>; 5282 5283 // VPADDL : Vector Pairwise Add Long 5284 defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s", 5285 int_arm_neon_vpaddls>; 5286 defm VPADDLu : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u", 5287 int_arm_neon_vpaddlu>; 5288 5289 // VPADAL : Vector Pairwise Add and Accumulate Long 5290 defm VPADALs : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s", 5291 int_arm_neon_vpadals>; 5292 defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u", 5293 int_arm_neon_vpadalu>; 5294 5295 // VPMAX : Vector Pairwise Maximum 5296 def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", 5297 "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>; 5298 def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", 5299 "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>; 5300 def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", 5301 "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>; 5302 def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", 5303 "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>; 5304 def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", 5305 "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>; 5306 def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", 5307 "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>; 5308 def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax", 5309 "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>; 5310 def VPMAXh : N3VDInt<1, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax", 5311 "f16", v4f16, v4f16, int_arm_neon_vpmaxs, 0>, 5312 Requires<[HasNEON, HasFullFP16]>; 5313 5314 // VPMIN : Vector Pairwise Minimum 5315 def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", 5316 "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>; 5317 def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", 5318 "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>; 5319 def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", 5320 "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>; 5321 def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", 5322 "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>; 5323 def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", 5324 "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>; 5325 def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", 5326 "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>; 5327 def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin", 5328 "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>; 5329 def VPMINh : N3VDInt<1, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin", 5330 "f16", v4f16, v4f16, int_arm_neon_vpmins, 0>, 5331 Requires<[HasNEON, HasFullFP16]>; 5332 5333 // Vector Reciprocal and Reciprocal Square Root Estimate and Step. 5334 5335 // VRECPE : Vector Reciprocal Estimate 5336 def VRECPEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, 5337 IIC_VUNAD, "vrecpe", "u32", 5338 v2i32, v2i32, int_arm_neon_vrecpe>; 5339 def VRECPEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, 5340 IIC_VUNAQ, "vrecpe", "u32", 5341 v4i32, v4i32, int_arm_neon_vrecpe>; 5342 def VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, 5343 IIC_VUNAD, "vrecpe", "f32", 5344 v2f32, v2f32, int_arm_neon_vrecpe>; 5345 def VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, 5346 IIC_VUNAQ, "vrecpe", "f32", 5347 v4f32, v4f32, int_arm_neon_vrecpe>; 5348 def VRECPEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0, 5349 IIC_VUNAD, "vrecpe", "f16", 5350 v4f16, v4f16, int_arm_neon_vrecpe>, 5351 Requires<[HasNEON, HasFullFP16]>; 5352 def VRECPEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0, 5353 IIC_VUNAQ, "vrecpe", "f16", 5354 v8f16, v8f16, int_arm_neon_vrecpe>, 5355 Requires<[HasNEON, HasFullFP16]>; 5356 5357 // VRECPS : Vector Reciprocal Step 5358 def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm, 5359 IIC_VRECSD, "vrecps", "f32", 5360 v2f32, v2f32, int_arm_neon_vrecps, 1>; 5361 def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm, 5362 IIC_VRECSQ, "vrecps", "f32", 5363 v4f32, v4f32, int_arm_neon_vrecps, 1>; 5364 def VRECPShd : N3VDInt<0, 0, 0b01, 0b1111, 1, N3RegFrm, 5365 IIC_VRECSD, "vrecps", "f16", 5366 v4f16, v4f16, int_arm_neon_vrecps, 1>, 5367 Requires<[HasNEON, HasFullFP16]>; 5368 def VRECPShq : N3VQInt<0, 0, 0b01, 0b1111, 1, N3RegFrm, 5369 IIC_VRECSQ, "vrecps", "f16", 5370 v8f16, v8f16, int_arm_neon_vrecps, 1>, 5371 Requires<[HasNEON, HasFullFP16]>; 5372 5373 // VRSQRTE : Vector Reciprocal Square Root Estimate 5374 def VRSQRTEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, 5375 IIC_VUNAD, "vrsqrte", "u32", 5376 v2i32, v2i32, int_arm_neon_vrsqrte>; 5377 def VRSQRTEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, 5378 IIC_VUNAQ, "vrsqrte", "u32", 5379 v4i32, v4i32, int_arm_neon_vrsqrte>; 5380 def VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, 5381 IIC_VUNAD, "vrsqrte", "f32", 5382 v2f32, v2f32, int_arm_neon_vrsqrte>; 5383 def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, 5384 IIC_VUNAQ, "vrsqrte", "f32", 5385 v4f32, v4f32, int_arm_neon_vrsqrte>; 5386 def VRSQRTEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0, 5387 IIC_VUNAD, "vrsqrte", "f16", 5388 v4f16, v4f16, int_arm_neon_vrsqrte>, 5389 Requires<[HasNEON, HasFullFP16]>; 5390 def VRSQRTEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0, 5391 IIC_VUNAQ, "vrsqrte", "f16", 5392 v8f16, v8f16, int_arm_neon_vrsqrte>, 5393 Requires<[HasNEON, HasFullFP16]>; 5394 5395 // VRSQRTS : Vector Reciprocal Square Root Step 5396 def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm, 5397 IIC_VRECSD, "vrsqrts", "f32", 5398 v2f32, v2f32, int_arm_neon_vrsqrts, 1>; 5399 def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm, 5400 IIC_VRECSQ, "vrsqrts", "f32", 5401 v4f32, v4f32, int_arm_neon_vrsqrts, 1>; 5402 def VRSQRTShd : N3VDInt<0, 0, 0b11, 0b1111, 1, N3RegFrm, 5403 IIC_VRECSD, "vrsqrts", "f16", 5404 v4f16, v4f16, int_arm_neon_vrsqrts, 1>, 5405 Requires<[HasNEON, HasFullFP16]>; 5406 def VRSQRTShq : N3VQInt<0, 0, 0b11, 0b1111, 1, N3RegFrm, 5407 IIC_VRECSQ, "vrsqrts", "f16", 5408 v8f16, v8f16, int_arm_neon_vrsqrts, 1>, 5409 Requires<[HasNEON, HasFullFP16]>; 5410 5411 // Vector Shifts. 5412 5413 // VSHL : Vector Shift 5414 defm VSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm, 5415 IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ, 5416 "vshl", "s", int_arm_neon_vshifts>; 5417 defm VSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm, 5418 IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ, 5419 "vshl", "u", int_arm_neon_vshiftu>; 5420 5421 // VSHL : Vector Shift Left (Immediate) 5422 defm VSHLi : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>; 5423 5424 // VSHR : Vector Shift Right (Immediate) 5425 defm VSHRs : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", "VSHRs", 5426 NEONvshrs>; 5427 defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu", 5428 NEONvshru>; 5429 5430 // VSHLL : Vector Shift Left Long 5431 defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", 5432 PatFrag<(ops node:$LHS, node:$RHS), (NEONvshl (sext node:$LHS), node:$RHS)>>; 5433 defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u", 5434 PatFrag<(ops node:$LHS, node:$RHS), (NEONvshl (zext node:$LHS), node:$RHS)>>; 5435 5436 // VSHLL : Vector Shift Left Long (with maximum shift count) 5437 class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, 5438 bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy, 5439 ValueType OpTy, Operand ImmTy> 5440 : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt, 5441 ResTy, OpTy, ImmTy, null_frag> { 5442 let Inst{21-16} = op21_16; 5443 let DecoderMethod = "DecodeVSHLMaxInstruction"; 5444 } 5445 def VSHLLi8 : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8", 5446 v8i16, v8i8, imm8>; 5447 def VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16", 5448 v4i32, v4i16, imm16>; 5449 def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32", 5450 v2i64, v2i32, imm32>; 5451 5452 def : Pat<(v8i16 (NEONvshl (zext (v8i8 DPR:$Rn)), (i32 8))), 5453 (VSHLLi8 DPR:$Rn, 8)>; 5454 def : Pat<(v4i32 (NEONvshl (zext (v4i16 DPR:$Rn)), (i32 16))), 5455 (VSHLLi16 DPR:$Rn, 16)>; 5456 def : Pat<(v2i64 (NEONvshl (zext (v2i32 DPR:$Rn)), (i32 32))), 5457 (VSHLLi32 DPR:$Rn, 32)>; 5458 def : Pat<(v8i16 (NEONvshl (sext (v8i8 DPR:$Rn)), (i32 8))), 5459 (VSHLLi8 DPR:$Rn, 8)>; 5460 def : Pat<(v4i32 (NEONvshl (sext (v4i16 DPR:$Rn)), (i32 16))), 5461 (VSHLLi16 DPR:$Rn, 16)>; 5462 def : Pat<(v2i64 (NEONvshl (sext (v2i32 DPR:$Rn)), (i32 32))), 5463 (VSHLLi32 DPR:$Rn, 32)>; 5464 5465 // VSHRN : Vector Shift Right and Narrow 5466 defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i", 5467 PatFrag<(ops node:$Rn, node:$amt), 5468 (trunc (NEONvshrs node:$Rn, node:$amt))>>; 5469 5470 def : Pat<(v8i8 (trunc (NEONvshru (v8i16 QPR:$Vn), shr_imm8:$amt))), 5471 (VSHRNv8i8 QPR:$Vn, shr_imm8:$amt)>; 5472 def : Pat<(v4i16 (trunc (NEONvshru (v4i32 QPR:$Vn), shr_imm16:$amt))), 5473 (VSHRNv4i16 QPR:$Vn, shr_imm16:$amt)>; 5474 def : Pat<(v2i32 (trunc (NEONvshru (v2i64 QPR:$Vn), shr_imm32:$amt))), 5475 (VSHRNv2i32 QPR:$Vn, shr_imm32:$amt)>; 5476 5477 // VRSHL : Vector Rounding Shift 5478 defm VRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm, 5479 IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, 5480 "vrshl", "s", int_arm_neon_vrshifts>; 5481 defm VRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm, 5482 IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, 5483 "vrshl", "u", int_arm_neon_vrshiftu>; 5484 // VRSHR : Vector Rounding Shift Right 5485 defm VRSHRs : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", "VRSHRs", 5486 NEONvrshrs>; 5487 defm VRSHRu : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", "VRSHRu", 5488 NEONvrshru>; 5489 5490 // VRSHRN : Vector Rounding Shift Right and Narrow 5491 defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i", 5492 NEONvrshrn>; 5493 5494 // VQSHL : Vector Saturating Shift 5495 defm VQSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm, 5496 IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, 5497 "vqshl", "s", int_arm_neon_vqshifts>; 5498 defm VQSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm, 5499 IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, 5500 "vqshl", "u", int_arm_neon_vqshiftu>; 5501 // VQSHL : Vector Saturating Shift Left (Immediate) 5502 defm VQSHLsi : N2VShL_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshls>; 5503 defm VQSHLui : N2VShL_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshlu>; 5504 5505 // VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned) 5506 defm VQSHLsu : N2VShL_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsu>; 5507 5508 // VQSHRN : Vector Saturating Shift Right and Narrow 5509 defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s", 5510 NEONvqshrns>; 5511 defm VQSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u", 5512 NEONvqshrnu>; 5513 5514 // VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned) 5515 defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s", 5516 NEONvqshrnsu>; 5517 5518 // VQRSHL : Vector Saturating Rounding Shift 5519 defm VQRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm, 5520 IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, 5521 "vqrshl", "s", int_arm_neon_vqrshifts>; 5522 defm VQRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm, 5523 IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, 5524 "vqrshl", "u", int_arm_neon_vqrshiftu>; 5525 5526 // VQRSHRN : Vector Saturating Rounding Shift Right and Narrow 5527 defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s", 5528 NEONvqrshrns>; 5529 defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u", 5530 NEONvqrshrnu>; 5531 5532 // VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned) 5533 defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s", 5534 NEONvqrshrnsu>; 5535 5536 // VSRA : Vector Shift Right and Accumulate 5537 defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrs>; 5538 defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", NEONvshru>; 5539 // VRSRA : Vector Rounding Shift Right and Accumulate 5540 defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>; 5541 defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>; 5542 5543 // VSLI : Vector Shift Left and Insert 5544 defm VSLI : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">; 5545 5546 // VSRI : Vector Shift Right and Insert 5547 defm VSRI : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">; 5548 5549 // Vector Absolute and Saturating Absolute. 5550 5551 // VABS : Vector Absolute Value 5552 defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0, 5553 IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s", 5554 int_arm_neon_vabs>; 5555 def VABSfd : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0, 5556 "vabs", "f32", 5557 v2f32, v2f32, fabs>; 5558 def VABSfq : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0, 5559 "vabs", "f32", 5560 v4f32, v4f32, fabs>; 5561 def VABShd : N2VD<0b11, 0b11, 0b01, 0b01, 0b01110, 0, 5562 "vabs", "f16", 5563 v4f16, v4f16, fabs>, 5564 Requires<[HasNEON, HasFullFP16]>; 5565 def VABShq : N2VQ<0b11, 0b11, 0b01, 0b01, 0b01110, 0, 5566 "vabs", "f16", 5567 v8f16, v8f16, fabs>, 5568 Requires<[HasNEON, HasFullFP16]>; 5569 5570 def : Pat<(xor (v2i32 (bitconvert (v8i8 (NEONvshrs DPR:$src, (i32 7))))), 5571 (v2i32 (bitconvert (v8i8 (add DPR:$src, 5572 (NEONvshrs DPR:$src, (i32 7))))))), 5573 (VABSv8i8 DPR:$src)>; 5574 def : Pat<(xor (v2i32 (bitconvert (v4i16 (NEONvshrs DPR:$src, (i32 15))))), 5575 (v2i32 (bitconvert (v4i16 (add DPR:$src, 5576 (NEONvshrs DPR:$src, (i32 15))))))), 5577 (VABSv4i16 DPR:$src)>; 5578 def : Pat<(xor (v2i32 (NEONvshrs DPR:$src, (i32 31))), 5579 (v2i32 (add DPR:$src, (NEONvshrs DPR:$src, (i32 31))))), 5580 (VABSv2i32 DPR:$src)>; 5581 def : Pat<(xor (v4i32 (bitconvert (v16i8 (NEONvshrs QPR:$src, (i32 7))))), 5582 (v4i32 (bitconvert (v16i8 (add QPR:$src, 5583 (NEONvshrs QPR:$src, (i32 7))))))), 5584 (VABSv16i8 QPR:$src)>; 5585 def : Pat<(xor (v4i32 (bitconvert (v8i16 (NEONvshrs QPR:$src, (i32 15))))), 5586 (v4i32 (bitconvert (v8i16 (add QPR:$src, 5587 (NEONvshrs QPR:$src, (i32 15))))))), 5588 (VABSv8i16 QPR:$src)>; 5589 def : Pat<(xor (v4i32 (NEONvshrs QPR:$src, (i32 31))), 5590 (v4i32 (add QPR:$src, (NEONvshrs QPR:$src, (i32 31))))), 5591 (VABSv4i32 QPR:$src)>; 5592 5593 // VQABS : Vector Saturating Absolute Value 5594 defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0, 5595 IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s", 5596 int_arm_neon_vqabs>; 5597 5598 // Vector Negate. 5599 5600 def vnegd : PatFrag<(ops node:$in), 5601 (sub (bitconvert (v2i32 NEONimmAllZerosV)), node:$in)>; 5602 def vnegq : PatFrag<(ops node:$in), 5603 (sub (bitconvert (v4i32 NEONimmAllZerosV)), node:$in)>; 5604 5605 class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty> 5606 : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$Vd), (ins DPR:$Vm), 5607 IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "", 5608 [(set DPR:$Vd, (Ty (vnegd DPR:$Vm)))]>; 5609 class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty> 5610 : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$Vd), (ins QPR:$Vm), 5611 IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm", "", 5612 [(set QPR:$Vd, (Ty (vnegq QPR:$Vm)))]>; 5613 5614 // VNEG : Vector Negate (integer) 5615 def VNEGs8d : VNEGD<0b00, "vneg", "s8", v8i8>; 5616 def VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>; 5617 def VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>; 5618 def VNEGs8q : VNEGQ<0b00, "vneg", "s8", v16i8>; 5619 def VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>; 5620 def VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>; 5621 5622 // VNEG : Vector Negate (floating-point) 5623 def VNEGfd : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0, 5624 (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD, 5625 "vneg", "f32", "$Vd, $Vm", "", 5626 [(set DPR:$Vd, (v2f32 (fneg DPR:$Vm)))]>; 5627 def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0, 5628 (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ, 5629 "vneg", "f32", "$Vd, $Vm", "", 5630 [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>; 5631 def VNEGhd : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 0, 0, 5632 (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD, 5633 "vneg", "f16", "$Vd, $Vm", "", 5634 [(set DPR:$Vd, (v4f16 (fneg DPR:$Vm)))]>, 5635 Requires<[HasNEON, HasFullFP16]>; 5636 def VNEGhq : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 1, 0, 5637 (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ, 5638 "vneg", "f16", "$Vd, $Vm", "", 5639 [(set QPR:$Vd, (v8f16 (fneg QPR:$Vm)))]>, 5640 Requires<[HasNEON, HasFullFP16]>; 5641 5642 def : Pat<(v8i8 (vnegd DPR:$src)), (VNEGs8d DPR:$src)>; 5643 def : Pat<(v4i16 (vnegd DPR:$src)), (VNEGs16d DPR:$src)>; 5644 def : Pat<(v2i32 (vnegd DPR:$src)), (VNEGs32d DPR:$src)>; 5645 def : Pat<(v16i8 (vnegq QPR:$src)), (VNEGs8q QPR:$src)>; 5646 def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>; 5647 def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>; 5648 5649 // VQNEG : Vector Saturating Negate 5650 defm VQNEG : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0, 5651 IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s", 5652 int_arm_neon_vqneg>; 5653 5654 // Vector Bit Counting Operations. 5655 5656 // VCLS : Vector Count Leading Sign Bits 5657 defm VCLS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0, 5658 IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s", 5659 int_arm_neon_vcls>; 5660 // VCLZ : Vector Count Leading Zeros 5661 defm VCLZ : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0, 5662 IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i", 5663 ctlz>; 5664 // VCNT : Vector Count One Bits 5665 def VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, 5666 IIC_VCNTiD, "vcnt", "8", 5667 v8i8, v8i8, ctpop>; 5668 def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, 5669 IIC_VCNTiQ, "vcnt", "8", 5670 v16i8, v16i8, ctpop>; 5671 5672 // Vector Swap 5673 def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0, 5674 (outs DPR:$Vd, DPR:$Vm), (ins DPR:$in1, DPR:$in2), 5675 NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm", 5676 []>; 5677 def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0, 5678 (outs QPR:$Vd, QPR:$Vm), (ins QPR:$in1, QPR:$in2), 5679 NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm", 5680 []>; 5681 5682 // Vector Move Operations. 5683 5684 // VMOV : Vector Move (Register) 5685 def : NEONInstAlias<"vmov${p} $Vd, $Vm", 5686 (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>; 5687 def : NEONInstAlias<"vmov${p} $Vd, $Vm", 5688 (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>; 5689 5690 // VMOV : Vector Move (Immediate) 5691 5692 // Although VMOVs are not strictly speaking cheap, they are as expensive 5693 // as their copies counterpart (VORR), so we should prefer rematerialization 5694 // over splitting when it applies. 5695 let isReMaterializable = 1, isAsCheapAsAMove=1 in { 5696 def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd), 5697 (ins nImmSplatI8:$SIMM), IIC_VMOVImm, 5698 "vmov", "i8", "$Vd, $SIMM", "", 5699 [(set DPR:$Vd, (v8i8 (NEONvmovImm timm:$SIMM)))]>; 5700 def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd), 5701 (ins nImmSplatI8:$SIMM), IIC_VMOVImm, 5702 "vmov", "i8", "$Vd, $SIMM", "", 5703 [(set QPR:$Vd, (v16i8 (NEONvmovImm timm:$SIMM)))]>; 5704 5705 def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd), 5706 (ins nImmSplatI16:$SIMM), IIC_VMOVImm, 5707 "vmov", "i16", "$Vd, $SIMM", "", 5708 [(set DPR:$Vd, (v4i16 (NEONvmovImm timm:$SIMM)))]> { 5709 let Inst{9} = SIMM{9}; 5710 } 5711 5712 def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd), 5713 (ins nImmSplatI16:$SIMM), IIC_VMOVImm, 5714 "vmov", "i16", "$Vd, $SIMM", "", 5715 [(set QPR:$Vd, (v8i16 (NEONvmovImm timm:$SIMM)))]> { 5716 let Inst{9} = SIMM{9}; 5717 } 5718 5719 def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd), 5720 (ins nImmVMOVI32:$SIMM), IIC_VMOVImm, 5721 "vmov", "i32", "$Vd, $SIMM", "", 5722 [(set DPR:$Vd, (v2i32 (NEONvmovImm timm:$SIMM)))]> { 5723 let Inst{11-8} = SIMM{11-8}; 5724 } 5725 5726 def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd), 5727 (ins nImmVMOVI32:$SIMM), IIC_VMOVImm, 5728 "vmov", "i32", "$Vd, $SIMM", "", 5729 [(set QPR:$Vd, (v4i32 (NEONvmovImm timm:$SIMM)))]> { 5730 let Inst{11-8} = SIMM{11-8}; 5731 } 5732 5733 def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd), 5734 (ins nImmSplatI64:$SIMM), IIC_VMOVImm, 5735 "vmov", "i64", "$Vd, $SIMM", "", 5736 [(set DPR:$Vd, (v1i64 (NEONvmovImm timm:$SIMM)))]>; 5737 def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd), 5738 (ins nImmSplatI64:$SIMM), IIC_VMOVImm, 5739 "vmov", "i64", "$Vd, $SIMM", "", 5740 [(set QPR:$Vd, (v2i64 (NEONvmovImm timm:$SIMM)))]>; 5741 5742 def VMOVv2f32 : N1ModImm<1, 0b000, 0b1111, 0, 0, 0, 1, (outs DPR:$Vd), 5743 (ins nImmVMOVF32:$SIMM), IIC_VMOVImm, 5744 "vmov", "f32", "$Vd, $SIMM", "", 5745 [(set DPR:$Vd, (v2f32 (NEONvmovFPImm timm:$SIMM)))]>; 5746 def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd), 5747 (ins nImmVMOVF32:$SIMM), IIC_VMOVImm, 5748 "vmov", "f32", "$Vd, $SIMM", "", 5749 [(set QPR:$Vd, (v4f32 (NEONvmovFPImm timm:$SIMM)))]>; 5750 } // isReMaterializable, isAsCheapAsAMove 5751 5752 // Add support for bytes replication feature, so it could be GAS compatible. 5753 // E.g. instructions below: 5754 // "vmov.i32 d0, 0xffffffff" 5755 // "vmov.i32 d0, 0xabababab" 5756 // "vmov.i16 d0, 0xabab" 5757 // are incorrect, but we could deal with such cases. 5758 // For last two instructions, for example, it should emit: 5759 // "vmov.i8 d0, 0xab" 5760 def : NEONInstAlias<"vmov${p}.i16 $Vd, $Vm", 5761 (VMOVv8i8 DPR:$Vd, nImmVMOVI16ByteReplicate:$Vm, pred:$p)>; 5762 def : NEONInstAlias<"vmov${p}.i32 $Vd, $Vm", 5763 (VMOVv8i8 DPR:$Vd, nImmVMOVI32ByteReplicate:$Vm, pred:$p)>; 5764 def : NEONInstAlias<"vmov${p}.i16 $Vd, $Vm", 5765 (VMOVv16i8 QPR:$Vd, nImmVMOVI16ByteReplicate:$Vm, pred:$p)>; 5766 def : NEONInstAlias<"vmov${p}.i32 $Vd, $Vm", 5767 (VMOVv16i8 QPR:$Vd, nImmVMOVI32ByteReplicate:$Vm, pred:$p)>; 5768 5769 // Also add same support for VMVN instructions. So instruction: 5770 // "vmvn.i32 d0, 0xabababab" 5771 // actually means: 5772 // "vmov.i8 d0, 0x54" 5773 def : NEONInstAlias<"vmvn${p}.i16 $Vd, $Vm", 5774 (VMOVv8i8 DPR:$Vd, nImmVMVNI16ByteReplicate:$Vm, pred:$p)>; 5775 def : NEONInstAlias<"vmvn${p}.i32 $Vd, $Vm", 5776 (VMOVv8i8 DPR:$Vd, nImmVMVNI32ByteReplicate:$Vm, pred:$p)>; 5777 def : NEONInstAlias<"vmvn${p}.i16 $Vd, $Vm", 5778 (VMOVv16i8 QPR:$Vd, nImmVMVNI16ByteReplicate:$Vm, pred:$p)>; 5779 def : NEONInstAlias<"vmvn${p}.i32 $Vd, $Vm", 5780 (VMOVv16i8 QPR:$Vd, nImmVMVNI32ByteReplicate:$Vm, pred:$p)>; 5781 5782 // On some CPUs the two instructions "vmov.i32 dD, #0" and "vmov.i32 qD, #0" 5783 // require zero cycles to execute so they should be used wherever possible for 5784 // setting a register to zero. 5785 5786 // Even without these pseudo-insts we would probably end up with the correct 5787 // instruction, but we could not mark the general ones with "isAsCheapAsAMove" 5788 // since they are sometimes rather expensive (in general). 5789 5790 let AddedComplexity = 50, isAsCheapAsAMove = 1, isReMaterializable = 1 in { 5791 def VMOVD0 : ARMPseudoExpand<(outs DPR:$Vd), (ins), 4, IIC_VMOVImm, 5792 [(set DPR:$Vd, (v2i32 NEONimmAllZerosV))], 5793 (VMOVv2i32 DPR:$Vd, 0, (ops 14, zero_reg))>, 5794 Requires<[HasZCZ]>; 5795 def VMOVQ0 : ARMPseudoExpand<(outs QPR:$Vd), (ins), 4, IIC_VMOVImm, 5796 [(set QPR:$Vd, (v4i32 NEONimmAllZerosV))], 5797 (VMOVv4i32 QPR:$Vd, 0, (ops 14, zero_reg))>, 5798 Requires<[HasZCZ]>; 5799 } 5800 5801 // VMOV : Vector Get Lane (move scalar to ARM core register) 5802 5803 def VGETLNs8 : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?}, 5804 (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane), 5805 IIC_VMOVSI, "vmov", "s8", "$R, $V$lane", 5806 [(set GPR:$R, (NEONvgetlanes (v8i8 DPR:$V), 5807 imm:$lane))]> { 5808 let Inst{21} = lane{2}; 5809 let Inst{6-5} = lane{1-0}; 5810 } 5811 def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1}, 5812 (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane), 5813 IIC_VMOVSI, "vmov", "s16", "$R, $V$lane", 5814 [(set GPR:$R, (NEONvgetlanes (v4i16 DPR:$V), 5815 imm:$lane))]> { 5816 let Inst{21} = lane{1}; 5817 let Inst{6} = lane{0}; 5818 } 5819 def VGETLNu8 : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?}, 5820 (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane), 5821 IIC_VMOVSI, "vmov", "u8", "$R, $V$lane", 5822 [(set GPR:$R, (NEONvgetlaneu (v8i8 DPR:$V), 5823 imm:$lane))]> { 5824 let Inst{21} = lane{2}; 5825 let Inst{6-5} = lane{1-0}; 5826 } 5827 def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1}, 5828 (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane), 5829 IIC_VMOVSI, "vmov", "u16", "$R, $V$lane", 5830 [(set GPR:$R, (NEONvgetlaneu (v4i16 DPR:$V), 5831 imm:$lane))]> { 5832 let Inst{21} = lane{1}; 5833 let Inst{6} = lane{0}; 5834 } 5835 def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00, 5836 (outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane), 5837 IIC_VMOVSI, "vmov", "32", "$R, $V$lane", 5838 [(set GPR:$R, (extractelt (v2i32 DPR:$V), 5839 imm:$lane))]>, 5840 Requires<[HasVFP2, HasFastVGETLNi32]> { 5841 let Inst{21} = lane{0}; 5842 } 5843 // def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td 5844 def : Pat<(NEONvgetlanes (v16i8 QPR:$src), imm:$lane), 5845 (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src, 5846 (DSubReg_i8_reg imm:$lane))), 5847 (SubReg_i8_lane imm:$lane))>; 5848 def : Pat<(NEONvgetlanes (v8i16 QPR:$src), imm:$lane), 5849 (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src, 5850 (DSubReg_i16_reg imm:$lane))), 5851 (SubReg_i16_lane imm:$lane))>; 5852 def : Pat<(NEONvgetlaneu (v16i8 QPR:$src), imm:$lane), 5853 (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src, 5854 (DSubReg_i8_reg imm:$lane))), 5855 (SubReg_i8_lane imm:$lane))>; 5856 def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane), 5857 (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src, 5858 (DSubReg_i16_reg imm:$lane))), 5859 (SubReg_i16_lane imm:$lane))>; 5860 def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane), 5861 (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src, 5862 (DSubReg_i32_reg imm:$lane))), 5863 (SubReg_i32_lane imm:$lane))>, 5864 Requires<[HasNEON, HasFastVGETLNi32]>; 5865 def : Pat<(extractelt (v2i32 DPR:$src), imm:$lane), 5866 (COPY_TO_REGCLASS 5867 (i32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>, 5868 Requires<[HasNEON, HasSlowVGETLNi32]>; 5869 def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane), 5870 (COPY_TO_REGCLASS 5871 (i32 (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>, 5872 Requires<[HasNEON, HasSlowVGETLNi32]>; 5873 def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2), 5874 (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)), 5875 (SSubReg_f32_reg imm:$src2))>; 5876 def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2), 5877 (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1),QPR_VFP2)), 5878 (SSubReg_f32_reg imm:$src2))>; 5879 //def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2), 5880 // (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>; 5881 def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2), 5882 (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>; 5883 5884 5885 // VMOV : Vector Set Lane (move ARM core register to scalar) 5886 5887 let Constraints = "$src1 = $V" in { 5888 def VSETLNi8 : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V), 5889 (ins DPR:$src1, GPR:$R, VectorIndex8:$lane), 5890 IIC_VMOVISL, "vmov", "8", "$V$lane, $R", 5891 [(set DPR:$V, (vector_insert (v8i8 DPR:$src1), 5892 GPR:$R, imm:$lane))]> { 5893 let Inst{21} = lane{2}; 5894 let Inst{6-5} = lane{1-0}; 5895 } 5896 def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V), 5897 (ins DPR:$src1, GPR:$R, VectorIndex16:$lane), 5898 IIC_VMOVISL, "vmov", "16", "$V$lane, $R", 5899 [(set DPR:$V, (vector_insert (v4i16 DPR:$src1), 5900 GPR:$R, imm:$lane))]> { 5901 let Inst{21} = lane{1}; 5902 let Inst{6} = lane{0}; 5903 } 5904 def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V), 5905 (ins DPR:$src1, GPR:$R, VectorIndex32:$lane), 5906 IIC_VMOVISL, "vmov", "32", "$V$lane, $R", 5907 [(set DPR:$V, (insertelt (v2i32 DPR:$src1), 5908 GPR:$R, imm:$lane))]>, 5909 Requires<[HasVFP2]> { 5910 let Inst{21} = lane{0}; 5911 // This instruction is equivalent as 5912 // $V = INSERT_SUBREG $src1, $R, translateImmToSubIdx($imm) 5913 let isInsertSubreg = 1; 5914 } 5915 } 5916 def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane), 5917 (v16i8 (INSERT_SUBREG QPR:$src1, 5918 (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1, 5919 (DSubReg_i8_reg imm:$lane))), 5920 GPR:$src2, (SubReg_i8_lane imm:$lane))), 5921 (DSubReg_i8_reg imm:$lane)))>; 5922 def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane), 5923 (v8i16 (INSERT_SUBREG QPR:$src1, 5924 (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1, 5925 (DSubReg_i16_reg imm:$lane))), 5926 GPR:$src2, (SubReg_i16_lane imm:$lane))), 5927 (DSubReg_i16_reg imm:$lane)))>; 5928 def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane), 5929 (v4i32 (INSERT_SUBREG QPR:$src1, 5930 (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1, 5931 (DSubReg_i32_reg imm:$lane))), 5932 GPR:$src2, (SubReg_i32_lane imm:$lane))), 5933 (DSubReg_i32_reg imm:$lane)))>; 5934 5935 def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)), 5936 (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)), 5937 SPR:$src2, (SSubReg_f32_reg imm:$src3))>; 5938 def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)), 5939 (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)), 5940 SPR:$src2, (SSubReg_f32_reg imm:$src3))>; 5941 5942 //def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)), 5943 // (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>; 5944 def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)), 5945 (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>; 5946 5947 def : Pat<(v2f32 (scalar_to_vector SPR:$src)), 5948 (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>; 5949 def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))), 5950 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; 5951 def : Pat<(v4f32 (scalar_to_vector SPR:$src)), 5952 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>; 5953 5954 def : Pat<(v8i8 (scalar_to_vector GPR:$src)), 5955 (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0))>; 5956 def : Pat<(v4i16 (scalar_to_vector GPR:$src)), 5957 (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>; 5958 def : Pat<(v2i32 (scalar_to_vector GPR:$src)), 5959 (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>; 5960 5961 def : Pat<(v16i8 (scalar_to_vector GPR:$src)), 5962 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 5963 (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)), 5964 dsub_0)>; 5965 def : Pat<(v8i16 (scalar_to_vector GPR:$src)), 5966 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), 5967 (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)), 5968 dsub_0)>; 5969 def : Pat<(v4i32 (scalar_to_vector GPR:$src)), 5970 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), 5971 (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)), 5972 dsub_0)>; 5973 5974 // VDUP : Vector Duplicate (from ARM core register to all elements) 5975 5976 class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty> 5977 : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$V), (ins GPR:$R), 5978 IIC_VMOVIS, "vdup", Dt, "$V, $R", 5979 [(set DPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>; 5980 class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty> 5981 : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$V), (ins GPR:$R), 5982 IIC_VMOVIS, "vdup", Dt, "$V, $R", 5983 [(set QPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>; 5984 5985 def VDUP8d : VDUPD<0b11101100, 0b00, "8", v8i8>; 5986 def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>; 5987 def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>, 5988 Requires<[HasNEON, HasFastVDUP32]>; 5989 def VDUP8q : VDUPQ<0b11101110, 0b00, "8", v16i8>; 5990 def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>; 5991 def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>; 5992 5993 // NEONvdup patterns for uarchs with fast VDUP.32. 5994 def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>, 5995 Requires<[HasNEON,HasFastVDUP32]>; 5996 def : Pat<(v4f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>; 5997 5998 // NEONvdup patterns for uarchs with slow VDUP.32 - use VMOVDRR instead. 5999 def : Pat<(v2i32 (NEONvdup (i32 GPR:$R))), (VMOVDRR GPR:$R, GPR:$R)>, 6000 Requires<[HasNEON,HasSlowVDUP32]>; 6001 def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VMOVDRR GPR:$R, GPR:$R)>, 6002 Requires<[HasNEON,HasSlowVDUP32]>; 6003 6004 // VDUP : Vector Duplicate Lane (from scalar to all elements) 6005 6006 class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt, 6007 ValueType Ty, Operand IdxTy> 6008 : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, IdxTy:$lane), 6009 IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm$lane", 6010 [(set DPR:$Vd, (Ty (NEONvduplane (Ty DPR:$Vm), imm:$lane)))]>; 6011 6012 class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt, 6013 ValueType ResTy, ValueType OpTy, Operand IdxTy> 6014 : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, IdxTy:$lane), 6015 IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm$lane", 6016 [(set QPR:$Vd, (ResTy (NEONvduplane (OpTy DPR:$Vm), 6017 VectorIndex32:$lane)))]>; 6018 6019 // Inst{19-16} is partially specified depending on the element size. 6020 6021 def VDUPLN8d : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8, VectorIndex8> { 6022 bits<3> lane; 6023 let Inst{19-17} = lane{2-0}; 6024 } 6025 def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16, VectorIndex16> { 6026 bits<2> lane; 6027 let Inst{19-18} = lane{1-0}; 6028 } 6029 def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32, VectorIndex32> { 6030 bits<1> lane; 6031 let Inst{19} = lane{0}; 6032 } 6033 def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8, VectorIndex8> { 6034 bits<3> lane; 6035 let Inst{19-17} = lane{2-0}; 6036 } 6037 def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16, VectorIndex16> { 6038 bits<2> lane; 6039 let Inst{19-18} = lane{1-0}; 6040 } 6041 def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32, VectorIndex32> { 6042 bits<1> lane; 6043 let Inst{19} = lane{0}; 6044 } 6045 6046 def : Pat<(v2f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)), 6047 (VDUPLN32d DPR:$Vm, imm:$lane)>; 6048 6049 def : Pat<(v4f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)), 6050 (VDUPLN32q DPR:$Vm, imm:$lane)>; 6051 6052 def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)), 6053 (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src, 6054 (DSubReg_i8_reg imm:$lane))), 6055 (SubReg_i8_lane imm:$lane)))>; 6056 def : Pat<(v8i16 (NEONvduplane (v8i16 QPR:$src), imm:$lane)), 6057 (v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src, 6058 (DSubReg_i16_reg imm:$lane))), 6059 (SubReg_i16_lane imm:$lane)))>; 6060 def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)), 6061 (v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src, 6062 (DSubReg_i32_reg imm:$lane))), 6063 (SubReg_i32_lane imm:$lane)))>; 6064 def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)), 6065 (v4f32 (VDUPLN32q (v2f32 (EXTRACT_SUBREG QPR:$src, 6066 (DSubReg_i32_reg imm:$lane))), 6067 (SubReg_i32_lane imm:$lane)))>; 6068 6069 def : Pat<(v2f32 (NEONvdup (f32 SPR:$src))), 6070 (v2f32 (VDUPLN32d (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), 6071 SPR:$src, ssub_0), (i32 0)))>; 6072 def : Pat<(v4f32 (NEONvdup (f32 SPR:$src))), 6073 (v4f32 (VDUPLN32q (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), 6074 SPR:$src, ssub_0), (i32 0)))>; 6075 6076 // VMOVN : Vector Narrowing Move 6077 defm VMOVN : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN, 6078 "vmovn", "i", trunc>; 6079 // VQMOVN : Vector Saturating Narrowing Move 6080 defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD, 6081 "vqmovn", "s", int_arm_neon_vqmovns>; 6082 defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD, 6083 "vqmovn", "u", int_arm_neon_vqmovnu>; 6084 defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD, 6085 "vqmovun", "s", int_arm_neon_vqmovnsu>; 6086 // VMOVL : Vector Lengthening Move 6087 defm VMOVLs : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>; 6088 defm VMOVLu : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>; 6089 def : Pat<(v8i16 (anyext (v8i8 DPR:$Vm))), (VMOVLuv8i16 DPR:$Vm)>; 6090 def : Pat<(v4i32 (anyext (v4i16 DPR:$Vm))), (VMOVLuv4i32 DPR:$Vm)>; 6091 def : Pat<(v2i64 (anyext (v2i32 DPR:$Vm))), (VMOVLuv2i64 DPR:$Vm)>; 6092 6093 // Vector Conversions. 6094 6095 // VCVT : Vector Convert Between Floating-Point and Integers 6096 def VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32", 6097 v2i32, v2f32, fp_to_sint>; 6098 def VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32", 6099 v2i32, v2f32, fp_to_uint>; 6100 def VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32", 6101 v2f32, v2i32, sint_to_fp>; 6102 def VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32", 6103 v2f32, v2i32, uint_to_fp>; 6104 6105 def VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32", 6106 v4i32, v4f32, fp_to_sint>; 6107 def VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32", 6108 v4i32, v4f32, fp_to_uint>; 6109 def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32", 6110 v4f32, v4i32, sint_to_fp>; 6111 def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32", 6112 v4f32, v4i32, uint_to_fp>; 6113 6114 def VCVTh2sd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16", 6115 v4i16, v4f16, fp_to_sint>, 6116 Requires<[HasNEON, HasFullFP16]>; 6117 def VCVTh2ud : N2VD<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16", 6118 v4i16, v4f16, fp_to_uint>, 6119 Requires<[HasNEON, HasFullFP16]>; 6120 def VCVTs2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16", 6121 v4f16, v4i16, sint_to_fp>, 6122 Requires<[HasNEON, HasFullFP16]>; 6123 def VCVTu2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16", 6124 v4f16, v4i16, uint_to_fp>, 6125 Requires<[HasNEON, HasFullFP16]>; 6126 6127 def VCVTh2sq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16", 6128 v8i16, v8f16, fp_to_sint>, 6129 Requires<[HasNEON, HasFullFP16]>; 6130 def VCVTh2uq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16", 6131 v8i16, v8f16, fp_to_uint>, 6132 Requires<[HasNEON, HasFullFP16]>; 6133 def VCVTs2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16", 6134 v8f16, v8i16, sint_to_fp>, 6135 Requires<[HasNEON, HasFullFP16]>; 6136 def VCVTu2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16", 6137 v8f16, v8i16, uint_to_fp>, 6138 Requires<[HasNEON, HasFullFP16]>; 6139 6140 // VCVT{A, N, P, M} 6141 multiclass VCVT_FPI<string op, bits<3> op10_8, SDPatternOperator IntS, 6142 SDPatternOperator IntU> { 6143 let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in { 6144 def SDf : N2VDIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op), 6145 "s32.f32", v2i32, v2f32, IntS>, Requires<[HasV8, HasNEON]>; 6146 def SQf : N2VQIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op), 6147 "s32.f32", v4i32, v4f32, IntS>, Requires<[HasV8, HasNEON]>; 6148 def UDf : N2VDIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op), 6149 "u32.f32", v2i32, v2f32, IntU>, Requires<[HasV8, HasNEON]>; 6150 def UQf : N2VQIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op), 6151 "u32.f32", v4i32, v4f32, IntU>, Requires<[HasV8, HasNEON]>; 6152 def SDh : N2VDIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op), 6153 "s16.f16", v4i16, v4f16, IntS>, 6154 Requires<[HasV8, HasNEON, HasFullFP16]>; 6155 def SQh : N2VQIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op), 6156 "s16.f16", v8i16, v8f16, IntS>, 6157 Requires<[HasV8, HasNEON, HasFullFP16]>; 6158 def UDh : N2VDIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op), 6159 "u16.f16", v4i16, v4f16, IntU>, 6160 Requires<[HasV8, HasNEON, HasFullFP16]>; 6161 def UQh : N2VQIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op), 6162 "u16.f16", v8i16, v8f16, IntU>, 6163 Requires<[HasV8, HasNEON, HasFullFP16]>; 6164 } 6165 } 6166 6167 defm VCVTAN : VCVT_FPI<"a", 0b000, int_arm_neon_vcvtas, int_arm_neon_vcvtau>; 6168 defm VCVTNN : VCVT_FPI<"n", 0b001, int_arm_neon_vcvtns, int_arm_neon_vcvtnu>; 6169 defm VCVTPN : VCVT_FPI<"p", 0b010, int_arm_neon_vcvtps, int_arm_neon_vcvtpu>; 6170 defm VCVTMN : VCVT_FPI<"m", 0b011, int_arm_neon_vcvtms, int_arm_neon_vcvtmu>; 6171 6172 // VCVT : Vector Convert Between Floating-Point and Fixed-Point. 6173 let DecoderMethod = "DecodeVCVTD" in { 6174 def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32", 6175 v2i32, v2f32, int_arm_neon_vcvtfp2fxs>; 6176 def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32", 6177 v2i32, v2f32, int_arm_neon_vcvtfp2fxu>; 6178 def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32", 6179 v2f32, v2i32, int_arm_neon_vcvtfxs2fp>; 6180 def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32", 6181 v2f32, v2i32, int_arm_neon_vcvtfxu2fp>; 6182 let Predicates = [HasNEON, HasFullFP16] in { 6183 def VCVTh2xsd : N2VCvtD<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16", 6184 v4i16, v4f16, int_arm_neon_vcvtfp2fxs>; 6185 def VCVTh2xud : N2VCvtD<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16", 6186 v4i16, v4f16, int_arm_neon_vcvtfp2fxu>; 6187 def VCVTxs2hd : N2VCvtD<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16", 6188 v4f16, v4i16, int_arm_neon_vcvtfxs2fp>; 6189 def VCVTxu2hd : N2VCvtD<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16", 6190 v4f16, v4i16, int_arm_neon_vcvtfxu2fp>; 6191 } // Predicates = [HasNEON, HasFullFP16] 6192 } 6193 6194 let DecoderMethod = "DecodeVCVTQ" in { 6195 def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32", 6196 v4i32, v4f32, int_arm_neon_vcvtfp2fxs>; 6197 def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32", 6198 v4i32, v4f32, int_arm_neon_vcvtfp2fxu>; 6199 def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32", 6200 v4f32, v4i32, int_arm_neon_vcvtfxs2fp>; 6201 def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32", 6202 v4f32, v4i32, int_arm_neon_vcvtfxu2fp>; 6203 let Predicates = [HasNEON, HasFullFP16] in { 6204 def VCVTh2xsq : N2VCvtQ<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16", 6205 v8i16, v8f16, int_arm_neon_vcvtfp2fxs>; 6206 def VCVTh2xuq : N2VCvtQ<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16", 6207 v8i16, v8f16, int_arm_neon_vcvtfp2fxu>; 6208 def VCVTxs2hq : N2VCvtQ<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16", 6209 v8f16, v8i16, int_arm_neon_vcvtfxs2fp>; 6210 def VCVTxu2hq : N2VCvtQ<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16", 6211 v8f16, v8i16, int_arm_neon_vcvtfxu2fp>; 6212 } // Predicates = [HasNEON, HasFullFP16] 6213 } 6214 6215 def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0", 6216 (VCVTf2sd DPR:$Dd, DPR:$Dm, pred:$p)>; 6217 def : NEONInstAlias<"vcvt${p}.u32.f32 $Dd, $Dm, #0", 6218 (VCVTf2ud DPR:$Dd, DPR:$Dm, pred:$p)>; 6219 def : NEONInstAlias<"vcvt${p}.f32.s32 $Dd, $Dm, #0", 6220 (VCVTs2fd DPR:$Dd, DPR:$Dm, pred:$p)>; 6221 def : NEONInstAlias<"vcvt${p}.f32.u32 $Dd, $Dm, #0", 6222 (VCVTu2fd DPR:$Dd, DPR:$Dm, pred:$p)>; 6223 6224 def : NEONInstAlias<"vcvt${p}.s32.f32 $Qd, $Qm, #0", 6225 (VCVTf2sq QPR:$Qd, QPR:$Qm, pred:$p)>; 6226 def : NEONInstAlias<"vcvt${p}.u32.f32 $Qd, $Qm, #0", 6227 (VCVTf2uq QPR:$Qd, QPR:$Qm, pred:$p)>; 6228 def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0", 6229 (VCVTs2fq QPR:$Qd, QPR:$Qm, pred:$p)>; 6230 def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0", 6231 (VCVTu2fq QPR:$Qd, QPR:$Qm, pred:$p)>; 6232 6233 def : NEONInstAlias<"vcvt${p}.s16.f16 $Dd, $Dm, #0", 6234 (VCVTh2sd DPR:$Dd, DPR:$Dm, pred:$p)>; 6235 def : NEONInstAlias<"vcvt${p}.u16.f16 $Dd, $Dm, #0", 6236 (VCVTh2ud DPR:$Dd, DPR:$Dm, pred:$p)>; 6237 def : NEONInstAlias<"vcvt${p}.f16.s16 $Dd, $Dm, #0", 6238 (VCVTs2hd DPR:$Dd, DPR:$Dm, pred:$p)>; 6239 def : NEONInstAlias<"vcvt${p}.f16.u16 $Dd, $Dm, #0", 6240 (VCVTu2hd DPR:$Dd, DPR:$Dm, pred:$p)>; 6241 6242 def : NEONInstAlias<"vcvt${p}.s16.f16 $Qd, $Qm, #0", 6243 (VCVTh2sq QPR:$Qd, QPR:$Qm, pred:$p)>; 6244 def : NEONInstAlias<"vcvt${p}.u16.f16 $Qd, $Qm, #0", 6245 (VCVTh2uq QPR:$Qd, QPR:$Qm, pred:$p)>; 6246 def : NEONInstAlias<"vcvt${p}.f16.s16 $Qd, $Qm, #0", 6247 (VCVTs2hq QPR:$Qd, QPR:$Qm, pred:$p)>; 6248 def : NEONInstAlias<"vcvt${p}.f16.u16 $Qd, $Qm, #0", 6249 (VCVTu2hq QPR:$Qd, QPR:$Qm, pred:$p)>; 6250 6251 6252 // VCVT : Vector Convert Between Half-Precision and Single-Precision. 6253 def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0, 6254 IIC_VUNAQ, "vcvt", "f16.f32", 6255 v4i16, v4f32, int_arm_neon_vcvtfp2hf>, 6256 Requires<[HasNEON, HasFP16]>; 6257 def VCVTh2f : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0, 6258 IIC_VUNAQ, "vcvt", "f32.f16", 6259 v4f32, v4i16, int_arm_neon_vcvthf2fp>, 6260 Requires<[HasNEON, HasFP16]>; 6261 6262 // Vector Reverse. 6263 6264 // VREV64 : Vector Reverse elements within 64-bit doublewords 6265 6266 class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> 6267 : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$Vd), 6268 (ins DPR:$Vm), IIC_VMOVD, 6269 OpcodeStr, Dt, "$Vd, $Vm", "", 6270 [(set DPR:$Vd, (Ty (NEONvrev64 (Ty DPR:$Vm))))]>; 6271 class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> 6272 : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$Vd), 6273 (ins QPR:$Vm), IIC_VMOVQ, 6274 OpcodeStr, Dt, "$Vd, $Vm", "", 6275 [(set QPR:$Vd, (Ty (NEONvrev64 (Ty QPR:$Vm))))]>; 6276 6277 def VREV64d8 : VREV64D<0b00, "vrev64", "8", v8i8>; 6278 def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>; 6279 def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>; 6280 def : Pat<(v2f32 (NEONvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>; 6281 6282 def VREV64q8 : VREV64Q<0b00, "vrev64", "8", v16i8>; 6283 def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>; 6284 def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>; 6285 def : Pat<(v4f32 (NEONvrev64 (v4f32 QPR:$Vm))), (VREV64q32 QPR:$Vm)>; 6286 6287 // VREV32 : Vector Reverse elements within 32-bit words 6288 6289 class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> 6290 : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$Vd), 6291 (ins DPR:$Vm), IIC_VMOVD, 6292 OpcodeStr, Dt, "$Vd, $Vm", "", 6293 [(set DPR:$Vd, (Ty (NEONvrev32 (Ty DPR:$Vm))))]>; 6294 class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> 6295 : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$Vd), 6296 (ins QPR:$Vm), IIC_VMOVQ, 6297 OpcodeStr, Dt, "$Vd, $Vm", "", 6298 [(set QPR:$Vd, (Ty (NEONvrev32 (Ty QPR:$Vm))))]>; 6299 6300 def VREV32d8 : VREV32D<0b00, "vrev32", "8", v8i8>; 6301 def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>; 6302 6303 def VREV32q8 : VREV32Q<0b00, "vrev32", "8", v16i8>; 6304 def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>; 6305 6306 // VREV16 : Vector Reverse elements within 16-bit halfwords 6307 6308 class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> 6309 : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$Vd), 6310 (ins DPR:$Vm), IIC_VMOVD, 6311 OpcodeStr, Dt, "$Vd, $Vm", "", 6312 [(set DPR:$Vd, (Ty (NEONvrev16 (Ty DPR:$Vm))))]>; 6313 class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> 6314 : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$Vd), 6315 (ins QPR:$Vm), IIC_VMOVQ, 6316 OpcodeStr, Dt, "$Vd, $Vm", "", 6317 [(set QPR:$Vd, (Ty (NEONvrev16 (Ty QPR:$Vm))))]>; 6318 6319 def VREV16d8 : VREV16D<0b00, "vrev16", "8", v8i8>; 6320 def VREV16q8 : VREV16Q<0b00, "vrev16", "8", v16i8>; 6321 6322 // Other Vector Shuffles. 6323 6324 // Aligned extractions: really just dropping registers 6325 6326 class AlignedVEXTq<ValueType DestTy, ValueType SrcTy, SDNodeXForm LaneCVT> 6327 : Pat<(DestTy (vector_extract_subvec (SrcTy QPR:$src), (i32 imm:$start))), 6328 (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>; 6329 6330 def : AlignedVEXTq<v8i8, v16i8, DSubReg_i8_reg>; 6331 6332 def : AlignedVEXTq<v4i16, v8i16, DSubReg_i16_reg>; 6333 6334 def : AlignedVEXTq<v2i32, v4i32, DSubReg_i32_reg>; 6335 6336 def : AlignedVEXTq<v1i64, v2i64, DSubReg_f64_reg>; 6337 6338 def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>; 6339 6340 6341 // VEXT : Vector Extract 6342 6343 6344 // All of these have a two-operand InstAlias. 6345 let TwoOperandAliasConstraint = "$Vn = $Vd" in { 6346 class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy> 6347 : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd), 6348 (ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm, 6349 IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "", 6350 [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn), 6351 (Ty DPR:$Vm), imm:$index)))]> { 6352 bits<3> index; 6353 let Inst{11} = 0b0; 6354 let Inst{10-8} = index{2-0}; 6355 } 6356 6357 class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy> 6358 : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd), 6359 (ins QPR:$Vn, QPR:$Vm, imm0_15:$index), NVExtFrm, 6360 IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "", 6361 [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn), 6362 (Ty QPR:$Vm), imm:$index)))]> { 6363 bits<4> index; 6364 let Inst{11-8} = index{3-0}; 6365 } 6366 } 6367 6368 def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> { 6369 let Inst{10-8} = index{2-0}; 6370 } 6371 def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> { 6372 let Inst{10-9} = index{1-0}; 6373 let Inst{8} = 0b0; 6374 } 6375 def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> { 6376 let Inst{10} = index{0}; 6377 let Inst{9-8} = 0b00; 6378 } 6379 def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn), 6380 (v2f32 DPR:$Vm), 6381 (i32 imm:$index))), 6382 (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>; 6383 6384 def VEXTq8 : VEXTq<"vext", "8", v16i8, imm0_15> { 6385 let Inst{11-8} = index{3-0}; 6386 } 6387 def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> { 6388 let Inst{11-9} = index{2-0}; 6389 let Inst{8} = 0b0; 6390 } 6391 def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> { 6392 let Inst{11-10} = index{1-0}; 6393 let Inst{9-8} = 0b00; 6394 } 6395 def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> { 6396 let Inst{11} = index{0}; 6397 let Inst{10-8} = 0b000; 6398 } 6399 def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn), 6400 (v4f32 QPR:$Vm), 6401 (i32 imm:$index))), 6402 (VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>; 6403 6404 // VTRN : Vector Transpose 6405 6406 def VTRNd8 : N2VDShuffle<0b00, 0b00001, "vtrn", "8">; 6407 def VTRNd16 : N2VDShuffle<0b01, 0b00001, "vtrn", "16">; 6408 def VTRNd32 : N2VDShuffle<0b10, 0b00001, "vtrn", "32">; 6409 6410 def VTRNq8 : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">; 6411 def VTRNq16 : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">; 6412 def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">; 6413 6414 // VUZP : Vector Unzip (Deinterleave) 6415 6416 def VUZPd8 : N2VDShuffle<0b00, 0b00010, "vuzp", "8">; 6417 def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp", "16">; 6418 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 6419 def : NEONInstAlias<"vuzp${p}.32 $Dd, $Dm", 6420 (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>; 6421 6422 def VUZPq8 : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">; 6423 def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">; 6424 def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">; 6425 6426 // VZIP : Vector Zip (Interleave) 6427 6428 def VZIPd8 : N2VDShuffle<0b00, 0b00011, "vzip", "8">; 6429 def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip", "16">; 6430 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 6431 def : NEONInstAlias<"vzip${p}.32 $Dd, $Dm", 6432 (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>; 6433 6434 def VZIPq8 : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">; 6435 def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">; 6436 def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">; 6437 6438 // Vector Table Lookup and Table Extension. 6439 6440 // VTBL : Vector Table Lookup 6441 let DecoderMethod = "DecodeTBLInstruction" in { 6442 def VTBL1 6443 : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd), 6444 (ins VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1, 6445 "vtbl", "8", "$Vd, $Vn, $Vm", "", 6446 [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbl1 VecListOneD:$Vn, DPR:$Vm)))]>; 6447 let hasExtraSrcRegAllocReq = 1 in { 6448 def VTBL2 6449 : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd), 6450 (ins VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2, 6451 "vtbl", "8", "$Vd, $Vn, $Vm", "", []>; 6452 def VTBL3 6453 : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd), 6454 (ins VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB3, 6455 "vtbl", "8", "$Vd, $Vn, $Vm", "", []>; 6456 def VTBL4 6457 : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd), 6458 (ins VecListFourD:$Vn, DPR:$Vm), 6459 NVTBLFrm, IIC_VTB4, 6460 "vtbl", "8", "$Vd, $Vn, $Vm", "", []>; 6461 } // hasExtraSrcRegAllocReq = 1 6462 6463 def VTBL3Pseudo 6464 : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>; 6465 def VTBL4Pseudo 6466 : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB4, "", []>; 6467 6468 // VTBX : Vector Table Extension 6469 def VTBX1 6470 : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$Vd), 6471 (ins DPR:$orig, VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1, 6472 "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", 6473 [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1 6474 DPR:$orig, VecListOneD:$Vn, DPR:$Vm)))]>; 6475 let hasExtraSrcRegAllocReq = 1 in { 6476 def VTBX2 6477 : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd), 6478 (ins DPR:$orig, VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2, 6479 "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>; 6480 def VTBX3 6481 : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd), 6482 (ins DPR:$orig, VecListThreeD:$Vn, DPR:$Vm), 6483 NVTBLFrm, IIC_VTBX3, 6484 "vtbx", "8", "$Vd, $Vn, $Vm", 6485 "$orig = $Vd", []>; 6486 def VTBX4 6487 : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd), 6488 (ins DPR:$orig, VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX4, 6489 "vtbx", "8", "$Vd, $Vn, $Vm", 6490 "$orig = $Vd", []>; 6491 } // hasExtraSrcRegAllocReq = 1 6492 6493 def VTBX3Pseudo 6494 : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src), 6495 IIC_VTBX3, "$orig = $dst", []>; 6496 def VTBX4Pseudo 6497 : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src), 6498 IIC_VTBX4, "$orig = $dst", []>; 6499 } // DecoderMethod = "DecodeTBLInstruction" 6500 6501 // VRINT : Vector Rounding 6502 multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> { 6503 let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in { 6504 def Df : N2VDIntnp<0b10, 0b10, 0b100, 0, NoItinerary, 6505 !strconcat("vrint", op), "f32", 6506 v2f32, v2f32, Int>, Requires<[HasV8, HasNEON]> { 6507 let Inst{9-7} = op9_7; 6508 } 6509 def Qf : N2VQIntnp<0b10, 0b10, 0b100, 0, NoItinerary, 6510 !strconcat("vrint", op), "f32", 6511 v4f32, v4f32, Int>, Requires<[HasV8, HasNEON]> { 6512 let Inst{9-7} = op9_7; 6513 } 6514 def Dh : N2VDIntnp<0b01, 0b10, 0b100, 0, NoItinerary, 6515 !strconcat("vrint", op), "f16", 6516 v4f16, v4f16, Int>, 6517 Requires<[HasV8, HasNEON, HasFullFP16]> { 6518 let Inst{9-7} = op9_7; 6519 } 6520 def Qh : N2VQIntnp<0b01, 0b10, 0b100, 0, NoItinerary, 6521 !strconcat("vrint", op), "f16", 6522 v8f16, v8f16, Int>, 6523 Requires<[HasV8, HasNEON, HasFullFP16]> { 6524 let Inst{9-7} = op9_7; 6525 } 6526 } 6527 6528 def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Dd, $Dm"), 6529 (!cast<Instruction>(NAME#"Df") DPR:$Dd, DPR:$Dm)>; 6530 def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Qd, $Qm"), 6531 (!cast<Instruction>(NAME#"Qf") QPR:$Qd, QPR:$Qm)>; 6532 let Predicates = [HasNEON, HasFullFP16] in { 6533 def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Dd, $Dm"), 6534 (!cast<Instruction>(NAME#"Dh") DPR:$Dd, DPR:$Dm)>; 6535 def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Qd, $Qm"), 6536 (!cast<Instruction>(NAME#"Qh") QPR:$Qd, QPR:$Qm)>; 6537 } 6538 } 6539 6540 defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>; 6541 defm VRINTXN : VRINT_FPI<"x", 0b001, int_arm_neon_vrintx>; 6542 defm VRINTAN : VRINT_FPI<"a", 0b010, int_arm_neon_vrinta>; 6543 defm VRINTZN : VRINT_FPI<"z", 0b011, int_arm_neon_vrintz>; 6544 defm VRINTMN : VRINT_FPI<"m", 0b101, int_arm_neon_vrintm>; 6545 defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>; 6546 6547 // Cryptography instructions 6548 let PostEncoderMethod = "NEONThumb2DataIPostEncoder", 6549 DecoderNamespace = "v8Crypto", hasSideEffects = 0 in { 6550 class AES<string op, bit op7, bit op6, SDPatternOperator Int> 6551 : N2VQIntXnp<0b00, 0b00, 0b011, op6, op7, NoItinerary, 6552 !strconcat("aes", op), "8", v16i8, v16i8, Int>, 6553 Requires<[HasV8, HasCrypto]>; 6554 class AES2Op<string op, bit op7, bit op6, SDPatternOperator Int> 6555 : N2VQIntX2np<0b00, 0b00, 0b011, op6, op7, NoItinerary, 6556 !strconcat("aes", op), "8", v16i8, v16i8, Int>, 6557 Requires<[HasV8, HasCrypto]>; 6558 class N2SHA<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6, 6559 SDPatternOperator Int> 6560 : N2VQIntXnp<0b10, op17_16, op10_8, op6, op7, NoItinerary, 6561 !strconcat("sha", op), "32", v4i32, v4i32, Int>, 6562 Requires<[HasV8, HasCrypto]>; 6563 class N2SHA2Op<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6, 6564 SDPatternOperator Int> 6565 : N2VQIntX2np<0b10, op17_16, op10_8, op6, op7, NoItinerary, 6566 !strconcat("sha", op), "32", v4i32, v4i32, Int>, 6567 Requires<[HasV8, HasCrypto]>; 6568 class N3SHA3Op<string op, bits<5> op27_23, bits<2> op21_20, SDPatternOperator Int> 6569 : N3VQInt3np<op27_23, op21_20, 0b1100, 1, 0, N3RegFrm, NoItinerary, 6570 !strconcat("sha", op), "32", v4i32, v4i32, Int, 0>, 6571 Requires<[HasV8, HasCrypto]>; 6572 } 6573 6574 def AESD : AES2Op<"d", 0, 1, int_arm_neon_aesd>; 6575 def AESE : AES2Op<"e", 0, 0, int_arm_neon_aese>; 6576 def AESIMC : AES<"imc", 1, 1, int_arm_neon_aesimc>; 6577 def AESMC : AES<"mc", 1, 0, int_arm_neon_aesmc>; 6578 6579 def SHA1H : N2SHA<"1h", 0b01, 0b010, 1, 1, null_frag>; 6580 def SHA1SU1 : N2SHA2Op<"1su1", 0b10, 0b011, 1, 0, int_arm_neon_sha1su1>; 6581 def SHA256SU0 : N2SHA2Op<"256su0", 0b10, 0b011, 1, 1, int_arm_neon_sha256su0>; 6582 def SHA1C : N3SHA3Op<"1c", 0b00100, 0b00, null_frag>; 6583 def SHA1M : N3SHA3Op<"1m", 0b00100, 0b10, null_frag>; 6584 def SHA1P : N3SHA3Op<"1p", 0b00100, 0b01, null_frag>; 6585 def SHA1SU0 : N3SHA3Op<"1su0", 0b00100, 0b11, int_arm_neon_sha1su0>; 6586 def SHA256H : N3SHA3Op<"256h", 0b00110, 0b00, int_arm_neon_sha256h>; 6587 def SHA256H2 : N3SHA3Op<"256h2", 0b00110, 0b01, int_arm_neon_sha256h2>; 6588 def SHA256SU1 : N3SHA3Op<"256su1", 0b00110, 0b10, int_arm_neon_sha256su1>; 6589 6590 def : Pat<(i32 (int_arm_neon_sha1h i32:$Rn)), 6591 (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG 6592 (SHA1H (SUBREG_TO_REG (i64 0), 6593 (f32 (COPY_TO_REGCLASS i32:$Rn, SPR)), 6594 ssub_0)), 6595 ssub_0)), GPR)>; 6596 6597 def : Pat<(v4i32 (int_arm_neon_sha1c v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)), 6598 (SHA1C v4i32:$hash_abcd, 6599 (SUBREG_TO_REG (i64 0), 6600 (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)), 6601 ssub_0), 6602 v4i32:$wk)>; 6603 6604 def : Pat<(v4i32 (int_arm_neon_sha1m v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)), 6605 (SHA1M v4i32:$hash_abcd, 6606 (SUBREG_TO_REG (i64 0), 6607 (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)), 6608 ssub_0), 6609 v4i32:$wk)>; 6610 6611 def : Pat<(v4i32 (int_arm_neon_sha1p v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)), 6612 (SHA1P v4i32:$hash_abcd, 6613 (SUBREG_TO_REG (i64 0), 6614 (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)), 6615 ssub_0), 6616 v4i32:$wk)>; 6617 6618 //===----------------------------------------------------------------------===// 6619 // NEON instructions for single-precision FP math 6620 //===----------------------------------------------------------------------===// 6621 6622 class N2VSPat<SDNode OpNode, NeonI Inst> 6623 : NEONFPPat<(f32 (OpNode SPR:$a)), 6624 (EXTRACT_SUBREG 6625 (v2f32 (COPY_TO_REGCLASS (Inst 6626 (INSERT_SUBREG 6627 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)), 6628 SPR:$a, ssub_0)), DPR_VFP2)), ssub_0)>; 6629 6630 class N3VSPat<SDNode OpNode, NeonI Inst> 6631 : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)), 6632 (EXTRACT_SUBREG 6633 (v2f32 (COPY_TO_REGCLASS (Inst 6634 (INSERT_SUBREG 6635 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)), 6636 SPR:$a, ssub_0), 6637 (INSERT_SUBREG 6638 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)), 6639 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>; 6640 6641 class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst> 6642 : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))), 6643 (EXTRACT_SUBREG 6644 (v2f32 (COPY_TO_REGCLASS (Inst 6645 (INSERT_SUBREG 6646 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)), 6647 SPR:$acc, ssub_0), 6648 (INSERT_SUBREG 6649 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)), 6650 SPR:$a, ssub_0), 6651 (INSERT_SUBREG 6652 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)), 6653 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>; 6654 6655 class NVCVTIFPat<SDNode OpNode, NeonI Inst> 6656 : NEONFPPat<(f32 (OpNode GPR:$a)), 6657 (f32 (EXTRACT_SUBREG 6658 (v2f32 (Inst 6659 (INSERT_SUBREG 6660 (v2f32 (IMPLICIT_DEF)), 6661 (i32 (COPY_TO_REGCLASS GPR:$a, SPR)), ssub_0))), 6662 ssub_0))>; 6663 class NVCVTFIPat<SDNode OpNode, NeonI Inst> 6664 : NEONFPPat<(i32 (OpNode SPR:$a)), 6665 (i32 (EXTRACT_SUBREG 6666 (v2f32 (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), 6667 SPR:$a, ssub_0))), 6668 ssub_0))>; 6669 6670 def : N3VSPat<fadd, VADDfd>; 6671 def : N3VSPat<fsub, VSUBfd>; 6672 def : N3VSPat<fmul, VMULfd>; 6673 def : N3VSMulOpPat<fmul, fadd, VMLAfd>, 6674 Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>; 6675 def : N3VSMulOpPat<fmul, fsub, VMLSfd>, 6676 Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>; 6677 def : N3VSMulOpPat<fmul, fadd, VFMAfd>, 6678 Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>; 6679 def : N3VSMulOpPat<fmul, fsub, VFMSfd>, 6680 Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>; 6681 def : N2VSPat<fabs, VABSfd>; 6682 def : N2VSPat<fneg, VNEGfd>; 6683 def : N3VSPat<fmaxnan, VMAXfd>, Requires<[HasNEON]>; 6684 def : N3VSPat<fminnan, VMINfd>, Requires<[HasNEON]>; 6685 def : NVCVTFIPat<fp_to_sint, VCVTf2sd>; 6686 def : NVCVTFIPat<fp_to_uint, VCVTf2ud>; 6687 def : NVCVTIFPat<sint_to_fp, VCVTs2fd>; 6688 def : NVCVTIFPat<uint_to_fp, VCVTu2fd>; 6689 6690 // NEON doesn't have any f64 conversions, so provide patterns to make 6691 // sure the VFP conversions match when extracting from a vector. 6692 def : VFPPat<(f64 (sint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))), 6693 (VSITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>; 6694 def : VFPPat<(f64 (sint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))), 6695 (VSITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>; 6696 def : VFPPat<(f64 (uint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))), 6697 (VUITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>; 6698 def : VFPPat<(f64 (uint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))), 6699 (VUITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>; 6700 6701 6702 // Prefer VMOVDRR for i32 -> f32 bitcasts, it can write all DPR registers. 6703 def : Pat<(f32 (bitconvert GPR:$a)), 6704 (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>, 6705 Requires<[HasNEON, DontUseVMOVSR]>; 6706 6707 //===----------------------------------------------------------------------===// 6708 // Non-Instruction Patterns 6709 //===----------------------------------------------------------------------===// 6710 6711 // bit_convert 6712 let Predicates = [IsLE] in { 6713 def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>; 6714 def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>; 6715 def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (v1i64 DPR:$src)>; 6716 } 6717 def : Pat<(v1i64 (bitconvert (f64 DPR:$src))), (v1i64 DPR:$src)>; 6718 let Predicates = [IsLE] in { 6719 def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>; 6720 def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>; 6721 def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>; 6722 def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (v2i32 DPR:$src)>; 6723 def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (v2i32 DPR:$src)>; 6724 } 6725 def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>; 6726 let Predicates = [IsLE] in { 6727 def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>; 6728 def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>; 6729 def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (v4i16 DPR:$src)>; 6730 def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (v4i16 DPR:$src)>; 6731 def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>; 6732 def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (v8i8 DPR:$src)>; 6733 def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (v8i8 DPR:$src)>; 6734 def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (v8i8 DPR:$src)>; 6735 def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (v8i8 DPR:$src)>; 6736 def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (v8i8 DPR:$src)>; 6737 } 6738 def : Pat<(f64 (bitconvert (v1i64 DPR:$src))), (f64 DPR:$src)>; 6739 let Predicates = [IsLE] in { 6740 def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (f64 DPR:$src)>; 6741 def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (f64 DPR:$src)>; 6742 def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (f64 DPR:$src)>; 6743 def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (f64 DPR:$src)>; 6744 def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (v2f32 DPR:$src)>; 6745 def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>; 6746 } 6747 def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>; 6748 let Predicates = [IsLE] in { 6749 def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>; 6750 def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (v2f32 DPR:$src)>; 6751 } 6752 6753 let Predicates = [IsLE] in { 6754 def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>; 6755 def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>; 6756 def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>; 6757 } 6758 def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>; 6759 let Predicates = [IsLE] in { 6760 def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>; 6761 def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>; 6762 def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>; 6763 def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>; 6764 def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>; 6765 } 6766 def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>; 6767 let Predicates = [IsLE] in { 6768 def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>; 6769 def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>; 6770 def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>; 6771 def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>; 6772 def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>; 6773 def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>; 6774 def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>; 6775 def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>; 6776 def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>; 6777 def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>; 6778 def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>; 6779 } 6780 def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>; 6781 let Predicates = [IsLE] in { 6782 def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>; 6783 def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>; 6784 def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>; 6785 } 6786 def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>; 6787 let Predicates = [IsLE] in { 6788 def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>; 6789 def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>; 6790 def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>; 6791 def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>; 6792 } 6793 6794 let Predicates = [IsBE] in { 6795 // 64 bit conversions 6796 def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>; 6797 def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>; 6798 def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (VREV64d8 DPR:$src)>; 6799 def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>; 6800 def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>; 6801 def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>; 6802 def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (VREV32d8 DPR:$src)>; 6803 def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (VREV64d32 DPR:$src)>; 6804 def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>; 6805 def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>; 6806 def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (VREV16d8 DPR:$src)>; 6807 def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (VREV64d16 DPR:$src)>; 6808 def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>; 6809 def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (VREV64d8 DPR:$src)>; 6810 def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (VREV32d8 DPR:$src)>; 6811 def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (VREV16d8 DPR:$src)>; 6812 def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (VREV64d8 DPR:$src)>; 6813 def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (VREV32d8 DPR:$src)>; 6814 def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>; 6815 def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>; 6816 def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (VREV64d8 DPR:$src)>; 6817 def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>; 6818 def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (VREV64d32 DPR:$src)>; 6819 def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>; 6820 def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>; 6821 def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (VREV32d8 DPR:$src)>; 6822 6823 // 128 bit conversions 6824 def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>; 6825 def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>; 6826 def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>; 6827 def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>; 6828 def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>; 6829 def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>; 6830 def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>; 6831 def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>; 6832 def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>; 6833 def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>; 6834 def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (VREV16q8 QPR:$src)>; 6835 def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>; 6836 def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>; 6837 def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (VREV64q8 QPR:$src)>; 6838 def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (VREV32q8 QPR:$src)>; 6839 def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (VREV16q8 QPR:$src)>; 6840 def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (VREV64q8 QPR:$src)>; 6841 def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (VREV32q8 QPR:$src)>; 6842 def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>; 6843 def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>; 6844 def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>; 6845 def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>; 6846 def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>; 6847 def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>; 6848 def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>; 6849 def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>; 6850 } 6851 6852 // Fold extracting an element out of a v2i32 into a vfp register. 6853 def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))), 6854 (f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>; 6855 6856 // Vector lengthening move with load, matching extending loads. 6857 6858 // extload, zextload and sextload for a standard lengthening load. Example: 6859 // Lengthen_Single<"8", "i16", "8"> = 6860 // Pat<(v8i16 (extloadvi8 addrmode6:$addr)) 6861 // (VMOVLuv8i16 (VLD1d8 addrmode6:$addr, 6862 // (f64 (IMPLICIT_DEF)), (i32 0)))>; 6863 multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> { 6864 let AddedComplexity = 10 in { 6865 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 6866 (!cast<PatFrag>("extloadvi" # SrcTy) addrmode6:$addr)), 6867 (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy) 6868 (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>; 6869 6870 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 6871 (!cast<PatFrag>("zextloadvi" # SrcTy) addrmode6:$addr)), 6872 (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy) 6873 (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>; 6874 6875 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 6876 (!cast<PatFrag>("sextloadvi" # SrcTy) addrmode6:$addr)), 6877 (!cast<Instruction>("VMOVLsv" # DestLanes # DestTy) 6878 (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>; 6879 } 6880 } 6881 6882 // extload, zextload and sextload for a lengthening load which only uses 6883 // half the lanes available. Example: 6884 // Lengthen_HalfSingle<"4", "i16", "8", "i16", "i8"> = 6885 // Pat<(v4i16 (extloadvi8 addrmode6oneL32:$addr)), 6886 // (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr, 6887 // (f64 (IMPLICIT_DEF)), (i32 0))), 6888 // dsub_0)>; 6889 multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy, 6890 string InsnLanes, string InsnTy> { 6891 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 6892 (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)), 6893 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy) 6894 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), 6895 dsub_0)>; 6896 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 6897 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)), 6898 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy) 6899 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), 6900 dsub_0)>; 6901 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 6902 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)), 6903 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy) 6904 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), 6905 dsub_0)>; 6906 } 6907 6908 // The following class definition is basically a copy of the 6909 // Lengthen_HalfSingle definition above, however with an additional parameter 6910 // "RevLanes" to select the correct VREV32dXX instruction. This is to convert 6911 // data loaded by VLD1LN into proper vector format in big endian mode. 6912 multiclass Lengthen_HalfSingle_Big_Endian<string DestLanes, string DestTy, string SrcTy, 6913 string InsnLanes, string InsnTy, string RevLanes> { 6914 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 6915 (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)), 6916 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy) 6917 (!cast<Instruction>("VREV32d" # RevLanes) 6918 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), 6919 dsub_0)>; 6920 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 6921 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)), 6922 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy) 6923 (!cast<Instruction>("VREV32d" # RevLanes) 6924 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), 6925 dsub_0)>; 6926 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 6927 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)), 6928 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy) 6929 (!cast<Instruction>("VREV32d" # RevLanes) 6930 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), 6931 dsub_0)>; 6932 } 6933 6934 // extload, zextload and sextload for a lengthening load followed by another 6935 // lengthening load, to quadruple the initial length. 6936 // 6937 // Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32"> = 6938 // Pat<(v4i32 (extloadvi8 addrmode6oneL32:$addr)) 6939 // (EXTRACT_SUBREG (VMOVLuv4i32 6940 // (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr, 6941 // (f64 (IMPLICIT_DEF)), 6942 // (i32 0))), 6943 // dsub_0)), 6944 // dsub_0)>; 6945 multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy, 6946 string Insn1Lanes, string Insn1Ty, string Insn2Lanes, 6947 string Insn2Ty> { 6948 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 6949 (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)), 6950 (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty) 6951 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty) 6952 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), 6953 dsub_0))>; 6954 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 6955 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)), 6956 (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty) 6957 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty) 6958 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), 6959 dsub_0))>; 6960 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 6961 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)), 6962 (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty) 6963 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty) 6964 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), 6965 dsub_0))>; 6966 } 6967 6968 // The following class definition is basically a copy of the 6969 // Lengthen_Double definition above, however with an additional parameter 6970 // "RevLanes" to select the correct VREV32dXX instruction. This is to convert 6971 // data loaded by VLD1LN into proper vector format in big endian mode. 6972 multiclass Lengthen_Double_Big_Endian<string DestLanes, string DestTy, string SrcTy, 6973 string Insn1Lanes, string Insn1Ty, string Insn2Lanes, 6974 string Insn2Ty, string RevLanes> { 6975 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 6976 (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)), 6977 (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty) 6978 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty) 6979 (!cast<Instruction>("VREV32d" # RevLanes) 6980 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), 6981 dsub_0))>; 6982 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 6983 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)), 6984 (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty) 6985 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty) 6986 (!cast<Instruction>("VREV32d" # RevLanes) 6987 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), 6988 dsub_0))>; 6989 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 6990 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)), 6991 (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty) 6992 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty) 6993 (!cast<Instruction>("VREV32d" # RevLanes) 6994 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), 6995 dsub_0))>; 6996 } 6997 6998 // extload, zextload and sextload for a lengthening load followed by another 6999 // lengthening load, to quadruple the initial length, but which ends up only 7000 // requiring half the available lanes (a 64-bit outcome instead of a 128-bit). 7001 // 7002 // Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32"> = 7003 // Pat<(v2i32 (extloadvi8 addrmode6:$addr)) 7004 // (EXTRACT_SUBREG (VMOVLuv4i32 7005 // (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd16 addrmode6:$addr, 7006 // (f64 (IMPLICIT_DEF)), (i32 0))), 7007 // dsub_0)), 7008 // dsub_0)>; 7009 multiclass Lengthen_HalfDouble<string DestLanes, string DestTy, string SrcTy, 7010 string Insn1Lanes, string Insn1Ty, string Insn2Lanes, 7011 string Insn2Ty> { 7012 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 7013 (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)), 7014 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty) 7015 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty) 7016 (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), 7017 dsub_0)), 7018 dsub_0)>; 7019 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 7020 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)), 7021 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty) 7022 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty) 7023 (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), 7024 dsub_0)), 7025 dsub_0)>; 7026 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 7027 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)), 7028 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty) 7029 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty) 7030 (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), 7031 dsub_0)), 7032 dsub_0)>; 7033 } 7034 7035 // The following class definition is basically a copy of the 7036 // Lengthen_HalfDouble definition above, however with an additional VREV16d8 7037 // instruction to convert data loaded by VLD1LN into proper vector format 7038 // in big endian mode. 7039 multiclass Lengthen_HalfDouble_Big_Endian<string DestLanes, string DestTy, string SrcTy, 7040 string Insn1Lanes, string Insn1Ty, string Insn2Lanes, 7041 string Insn2Ty> { 7042 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 7043 (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)), 7044 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty) 7045 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty) 7046 (!cast<Instruction>("VREV16d8") 7047 (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), 7048 dsub_0)), 7049 dsub_0)>; 7050 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 7051 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)), 7052 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty) 7053 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty) 7054 (!cast<Instruction>("VREV16d8") 7055 (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), 7056 dsub_0)), 7057 dsub_0)>; 7058 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 7059 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)), 7060 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty) 7061 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty) 7062 (!cast<Instruction>("VREV16d8") 7063 (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), 7064 dsub_0)), 7065 dsub_0)>; 7066 } 7067 7068 defm : Lengthen_Single<"8", "i16", "8">; // v8i8 -> v8i16 7069 defm : Lengthen_Single<"4", "i32", "16">; // v4i16 -> v4i32 7070 defm : Lengthen_Single<"2", "i64", "32">; // v2i32 -> v2i64 7071 7072 let Predicates = [IsLE] in { 7073 defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16 7074 defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32 7075 7076 // Double lengthening - v4i8 -> v4i16 -> v4i32 7077 defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">; 7078 // v2i8 -> v2i16 -> v2i32 7079 defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">; 7080 // v2i16 -> v2i32 -> v2i64 7081 defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">; 7082 } 7083 7084 let Predicates = [IsBE] in { 7085 defm : Lengthen_HalfSingle_Big_Endian<"4", "i16", "i8", "8", "i16", "8">; // v4i8 -> v4i16 7086 defm : Lengthen_HalfSingle_Big_Endian<"2", "i32", "i16", "4", "i32", "16">; // v2i16 -> v2i32 7087 7088 // Double lengthening - v4i8 -> v4i16 -> v4i32 7089 defm : Lengthen_Double_Big_Endian<"4", "i32", "i8", "8", "i16", "4", "i32", "8">; 7090 // v2i8 -> v2i16 -> v2i32 7091 defm : Lengthen_HalfDouble_Big_Endian<"2", "i32", "i8", "8", "i16", "4", "i32">; 7092 // v2i16 -> v2i32 -> v2i64 7093 defm : Lengthen_Double_Big_Endian<"2", "i64", "i16", "4", "i32", "2", "i64", "16">; 7094 } 7095 7096 // Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64 7097 let Predicates = [IsLE] in { 7098 def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)), 7099 (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 7100 (VLD1LNd16 addrmode6:$addr, 7101 (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>; 7102 def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)), 7103 (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 7104 (VLD1LNd16 addrmode6:$addr, 7105 (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>; 7106 def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)), 7107 (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16 7108 (VLD1LNd16 addrmode6:$addr, 7109 (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>; 7110 } 7111 // The following patterns are basically a copy of the patterns above, 7112 // however with an additional VREV16d instruction to convert data 7113 // loaded by VLD1LN into proper vector format in big endian mode. 7114 let Predicates = [IsBE] in { 7115 def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)), 7116 (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 7117 (!cast<Instruction>("VREV16d8") 7118 (VLD1LNd16 addrmode6:$addr, 7119 (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>; 7120 def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)), 7121 (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 7122 (!cast<Instruction>("VREV16d8") 7123 (VLD1LNd16 addrmode6:$addr, 7124 (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>; 7125 def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)), 7126 (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16 7127 (!cast<Instruction>("VREV16d8") 7128 (VLD1LNd16 addrmode6:$addr, 7129 (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>; 7130 } 7131 7132 //===----------------------------------------------------------------------===// 7133 // Assembler aliases 7134 // 7135 7136 def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn", 7137 (VSETLNi32 DPR:$Dd, GPR:$Rn, 1, pred:$p)>; 7138 def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn", 7139 (VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>; 7140 7141 // VAND/VBIC/VEOR/VORR accept but do not require a type suffix. 7142 defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm", 7143 (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; 7144 defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm", 7145 (VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; 7146 defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm", 7147 (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; 7148 defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm", 7149 (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; 7150 defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm", 7151 (VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; 7152 defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm", 7153 (VEORq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; 7154 defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm", 7155 (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; 7156 defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm", 7157 (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; 7158 // ... two-operand aliases 7159 defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm", 7160 (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; 7161 defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm", 7162 (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; 7163 defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm", 7164 (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; 7165 defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm", 7166 (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; 7167 defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm", 7168 (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; 7169 defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm", 7170 (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; 7171 // ... immediates 7172 def : NEONInstAlias<"vand${p}.i16 $Vd, $imm", 7173 (VBICiv4i16 DPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>; 7174 def : NEONInstAlias<"vand${p}.i32 $Vd, $imm", 7175 (VBICiv2i32 DPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>; 7176 def : NEONInstAlias<"vand${p}.i16 $Vd, $imm", 7177 (VBICiv8i16 QPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>; 7178 def : NEONInstAlias<"vand${p}.i32 $Vd, $imm", 7179 (VBICiv4i32 QPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>; 7180 7181 7182 // VLD1 single-lane pseudo-instructions. These need special handling for 7183 // the lane index that an InstAlias can't handle, so we use these instead. 7184 def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr", 7185 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, 7186 pred:$p)>; 7187 def VLD1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr", 7188 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, 7189 pred:$p)>; 7190 def VLD1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr", 7191 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, 7192 pred:$p)>; 7193 7194 def VLD1LNdWB_fixed_Asm_8 : 7195 NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr!", 7196 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, 7197 pred:$p)>; 7198 def VLD1LNdWB_fixed_Asm_16 : 7199 NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr!", 7200 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, 7201 pred:$p)>; 7202 def VLD1LNdWB_fixed_Asm_32 : 7203 NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr!", 7204 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, 7205 pred:$p)>; 7206 def VLD1LNdWB_register_Asm_8 : 7207 NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr, $Rm", 7208 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, 7209 rGPR:$Rm, pred:$p)>; 7210 def VLD1LNdWB_register_Asm_16 : 7211 NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr, $Rm", 7212 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, 7213 rGPR:$Rm, pred:$p)>; 7214 def VLD1LNdWB_register_Asm_32 : 7215 NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr, $Rm", 7216 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, 7217 rGPR:$Rm, pred:$p)>; 7218 7219 7220 // VST1 single-lane pseudo-instructions. These need special handling for 7221 // the lane index that an InstAlias can't handle, so we use these instead. 7222 def VST1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr", 7223 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, 7224 pred:$p)>; 7225 def VST1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr", 7226 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, 7227 pred:$p)>; 7228 def VST1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr", 7229 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, 7230 pred:$p)>; 7231 7232 def VST1LNdWB_fixed_Asm_8 : 7233 NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr!", 7234 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, 7235 pred:$p)>; 7236 def VST1LNdWB_fixed_Asm_16 : 7237 NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr!", 7238 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, 7239 pred:$p)>; 7240 def VST1LNdWB_fixed_Asm_32 : 7241 NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr!", 7242 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, 7243 pred:$p)>; 7244 def VST1LNdWB_register_Asm_8 : 7245 NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr, $Rm", 7246 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, 7247 rGPR:$Rm, pred:$p)>; 7248 def VST1LNdWB_register_Asm_16 : 7249 NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr, $Rm", 7250 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, 7251 rGPR:$Rm, pred:$p)>; 7252 def VST1LNdWB_register_Asm_32 : 7253 NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr, $Rm", 7254 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, 7255 rGPR:$Rm, pred:$p)>; 7256 7257 // VLD2 single-lane pseudo-instructions. These need special handling for 7258 // the lane index that an InstAlias can't handle, so we use these instead. 7259 def VLD2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr", 7260 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, 7261 pred:$p)>; 7262 def VLD2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr", 7263 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, 7264 pred:$p)>; 7265 def VLD2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr", 7266 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, pred:$p)>; 7267 def VLD2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr", 7268 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, 7269 pred:$p)>; 7270 def VLD2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr", 7271 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, 7272 pred:$p)>; 7273 7274 def VLD2LNdWB_fixed_Asm_8 : 7275 NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr!", 7276 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, 7277 pred:$p)>; 7278 def VLD2LNdWB_fixed_Asm_16 : 7279 NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!", 7280 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, 7281 pred:$p)>; 7282 def VLD2LNdWB_fixed_Asm_32 : 7283 NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!", 7284 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, 7285 pred:$p)>; 7286 def VLD2LNqWB_fixed_Asm_16 : 7287 NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!", 7288 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, 7289 pred:$p)>; 7290 def VLD2LNqWB_fixed_Asm_32 : 7291 NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!", 7292 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, 7293 pred:$p)>; 7294 def VLD2LNdWB_register_Asm_8 : 7295 NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr, $Rm", 7296 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, 7297 rGPR:$Rm, pred:$p)>; 7298 def VLD2LNdWB_register_Asm_16 : 7299 NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm", 7300 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, 7301 rGPR:$Rm, pred:$p)>; 7302 def VLD2LNdWB_register_Asm_32 : 7303 NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm", 7304 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, 7305 rGPR:$Rm, pred:$p)>; 7306 def VLD2LNqWB_register_Asm_16 : 7307 NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm", 7308 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, 7309 rGPR:$Rm, pred:$p)>; 7310 def VLD2LNqWB_register_Asm_32 : 7311 NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm", 7312 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, 7313 rGPR:$Rm, pred:$p)>; 7314 7315 7316 // VST2 single-lane pseudo-instructions. These need special handling for 7317 // the lane index that an InstAlias can't handle, so we use these instead. 7318 def VST2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr", 7319 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, 7320 pred:$p)>; 7321 def VST2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr", 7322 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, 7323 pred:$p)>; 7324 def VST2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr", 7325 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, 7326 pred:$p)>; 7327 def VST2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr", 7328 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, 7329 pred:$p)>; 7330 def VST2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr", 7331 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, 7332 pred:$p)>; 7333 7334 def VST2LNdWB_fixed_Asm_8 : 7335 NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr!", 7336 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, 7337 pred:$p)>; 7338 def VST2LNdWB_fixed_Asm_16 : 7339 NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!", 7340 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, 7341 pred:$p)>; 7342 def VST2LNdWB_fixed_Asm_32 : 7343 NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!", 7344 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, 7345 pred:$p)>; 7346 def VST2LNqWB_fixed_Asm_16 : 7347 NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!", 7348 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, 7349 pred:$p)>; 7350 def VST2LNqWB_fixed_Asm_32 : 7351 NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!", 7352 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, 7353 pred:$p)>; 7354 def VST2LNdWB_register_Asm_8 : 7355 NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr, $Rm", 7356 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, 7357 rGPR:$Rm, pred:$p)>; 7358 def VST2LNdWB_register_Asm_16 : 7359 NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm", 7360 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, 7361 rGPR:$Rm, pred:$p)>; 7362 def VST2LNdWB_register_Asm_32 : 7363 NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm", 7364 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, 7365 rGPR:$Rm, pred:$p)>; 7366 def VST2LNqWB_register_Asm_16 : 7367 NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm", 7368 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, 7369 rGPR:$Rm, pred:$p)>; 7370 def VST2LNqWB_register_Asm_32 : 7371 NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm", 7372 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, 7373 rGPR:$Rm, pred:$p)>; 7374 7375 // VLD3 all-lanes pseudo-instructions. These need special handling for 7376 // the lane index that an InstAlias can't handle, so we use these instead. 7377 def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", 7378 (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, 7379 pred:$p)>; 7380 def VLD3DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", 7381 (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, 7382 pred:$p)>; 7383 def VLD3DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", 7384 (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, 7385 pred:$p)>; 7386 def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", 7387 (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, 7388 pred:$p)>; 7389 def VLD3DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", 7390 (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, 7391 pred:$p)>; 7392 def VLD3DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", 7393 (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, 7394 pred:$p)>; 7395 7396 def VLD3DUPdWB_fixed_Asm_8 : 7397 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", 7398 (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, 7399 pred:$p)>; 7400 def VLD3DUPdWB_fixed_Asm_16 : 7401 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", 7402 (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, 7403 pred:$p)>; 7404 def VLD3DUPdWB_fixed_Asm_32 : 7405 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", 7406 (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, 7407 pred:$p)>; 7408 def VLD3DUPqWB_fixed_Asm_8 : 7409 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", 7410 (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, 7411 pred:$p)>; 7412 def VLD3DUPqWB_fixed_Asm_16 : 7413 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", 7414 (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, 7415 pred:$p)>; 7416 def VLD3DUPqWB_fixed_Asm_32 : 7417 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", 7418 (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, 7419 pred:$p)>; 7420 def VLD3DUPdWB_register_Asm_8 : 7421 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", 7422 (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, 7423 rGPR:$Rm, pred:$p)>; 7424 def VLD3DUPdWB_register_Asm_16 : 7425 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", 7426 (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, 7427 rGPR:$Rm, pred:$p)>; 7428 def VLD3DUPdWB_register_Asm_32 : 7429 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", 7430 (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, 7431 rGPR:$Rm, pred:$p)>; 7432 def VLD3DUPqWB_register_Asm_8 : 7433 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", 7434 (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, 7435 rGPR:$Rm, pred:$p)>; 7436 def VLD3DUPqWB_register_Asm_16 : 7437 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", 7438 (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, 7439 rGPR:$Rm, pred:$p)>; 7440 def VLD3DUPqWB_register_Asm_32 : 7441 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", 7442 (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, 7443 rGPR:$Rm, pred:$p)>; 7444 7445 7446 // VLD3 single-lane pseudo-instructions. These need special handling for 7447 // the lane index that an InstAlias can't handle, so we use these instead. 7448 def VLD3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", 7449 (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, 7450 pred:$p)>; 7451 def VLD3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", 7452 (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr, 7453 pred:$p)>; 7454 def VLD3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", 7455 (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, 7456 pred:$p)>; 7457 def VLD3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", 7458 (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr, 7459 pred:$p)>; 7460 def VLD3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", 7461 (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, 7462 pred:$p)>; 7463 7464 def VLD3LNdWB_fixed_Asm_8 : 7465 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", 7466 (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, 7467 pred:$p)>; 7468 def VLD3LNdWB_fixed_Asm_16 : 7469 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", 7470 (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr, 7471 pred:$p)>; 7472 def VLD3LNdWB_fixed_Asm_32 : 7473 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", 7474 (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, 7475 pred:$p)>; 7476 def VLD3LNqWB_fixed_Asm_16 : 7477 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", 7478 (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr, 7479 pred:$p)>; 7480 def VLD3LNqWB_fixed_Asm_32 : 7481 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", 7482 (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, 7483 pred:$p)>; 7484 def VLD3LNdWB_register_Asm_8 : 7485 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", 7486 (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, 7487 rGPR:$Rm, pred:$p)>; 7488 def VLD3LNdWB_register_Asm_16 : 7489 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", 7490 (ins VecListThreeDHWordIndexed:$list, 7491 addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>; 7492 def VLD3LNdWB_register_Asm_32 : 7493 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", 7494 (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, 7495 rGPR:$Rm, pred:$p)>; 7496 def VLD3LNqWB_register_Asm_16 : 7497 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", 7498 (ins VecListThreeQHWordIndexed:$list, 7499 addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>; 7500 def VLD3LNqWB_register_Asm_32 : 7501 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", 7502 (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, 7503 rGPR:$Rm, pred:$p)>; 7504 7505 // VLD3 multiple structure pseudo-instructions. These need special handling for 7506 // the vector operands that the normal instructions don't yet model. 7507 // FIXME: Remove these when the register classes and instructions are updated. 7508 def VLD3dAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", 7509 (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; 7510 def VLD3dAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", 7511 (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; 7512 def VLD3dAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", 7513 (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; 7514 def VLD3qAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", 7515 (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; 7516 def VLD3qAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", 7517 (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; 7518 def VLD3qAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", 7519 (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; 7520 7521 def VLD3dWB_fixed_Asm_8 : 7522 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", 7523 (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; 7524 def VLD3dWB_fixed_Asm_16 : 7525 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", 7526 (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; 7527 def VLD3dWB_fixed_Asm_32 : 7528 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", 7529 (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; 7530 def VLD3qWB_fixed_Asm_8 : 7531 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", 7532 (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; 7533 def VLD3qWB_fixed_Asm_16 : 7534 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", 7535 (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; 7536 def VLD3qWB_fixed_Asm_32 : 7537 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", 7538 (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; 7539 def VLD3dWB_register_Asm_8 : 7540 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", 7541 (ins VecListThreeD:$list, addrmode6align64:$addr, 7542 rGPR:$Rm, pred:$p)>; 7543 def VLD3dWB_register_Asm_16 : 7544 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", 7545 (ins VecListThreeD:$list, addrmode6align64:$addr, 7546 rGPR:$Rm, pred:$p)>; 7547 def VLD3dWB_register_Asm_32 : 7548 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", 7549 (ins VecListThreeD:$list, addrmode6align64:$addr, 7550 rGPR:$Rm, pred:$p)>; 7551 def VLD3qWB_register_Asm_8 : 7552 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", 7553 (ins VecListThreeQ:$list, addrmode6align64:$addr, 7554 rGPR:$Rm, pred:$p)>; 7555 def VLD3qWB_register_Asm_16 : 7556 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", 7557 (ins VecListThreeQ:$list, addrmode6align64:$addr, 7558 rGPR:$Rm, pred:$p)>; 7559 def VLD3qWB_register_Asm_32 : 7560 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", 7561 (ins VecListThreeQ:$list, addrmode6align64:$addr, 7562 rGPR:$Rm, pred:$p)>; 7563 7564 // VST3 single-lane pseudo-instructions. These need special handling for 7565 // the lane index that an InstAlias can't handle, so we use these instead. 7566 def VST3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr", 7567 (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, 7568 pred:$p)>; 7569 def VST3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", 7570 (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr, 7571 pred:$p)>; 7572 def VST3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", 7573 (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, 7574 pred:$p)>; 7575 def VST3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", 7576 (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr, 7577 pred:$p)>; 7578 def VST3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", 7579 (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, 7580 pred:$p)>; 7581 7582 def VST3LNdWB_fixed_Asm_8 : 7583 NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!", 7584 (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, 7585 pred:$p)>; 7586 def VST3LNdWB_fixed_Asm_16 : 7587 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", 7588 (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr, 7589 pred:$p)>; 7590 def VST3LNdWB_fixed_Asm_32 : 7591 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", 7592 (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, 7593 pred:$p)>; 7594 def VST3LNqWB_fixed_Asm_16 : 7595 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", 7596 (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr, 7597 pred:$p)>; 7598 def VST3LNqWB_fixed_Asm_32 : 7599 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", 7600 (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, 7601 pred:$p)>; 7602 def VST3LNdWB_register_Asm_8 : 7603 NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm", 7604 (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, 7605 rGPR:$Rm, pred:$p)>; 7606 def VST3LNdWB_register_Asm_16 : 7607 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", 7608 (ins VecListThreeDHWordIndexed:$list, 7609 addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>; 7610 def VST3LNdWB_register_Asm_32 : 7611 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", 7612 (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, 7613 rGPR:$Rm, pred:$p)>; 7614 def VST3LNqWB_register_Asm_16 : 7615 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", 7616 (ins VecListThreeQHWordIndexed:$list, 7617 addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>; 7618 def VST3LNqWB_register_Asm_32 : 7619 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", 7620 (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, 7621 rGPR:$Rm, pred:$p)>; 7622 7623 7624 // VST3 multiple structure pseudo-instructions. These need special handling for 7625 // the vector operands that the normal instructions don't yet model. 7626 // FIXME: Remove these when the register classes and instructions are updated. 7627 def VST3dAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr", 7628 (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; 7629 def VST3dAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", 7630 (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; 7631 def VST3dAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", 7632 (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; 7633 def VST3qAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr", 7634 (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; 7635 def VST3qAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", 7636 (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; 7637 def VST3qAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", 7638 (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; 7639 7640 def VST3dWB_fixed_Asm_8 : 7641 NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!", 7642 (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; 7643 def VST3dWB_fixed_Asm_16 : 7644 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", 7645 (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; 7646 def VST3dWB_fixed_Asm_32 : 7647 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", 7648 (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; 7649 def VST3qWB_fixed_Asm_8 : 7650 NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!", 7651 (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; 7652 def VST3qWB_fixed_Asm_16 : 7653 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", 7654 (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; 7655 def VST3qWB_fixed_Asm_32 : 7656 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", 7657 (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; 7658 def VST3dWB_register_Asm_8 : 7659 NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm", 7660 (ins VecListThreeD:$list, addrmode6align64:$addr, 7661 rGPR:$Rm, pred:$p)>; 7662 def VST3dWB_register_Asm_16 : 7663 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", 7664 (ins VecListThreeD:$list, addrmode6align64:$addr, 7665 rGPR:$Rm, pred:$p)>; 7666 def VST3dWB_register_Asm_32 : 7667 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", 7668 (ins VecListThreeD:$list, addrmode6align64:$addr, 7669 rGPR:$Rm, pred:$p)>; 7670 def VST3qWB_register_Asm_8 : 7671 NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm", 7672 (ins VecListThreeQ:$list, addrmode6align64:$addr, 7673 rGPR:$Rm, pred:$p)>; 7674 def VST3qWB_register_Asm_16 : 7675 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", 7676 (ins VecListThreeQ:$list, addrmode6align64:$addr, 7677 rGPR:$Rm, pred:$p)>; 7678 def VST3qWB_register_Asm_32 : 7679 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", 7680 (ins VecListThreeQ:$list, addrmode6align64:$addr, 7681 rGPR:$Rm, pred:$p)>; 7682 7683 // VLD4 all-lanes pseudo-instructions. These need special handling for 7684 // the lane index that an InstAlias can't handle, so we use these instead. 7685 def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", 7686 (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr, 7687 pred:$p)>; 7688 def VLD4DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", 7689 (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr, 7690 pred:$p)>; 7691 def VLD4DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", 7692 (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr, 7693 pred:$p)>; 7694 def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", 7695 (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr, 7696 pred:$p)>; 7697 def VLD4DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", 7698 (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr, 7699 pred:$p)>; 7700 def VLD4DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", 7701 (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr, 7702 pred:$p)>; 7703 7704 def VLD4DUPdWB_fixed_Asm_8 : 7705 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", 7706 (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr, 7707 pred:$p)>; 7708 def VLD4DUPdWB_fixed_Asm_16 : 7709 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", 7710 (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr, 7711 pred:$p)>; 7712 def VLD4DUPdWB_fixed_Asm_32 : 7713 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", 7714 (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr, 7715 pred:$p)>; 7716 def VLD4DUPqWB_fixed_Asm_8 : 7717 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", 7718 (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr, 7719 pred:$p)>; 7720 def VLD4DUPqWB_fixed_Asm_16 : 7721 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", 7722 (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr, 7723 pred:$p)>; 7724 def VLD4DUPqWB_fixed_Asm_32 : 7725 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", 7726 (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr, 7727 pred:$p)>; 7728 def VLD4DUPdWB_register_Asm_8 : 7729 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", 7730 (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr, 7731 rGPR:$Rm, pred:$p)>; 7732 def VLD4DUPdWB_register_Asm_16 : 7733 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", 7734 (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr, 7735 rGPR:$Rm, pred:$p)>; 7736 def VLD4DUPdWB_register_Asm_32 : 7737 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", 7738 (ins VecListFourDAllLanes:$list, 7739 addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>; 7740 def VLD4DUPqWB_register_Asm_8 : 7741 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", 7742 (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr, 7743 rGPR:$Rm, pred:$p)>; 7744 def VLD4DUPqWB_register_Asm_16 : 7745 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", 7746 (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr, 7747 rGPR:$Rm, pred:$p)>; 7748 def VLD4DUPqWB_register_Asm_32 : 7749 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", 7750 (ins VecListFourQAllLanes:$list, 7751 addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>; 7752 7753 7754 // VLD4 single-lane pseudo-instructions. These need special handling for 7755 // the lane index that an InstAlias can't handle, so we use these instead. 7756 def VLD4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", 7757 (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr, 7758 pred:$p)>; 7759 def VLD4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", 7760 (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr, 7761 pred:$p)>; 7762 def VLD4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", 7763 (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr, 7764 pred:$p)>; 7765 def VLD4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", 7766 (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr, 7767 pred:$p)>; 7768 def VLD4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", 7769 (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr, 7770 pred:$p)>; 7771 7772 def VLD4LNdWB_fixed_Asm_8 : 7773 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", 7774 (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr, 7775 pred:$p)>; 7776 def VLD4LNdWB_fixed_Asm_16 : 7777 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", 7778 (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr, 7779 pred:$p)>; 7780 def VLD4LNdWB_fixed_Asm_32 : 7781 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", 7782 (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr, 7783 pred:$p)>; 7784 def VLD4LNqWB_fixed_Asm_16 : 7785 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", 7786 (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr, 7787 pred:$p)>; 7788 def VLD4LNqWB_fixed_Asm_32 : 7789 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", 7790 (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr, 7791 pred:$p)>; 7792 def VLD4LNdWB_register_Asm_8 : 7793 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", 7794 (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr, 7795 rGPR:$Rm, pred:$p)>; 7796 def VLD4LNdWB_register_Asm_16 : 7797 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", 7798 (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr, 7799 rGPR:$Rm, pred:$p)>; 7800 def VLD4LNdWB_register_Asm_32 : 7801 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", 7802 (ins VecListFourDWordIndexed:$list, 7803 addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>; 7804 def VLD4LNqWB_register_Asm_16 : 7805 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", 7806 (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr, 7807 rGPR:$Rm, pred:$p)>; 7808 def VLD4LNqWB_register_Asm_32 : 7809 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", 7810 (ins VecListFourQWordIndexed:$list, 7811 addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>; 7812 7813 7814 7815 // VLD4 multiple structure pseudo-instructions. These need special handling for 7816 // the vector operands that the normal instructions don't yet model. 7817 // FIXME: Remove these when the register classes and instructions are updated. 7818 def VLD4dAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", 7819 (ins VecListFourD:$list, addrmode6align64or128or256:$addr, 7820 pred:$p)>; 7821 def VLD4dAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", 7822 (ins VecListFourD:$list, addrmode6align64or128or256:$addr, 7823 pred:$p)>; 7824 def VLD4dAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", 7825 (ins VecListFourD:$list, addrmode6align64or128or256:$addr, 7826 pred:$p)>; 7827 def VLD4qAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", 7828 (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, 7829 pred:$p)>; 7830 def VLD4qAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", 7831 (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, 7832 pred:$p)>; 7833 def VLD4qAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", 7834 (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, 7835 pred:$p)>; 7836 7837 def VLD4dWB_fixed_Asm_8 : 7838 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", 7839 (ins VecListFourD:$list, addrmode6align64or128or256:$addr, 7840 pred:$p)>; 7841 def VLD4dWB_fixed_Asm_16 : 7842 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", 7843 (ins VecListFourD:$list, addrmode6align64or128or256:$addr, 7844 pred:$p)>; 7845 def VLD4dWB_fixed_Asm_32 : 7846 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", 7847 (ins VecListFourD:$list, addrmode6align64or128or256:$addr, 7848 pred:$p)>; 7849 def VLD4qWB_fixed_Asm_8 : 7850 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", 7851 (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, 7852 pred:$p)>; 7853 def VLD4qWB_fixed_Asm_16 : 7854 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", 7855 (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, 7856 pred:$p)>; 7857 def VLD4qWB_fixed_Asm_32 : 7858 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", 7859 (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, 7860 pred:$p)>; 7861 def VLD4dWB_register_Asm_8 : 7862 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", 7863 (ins VecListFourD:$list, addrmode6align64or128or256:$addr, 7864 rGPR:$Rm, pred:$p)>; 7865 def VLD4dWB_register_Asm_16 : 7866 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", 7867 (ins VecListFourD:$list, addrmode6align64or128or256:$addr, 7868 rGPR:$Rm, pred:$p)>; 7869 def VLD4dWB_register_Asm_32 : 7870 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", 7871 (ins VecListFourD:$list, addrmode6align64or128or256:$addr, 7872 rGPR:$Rm, pred:$p)>; 7873 def VLD4qWB_register_Asm_8 : 7874 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", 7875 (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, 7876 rGPR:$Rm, pred:$p)>; 7877 def VLD4qWB_register_Asm_16 : 7878 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", 7879 (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, 7880 rGPR:$Rm, pred:$p)>; 7881 def VLD4qWB_register_Asm_32 : 7882 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", 7883 (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, 7884 rGPR:$Rm, pred:$p)>; 7885 7886 // VST4 single-lane pseudo-instructions. These need special handling for 7887 // the lane index that an InstAlias can't handle, so we use these instead. 7888 def VST4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr", 7889 (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr, 7890 pred:$p)>; 7891 def VST4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr", 7892 (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr, 7893 pred:$p)>; 7894 def VST4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr", 7895 (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr, 7896 pred:$p)>; 7897 def VST4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr", 7898 (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr, 7899 pred:$p)>; 7900 def VST4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr", 7901 (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr, 7902 pred:$p)>; 7903 7904 def VST4LNdWB_fixed_Asm_8 : 7905 NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!", 7906 (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr, 7907 pred:$p)>; 7908 def VST4LNdWB_fixed_Asm_16 : 7909 NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!", 7910 (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr, 7911 pred:$p)>; 7912 def VST4LNdWB_fixed_Asm_32 : 7913 NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!", 7914 (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr, 7915 pred:$p)>; 7916 def VST4LNqWB_fixed_Asm_16 : 7917 NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!", 7918 (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr, 7919 pred:$p)>; 7920 def VST4LNqWB_fixed_Asm_32 : 7921 NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!", 7922 (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr, 7923 pred:$p)>; 7924 def VST4LNdWB_register_Asm_8 : 7925 NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm", 7926 (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr, 7927 rGPR:$Rm, pred:$p)>; 7928 def VST4LNdWB_register_Asm_16 : 7929 NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm", 7930 (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr, 7931 rGPR:$Rm, pred:$p)>; 7932 def VST4LNdWB_register_Asm_32 : 7933 NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm", 7934 (ins VecListFourDWordIndexed:$list, 7935 addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>; 7936 def VST4LNqWB_register_Asm_16 : 7937 NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm", 7938 (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr, 7939 rGPR:$Rm, pred:$p)>; 7940 def VST4LNqWB_register_Asm_32 : 7941 NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm", 7942 (ins VecListFourQWordIndexed:$list, 7943 addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>; 7944 7945 7946 // VST4 multiple structure pseudo-instructions. These need special handling for 7947 // the vector operands that the normal instructions don't yet model. 7948 // FIXME: Remove these when the register classes and instructions are updated. 7949 def VST4dAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr", 7950 (ins VecListFourD:$list, addrmode6align64or128or256:$addr, 7951 pred:$p)>; 7952 def VST4dAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr", 7953 (ins VecListFourD:$list, addrmode6align64or128or256:$addr, 7954 pred:$p)>; 7955 def VST4dAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr", 7956 (ins VecListFourD:$list, addrmode6align64or128or256:$addr, 7957 pred:$p)>; 7958 def VST4qAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr", 7959 (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, 7960 pred:$p)>; 7961 def VST4qAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr", 7962 (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, 7963 pred:$p)>; 7964 def VST4qAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr", 7965 (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, 7966 pred:$p)>; 7967 7968 def VST4dWB_fixed_Asm_8 : 7969 NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!", 7970 (ins VecListFourD:$list, addrmode6align64or128or256:$addr, 7971 pred:$p)>; 7972 def VST4dWB_fixed_Asm_16 : 7973 NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!", 7974 (ins VecListFourD:$list, addrmode6align64or128or256:$addr, 7975 pred:$p)>; 7976 def VST4dWB_fixed_Asm_32 : 7977 NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!", 7978 (ins VecListFourD:$list, addrmode6align64or128or256:$addr, 7979 pred:$p)>; 7980 def VST4qWB_fixed_Asm_8 : 7981 NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!", 7982 (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, 7983 pred:$p)>; 7984 def VST4qWB_fixed_Asm_16 : 7985 NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!", 7986 (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, 7987 pred:$p)>; 7988 def VST4qWB_fixed_Asm_32 : 7989 NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!", 7990 (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, 7991 pred:$p)>; 7992 def VST4dWB_register_Asm_8 : 7993 NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm", 7994 (ins VecListFourD:$list, addrmode6align64or128or256:$addr, 7995 rGPR:$Rm, pred:$p)>; 7996 def VST4dWB_register_Asm_16 : 7997 NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm", 7998 (ins VecListFourD:$list, addrmode6align64or128or256:$addr, 7999 rGPR:$Rm, pred:$p)>; 8000 def VST4dWB_register_Asm_32 : 8001 NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm", 8002 (ins VecListFourD:$list, addrmode6align64or128or256:$addr, 8003 rGPR:$Rm, pred:$p)>; 8004 def VST4qWB_register_Asm_8 : 8005 NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm", 8006 (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, 8007 rGPR:$Rm, pred:$p)>; 8008 def VST4qWB_register_Asm_16 : 8009 NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm", 8010 (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, 8011 rGPR:$Rm, pred:$p)>; 8012 def VST4qWB_register_Asm_32 : 8013 NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm", 8014 (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, 8015 rGPR:$Rm, pred:$p)>; 8016 8017 // VMOV/VMVN takes an optional datatype suffix 8018 defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm", 8019 (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>; 8020 defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm", 8021 (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>; 8022 8023 defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm", 8024 (VMVNd DPR:$Vd, DPR:$Vm, pred:$p)>; 8025 defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm", 8026 (VMVNq QPR:$Vd, QPR:$Vm, pred:$p)>; 8027 8028 // VCLT (register) is an assembler alias for VCGT w/ the operands reversed. 8029 // D-register versions. 8030 def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm", 8031 (VCGEsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 8032 def : NEONInstAlias<"vcle${p}.s16 $Dd, $Dn, $Dm", 8033 (VCGEsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 8034 def : NEONInstAlias<"vcle${p}.s32 $Dd, $Dn, $Dm", 8035 (VCGEsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 8036 def : NEONInstAlias<"vcle${p}.u8 $Dd, $Dn, $Dm", 8037 (VCGEuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 8038 def : NEONInstAlias<"vcle${p}.u16 $Dd, $Dn, $Dm", 8039 (VCGEuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 8040 def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm", 8041 (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 8042 def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm", 8043 (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 8044 let Predicates = [HasNEON, HasFullFP16] in 8045 def : NEONInstAlias<"vcle${p}.f16 $Dd, $Dn, $Dm", 8046 (VCGEhd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 8047 // Q-register versions. 8048 def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm", 8049 (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 8050 def : NEONInstAlias<"vcle${p}.s16 $Qd, $Qn, $Qm", 8051 (VCGEsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 8052 def : NEONInstAlias<"vcle${p}.s32 $Qd, $Qn, $Qm", 8053 (VCGEsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 8054 def : NEONInstAlias<"vcle${p}.u8 $Qd, $Qn, $Qm", 8055 (VCGEuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 8056 def : NEONInstAlias<"vcle${p}.u16 $Qd, $Qn, $Qm", 8057 (VCGEuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 8058 def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm", 8059 (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 8060 def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm", 8061 (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 8062 let Predicates = [HasNEON, HasFullFP16] in 8063 def : NEONInstAlias<"vcle${p}.f16 $Qd, $Qn, $Qm", 8064 (VCGEhq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 8065 8066 // VCLT (register) is an assembler alias for VCGT w/ the operands reversed. 8067 // D-register versions. 8068 def : NEONInstAlias<"vclt${p}.s8 $Dd, $Dn, $Dm", 8069 (VCGTsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 8070 def : NEONInstAlias<"vclt${p}.s16 $Dd, $Dn, $Dm", 8071 (VCGTsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 8072 def : NEONInstAlias<"vclt${p}.s32 $Dd, $Dn, $Dm", 8073 (VCGTsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 8074 def : NEONInstAlias<"vclt${p}.u8 $Dd, $Dn, $Dm", 8075 (VCGTuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 8076 def : NEONInstAlias<"vclt${p}.u16 $Dd, $Dn, $Dm", 8077 (VCGTuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 8078 def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm", 8079 (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 8080 def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm", 8081 (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 8082 let Predicates = [HasNEON, HasFullFP16] in 8083 def : NEONInstAlias<"vclt${p}.f16 $Dd, $Dn, $Dm", 8084 (VCGThd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 8085 // Q-register versions. 8086 def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm", 8087 (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 8088 def : NEONInstAlias<"vclt${p}.s16 $Qd, $Qn, $Qm", 8089 (VCGTsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 8090 def : NEONInstAlias<"vclt${p}.s32 $Qd, $Qn, $Qm", 8091 (VCGTsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 8092 def : NEONInstAlias<"vclt${p}.u8 $Qd, $Qn, $Qm", 8093 (VCGTuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 8094 def : NEONInstAlias<"vclt${p}.u16 $Qd, $Qn, $Qm", 8095 (VCGTuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 8096 def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm", 8097 (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 8098 def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm", 8099 (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 8100 let Predicates = [HasNEON, HasFullFP16] in 8101 def : NEONInstAlias<"vclt${p}.f16 $Qd, $Qn, $Qm", 8102 (VCGThq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 8103 8104 // VSWP allows, but does not require, a type suffix. 8105 defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm", 8106 (VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>; 8107 defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm", 8108 (VSWPq QPR:$Vd, QPR:$Vm, pred:$p)>; 8109 8110 // VBIF, VBIT, and VBSL allow, but do not require, a type suffix. 8111 defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm", 8112 (VBIFd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; 8113 defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm", 8114 (VBITd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; 8115 defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm", 8116 (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; 8117 defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm", 8118 (VBIFq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; 8119 defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm", 8120 (VBITq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; 8121 defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm", 8122 (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; 8123 8124 // "vmov Rd, #-imm" can be handled via "vmvn". 8125 def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm", 8126 (VMVNv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>; 8127 def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm", 8128 (VMVNv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>; 8129 def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm", 8130 (VMOVv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>; 8131 def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm", 8132 (VMOVv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>; 8133 8134 // 'gas' compatibility aliases for quad-word instructions. Strictly speaking, 8135 // these should restrict to just the Q register variants, but the register 8136 // classes are enough to match correctly regardless, so we keep it simple 8137 // and just use MnemonicAlias. 8138 def : NEONMnemonicAlias<"vbicq", "vbic">; 8139 def : NEONMnemonicAlias<"vandq", "vand">; 8140 def : NEONMnemonicAlias<"veorq", "veor">; 8141 def : NEONMnemonicAlias<"vorrq", "vorr">; 8142 8143 def : NEONMnemonicAlias<"vmovq", "vmov">; 8144 def : NEONMnemonicAlias<"vmvnq", "vmvn">; 8145 // Explicit versions for floating point so that the FPImm variants get 8146 // handled early. The parser gets confused otherwise. 8147 def : NEONMnemonicAlias<"vmovq.f32", "vmov.f32">; 8148 def : NEONMnemonicAlias<"vmovq.f64", "vmov.f64">; 8149 8150 def : NEONMnemonicAlias<"vaddq", "vadd">; 8151 def : NEONMnemonicAlias<"vsubq", "vsub">; 8152 8153 def : NEONMnemonicAlias<"vminq", "vmin">; 8154 def : NEONMnemonicAlias<"vmaxq", "vmax">; 8155 8156 def : NEONMnemonicAlias<"vmulq", "vmul">; 8157 8158 def : NEONMnemonicAlias<"vabsq", "vabs">; 8159 8160 def : NEONMnemonicAlias<"vshlq", "vshl">; 8161 def : NEONMnemonicAlias<"vshrq", "vshr">; 8162 8163 def : NEONMnemonicAlias<"vcvtq", "vcvt">; 8164 8165 def : NEONMnemonicAlias<"vcleq", "vcle">; 8166 def : NEONMnemonicAlias<"vceqq", "vceq">; 8167 8168 def : NEONMnemonicAlias<"vzipq", "vzip">; 8169 def : NEONMnemonicAlias<"vswpq", "vswp">; 8170 8171 def : NEONMnemonicAlias<"vrecpeq.f32", "vrecpe.f32">; 8172 def : NEONMnemonicAlias<"vrecpeq.u32", "vrecpe.u32">; 8173 8174 8175 // Alias for loading floating point immediates that aren't representable 8176 // using the vmov.f32 encoding but the bitpattern is representable using 8177 // the .i32 encoding. 8178 def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm", 8179 (VMOVv4i32 QPR:$Vd, nImmVMOVI32:$imm, pred:$p)>; 8180 def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm", 8181 (VMOVv2i32 DPR:$Vd, nImmVMOVI32:$imm, pred:$p)>; 8182