//===-- ARMInstrNEON.td - NEON support for ARM -------------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the ARM NEON instruction set.
//
//===----------------------------------------------------------------------===//


//===----------------------------------------------------------------------===//
// NEON-specific Operands.
//===----------------------------------------------------------------------===//

// NEON modified-immediate operand with no asm-parser match class; used where
// the operand is produced by instruction selection rather than the parser.
def nModImm : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
}

// Modified-immediate operands, one per supported element size / usage.
// Each pairs an AsmOperandClass (asm-parser classification) with an Operand
// (used in instruction definitions); all print via printNEONModImmOperand.
def nImmSplatI8AsmOperand : AsmOperandClass { let Name = "NEONi8splat"; }
def nImmSplatI8 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI8AsmOperand;
}
def nImmSplatI16AsmOperand : AsmOperandClass { let Name = "NEONi16splat"; }
def nImmSplatI16 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI16AsmOperand;
}
def nImmSplatI32AsmOperand : AsmOperandClass { let Name = "NEONi32splat"; }
def nImmSplatI32 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI32AsmOperand;
}
def nImmVMOVI32AsmOperand : AsmOperandClass { let Name = "NEONi32vmov"; }
def nImmVMOVI32 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVMOVI32AsmOperand;
}
def nImmVMOVI32NegAsmOperand : AsmOperandClass { let Name = "NEONi32vmovNeg"; }
def nImmVMOVI32Neg : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVMOVI32NegAsmOperand;
}
// Floating-point immediate (prints as an FP constant, not a NEON mod-imm).
def nImmVMOVF32 : Operand<i32> {
  let PrintMethod = "printFPImmOperand";
  let ParserMatchClass = FPImmOperand;
}
def nImmSplatI64AsmOperand : AsmOperandClass { let Name = "NEONi64splat"; }
def nImmSplatI64 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI64AsmOperand;
}

// Vector lane-index operands. The ImmLeaf predicate bounds the index by the
// lane count for the element size (8 x i8, 4 x i16, 2 x i32 lanes).
def VectorIndex8Operand : AsmOperandClass { let Name = "VectorIndex8"; }
def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; }
def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; }
def VectorIndex8 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 8;
}]> {
  let ParserMatchClass = VectorIndex8Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex16 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 4;
}]> {
  let ParserMatchClass = VectorIndex16Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 2;
}]> {
  let ParserMatchClass = VectorIndex32Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}

// Register list of one D register.
def VecListOneDAsmOperand : AsmOperandClass {
  let Name = "VecListOneD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> {
  let ParserMatchClass = VecListOneDAsmOperand;
}
// Register list of two sequential D registers.
def VecListDPairAsmOperand : AsmOperandClass {
  let Name = "VecListDPair";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPair : RegisterOperand<DPair, "printVectorListTwo"> {
  let ParserMatchClass = VecListDPairAsmOperand;
}
// Register list of three sequential D registers.
def VecListThreeDAsmOperand : AsmOperandClass {
  let Name = "VecListThreeD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> {
  let ParserMatchClass = VecListThreeDAsmOperand;
}
// Register list of four sequential D registers.
def VecListFourDAsmOperand : AsmOperandClass {
  let Name = "VecListFourD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> {
  let ParserMatchClass = VecListFourDAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers).
def VecListDPairSpacedAsmOperand : AsmOperandClass {
  let Name = "VecListDPairSpaced";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListTwoSpaced"> {
  let ParserMatchClass = VecListDPairSpacedAsmOperand;
}
// Register list of three D registers spaced by 2 (three Q registers).
def VecListThreeQAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQ";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeQ : RegisterOperand<DPR, "printVectorListThreeSpaced"> {
  let ParserMatchClass = VecListThreeQAsmOperand;
}
// Register list of four D registers spaced by 2 (four Q registers).
def VecListFourQAsmOperand : AsmOperandClass {
  let Name = "VecListFourQ";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourQ : RegisterOperand<DPR, "printVectorListFourSpaced"> {
  let ParserMatchClass = VecListFourQAsmOperand;
}

// Register list of one D register, with "all lanes" subscripting.
def VecListOneDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListOneDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> {
  let ParserMatchClass = VecListOneDAllLanesAsmOperand;
}
// Register list of two D registers, with "all lanes" subscripting.
def VecListDPairAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListDPairAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairAllLanes : RegisterOperand<DPair,
                                           "printVectorListTwoAllLanes"> {
  let ParserMatchClass = VecListDPairAllLanesAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers),
// with "all lanes" subscripting.
def VecListDPairSpacedAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListDPairSpacedAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairSpacedAllLanes : RegisterOperand<DPair,
                                            "printVectorListTwoSpacedAllLanes"> {
  let ParserMatchClass = VecListDPairSpacedAllLanesAsmOperand;
}
// Register list of three D registers, with "all lanes" subscripting.
def VecListThreeDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeDAllLanes : RegisterOperand<DPR,
                                            "printVectorListThreeAllLanes"> {
  let ParserMatchClass = VecListThreeDAllLanesAsmOperand;
}
// Register list of three D registers spaced by 2 (three sequential Q regs).
def VecListThreeQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeQAllLanes : RegisterOperand<DPR,
                                          "printVectorListThreeSpacedAllLanes"> {
  let ParserMatchClass = VecListThreeQAllLanesAsmOperand;
}
// Register list of four D registers, with "all lanes" subscripting.
def VecListFourDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListFourDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourDAllLanes : RegisterOperand<DPR, "printVectorListFourAllLanes"> {
  let ParserMatchClass = VecListFourDAllLanesAsmOperand;
}
// Register list of four D registers spaced by 2 (four sequential Q regs).
def VecListFourQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListFourQAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourQAllLanes : RegisterOperand<DPR,
                                           "printVectorListFourSpacedAllLanes"> {
  let ParserMatchClass = VecListFourQAllLanesAsmOperand;
}


// Lane-indexed vector lists: rendered as a (register, lane-index) operand
// pair rather than a plain register operand.
// Register list of one D register, with byte lane subscripting.
def VecListOneDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListOneDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListOneDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// Register list of two D registers with byte lane subscripting.
def VecListTwoDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListTwoDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListTwoDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of two Q registers with half-word lane subscripting.
def VecListTwoQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListTwoQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}


// Register list of three D registers with byte lane subscripting.
def VecListThreeDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListThreeDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListThreeDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of three Q registers with half-word lane subscripting.
def VecListThreeQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListThreeQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// Register list of four D registers with byte lane subscripting.
def VecListFourDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListFourDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListFourDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of four Q registers with half-word lane subscripting.
def VecListFourQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListFourQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// PatFrags that classify generic loads/stores by the alignment of the
// memory access, so patterns can select on alignment.
def dword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() >= 8;
}]>;
def dword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                 (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() >= 8;
}]>;
def word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 4;
}]>;
def word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 4;
}]>;
def hword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 2;
}]>;
def hword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                 (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 2;
}]>;
def byte_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 1;
}]>;
def byte_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 1;
}]>;
def non_word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() < 4;
}]>;
def non_word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                    (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() < 4;
}]>;

//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//

// Vector compares: integer result vector, both source operands of one type.
def SDTARMVCMP : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
// Compare-against-zero form: a single source operand, types unconstrained.
def SDTARMVCMPZ : SDTypeProfile<1, 1, []>;

def NEONvceq : SDNode<"ARMISD::VCEQ", SDTARMVCMP>;
def NEONvceqz : SDNode<"ARMISD::VCEQZ", SDTARMVCMPZ>;
def NEONvcge : SDNode<"ARMISD::VCGE", SDTARMVCMP>;
def NEONvcgez : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>;
def NEONvclez : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>;
def NEONvcgeu : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
def NEONvcgt : SDNode<"ARMISD::VCGT", SDTARMVCMP>;
def NEONvcgtz : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>;
def NEONvcltz : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>;
def NEONvcgtu : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVCMP>;

// Types for vector shift by immediates. The "SHX" version is for long and
// narrow operations where the source and destination vectors have different
// types. The "SHINS" version is for shift and insert operations.
def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                     SDTCisVT<2, i32>]>;
def SDTARMVSHX : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                      SDTCisVT<2, i32>]>;
def SDTARMVSHINS : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                        SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;

// Plain shifts by immediate (result type matches the source).
def NEONvshl      : SDNode<"ARMISD::VSHL", SDTARMVSH>;
def NEONvshrs     : SDNode<"ARMISD::VSHRs", SDTARMVSH>;
def NEONvshru     : SDNode<"ARMISD::VSHRu", SDTARMVSH>;
// Long/narrow shifts (result type not tied to the source type).
def NEONvshlls    : SDNode<"ARMISD::VSHLLs", SDTARMVSHX>;
def NEONvshllu    : SDNode<"ARMISD::VSHLLu", SDTARMVSHX>;
def NEONvshlli    : SDNode<"ARMISD::VSHLLi", SDTARMVSHX>;
def NEONvshrn     : SDNode<"ARMISD::VSHRN", SDTARMVSHX>;

// Rounding shifts.
def NEONvrshrs    : SDNode<"ARMISD::VRSHRs", SDTARMVSH>;
def NEONvrshru    : SDNode<"ARMISD::VRSHRu", SDTARMVSH>;
def NEONvrshrn    : SDNode<"ARMISD::VRSHRN", SDTARMVSHX>;

// Saturating shifts.
def NEONvqshls    : SDNode<"ARMISD::VQSHLs", SDTARMVSH>;
def NEONvqshlu    : SDNode<"ARMISD::VQSHLu", SDTARMVSH>;
def NEONvqshlsu   : SDNode<"ARMISD::VQSHLsu", SDTARMVSH>;
def NEONvqshrns   : SDNode<"ARMISD::VQSHRNs", SDTARMVSHX>;
def NEONvqshrnu   : SDNode<"ARMISD::VQSHRNu", SDTARMVSHX>;
def NEONvqshrnsu  : SDNode<"ARMISD::VQSHRNsu", SDTARMVSHX>;

// Saturating rounding narrow shifts.
def NEONvqrshrns  : SDNode<"ARMISD::VQRSHRNs", SDTARMVSHX>;
def NEONvqrshrnu  : SDNode<"ARMISD::VQRSHRNu", SDTARMVSHX>;
def NEONvqrshrnsu : SDNode<"ARMISD::VQRSHRNsu", SDTARMVSHX>;

// Shift-and-insert operations (take an extra "insert into" operand).
def NEONvsli      : SDNode<"ARMISD::VSLI", SDTARMVSHINS>;
def NEONvsri      : SDNode<"ARMISD::VSRI", SDTARMVSHINS>;

// Extract a lane from an integer vector into an i32.
def SDTARMVGETLN  : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
                                         SDTCisVT<2, i32>]>;
def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;

// Build a vector from an encoded NEON modified-immediate (i32 operand).
def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
def NEONvmovImm   : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
def NEONvmvnImm   : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
def NEONvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>;

// VORR/VBIC with an encoded modified-immediate second operand.
def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisVT<2, i32>]>;
def NEONvorrImm   : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>;
def NEONvbicImm   : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>;

// Bitwise select: all three sources and the result share one vector type.
def NEONvbsl      : SDNode<"ARMISD::VBSL",
                           SDTypeProfile<1, 3, [SDTCisVec<0>,
                                                SDTCisSameAs<0, 1>,
                                                SDTCisSameAs<0, 2>,
                                                SDTCisSameAs<0, 3>]>>;

def NEONvdup      : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;

// VDUPLANE can produce a quad-register result from a double-register source,
// so the result is not constrained to match the source.
def NEONvduplane  : SDNode<"ARMISD::VDUPLANE",
                           SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
                                                SDTCisVT<2, i32>]>>;

// Vector extract-from-pair with an i32 byte offset operand.
def SDTARMVEXT    : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
def NEONvext      : SDNode<"ARMISD::VEXT", SDTARMVEXT>;

// One-result, one-source shuffles (element reversal).
def SDTARMVSHUF   : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
def NEONvrev64    : SDNode<"ARMISD::VREV64", SDTARMVSHUF>;
def NEONvrev32    : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
def NEONvrev16    : SDNode<"ARMISD::VREV16", SDTARMVSHUF>;

// Two-result shuffles: VZIP/VUZP/VTRN produce both halves of the shuffle.
def SDTARMVSHUF2  : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>,
                                         SDTCisSameAs<0, 3>]>;
def NEONzip       : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
def NEONuzp       : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
def NEONtrn       : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;

// Long multiplies: both sources share an integer type; the result type is
// left unconstrained (wider than the sources for VMULL).
def SDTARMVMULL   : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                         SDTCisSameAs<1, 2>]>;
def NEONvmulls    : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
def NEONvmullu    : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;

// Scalar f32 max/min.
def SDTARMFMAX    : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>]>;
def NEONfmax      : SDNode<"ARMISD::FMAX", SDTARMFMAX>;
def NEONfmin      : SDNode<"ARMISD::FMIN", SDTARMFMAX>;

// Recognize all-zeros / all-ones vector constants by decoding the encoded
// modified-immediate operand of a NEONvmovImm node.
def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{
  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
  unsigned EltBits = 0;
  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
  return (EltBits == 32 && EltVal == 0);
}]>;

def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{
  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
  unsigned EltBits = 0;
  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
  return (EltBits == 8 && EltVal == 0xff);
}]>;

//===----------------------------------------------------------------------===//
// NEON load / store instructions
//===----------------------------------------------------------------------===//

// Use VLDM to load a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VLDMD after reg alloc.
def VLDMQIA
  : PseudoVFPLdStM<(outs DPair:$dst), (ins GPR:$Rn),
                   IIC_fpLoad_m, "",
                   [(set DPair:$dst, (v2f64 (load GPR:$Rn)))]>;

// Use VSTM to store a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VSTMD after reg alloc.
def VSTMQIA
  : PseudoVFPLdStM<(outs), (ins DPair:$src, GPR:$Rn),
                   IIC_fpStore_m, "",
                   [(store (v2f64 DPair:$src), GPR:$Rn)]>;

// Classes for VLD* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
581 class VLDQPseudo<InstrItinClass itin> 582 : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">; 583 class VLDQWBPseudo<InstrItinClass itin> 584 : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), 585 (ins addrmode6:$addr, am6offset:$offset), itin, 586 "$addr.addr = $wb">; 587 class VLDQWBfixedPseudo<InstrItinClass itin> 588 : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), 589 (ins addrmode6:$addr), itin, 590 "$addr.addr = $wb">; 591 class VLDQWBregisterPseudo<InstrItinClass itin> 592 : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), 593 (ins addrmode6:$addr, rGPR:$offset), itin, 594 "$addr.addr = $wb">; 595 596 class VLDQQPseudo<InstrItinClass itin> 597 : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">; 598 class VLDQQWBPseudo<InstrItinClass itin> 599 : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), 600 (ins addrmode6:$addr, am6offset:$offset), itin, 601 "$addr.addr = $wb">; 602 class VLDQQWBfixedPseudo<InstrItinClass itin> 603 : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), 604 (ins addrmode6:$addr), itin, 605 "$addr.addr = $wb">; 606 class VLDQQWBregisterPseudo<InstrItinClass itin> 607 : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), 608 (ins addrmode6:$addr, rGPR:$offset), itin, 609 "$addr.addr = $wb">; 610 611 612 class VLDQQQQPseudo<InstrItinClass itin> 613 : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin, 614 "$src = $dst">; 615 class VLDQQQQWBPseudo<InstrItinClass itin> 616 : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb), 617 (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin, 618 "$addr.addr = $wb, $src = $dst">; 619 620 let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { 621 622 // VLD1 : Vector Load (multiple single elements) 623 class VLD1D<bits<4> op7_4, string Dt> 624 : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd), 625 (ins addrmode6:$Rn), IIC_VLD1, 626 "vld1", Dt, "$Vd, $Rn", "", []> { 627 let Rm = 0b1111; 628 let Inst{4} = Rn{4}; 629 let DecoderMethod = "DecodeVLDST1Instruction"; 630 } 631 class VLD1Q<bits<4> op7_4, 
string Dt> 632 : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd), 633 (ins addrmode6:$Rn), IIC_VLD1x2, 634 "vld1", Dt, "$Vd, $Rn", "", []> { 635 let Rm = 0b1111; 636 let Inst{5-4} = Rn{5-4}; 637 let DecoderMethod = "DecodeVLDST1Instruction"; 638 } 639 640 def VLD1d8 : VLD1D<{0,0,0,?}, "8">; 641 def VLD1d16 : VLD1D<{0,1,0,?}, "16">; 642 def VLD1d32 : VLD1D<{1,0,0,?}, "32">; 643 def VLD1d64 : VLD1D<{1,1,0,?}, "64">; 644 645 def VLD1q8 : VLD1Q<{0,0,?,?}, "8">; 646 def VLD1q16 : VLD1Q<{0,1,?,?}, "16">; 647 def VLD1q32 : VLD1Q<{1,0,?,?}, "32">; 648 def VLD1q64 : VLD1Q<{1,1,?,?}, "64">; 649 650 // ...with address register writeback: 651 multiclass VLD1DWB<bits<4> op7_4, string Dt> { 652 def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), 653 (ins addrmode6:$Rn), IIC_VLD1u, 654 "vld1", Dt, "$Vd, $Rn!", 655 "$Rn.addr = $wb", []> { 656 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 657 let Inst{4} = Rn{4}; 658 let DecoderMethod = "DecodeVLDST1Instruction"; 659 } 660 def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), 661 (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1u, 662 "vld1", Dt, "$Vd, $Rn, $Rm", 663 "$Rn.addr = $wb", []> { 664 let Inst{4} = Rn{4}; 665 let DecoderMethod = "DecodeVLDST1Instruction"; 666 } 667 } 668 multiclass VLD1QWB<bits<4> op7_4, string Dt> { 669 def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb), 670 (ins addrmode6:$Rn), IIC_VLD1x2u, 671 "vld1", Dt, "$Vd, $Rn!", 672 "$Rn.addr = $wb", []> { 673 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
674 let Inst{5-4} = Rn{5-4}; 675 let DecoderMethod = "DecodeVLDST1Instruction"; 676 } 677 def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb), 678 (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u, 679 "vld1", Dt, "$Vd, $Rn, $Rm", 680 "$Rn.addr = $wb", []> { 681 let Inst{5-4} = Rn{5-4}; 682 let DecoderMethod = "DecodeVLDST1Instruction"; 683 } 684 } 685 686 defm VLD1d8wb : VLD1DWB<{0,0,0,?}, "8">; 687 defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16">; 688 defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32">; 689 defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64">; 690 defm VLD1q8wb : VLD1QWB<{0,0,?,?}, "8">; 691 defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16">; 692 defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32">; 693 defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64">; 694 695 // ...with 3 registers 696 class VLD1D3<bits<4> op7_4, string Dt> 697 : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd), 698 (ins addrmode6:$Rn), IIC_VLD1x3, "vld1", Dt, 699 "$Vd, $Rn", "", []> { 700 let Rm = 0b1111; 701 let Inst{4} = Rn{4}; 702 let DecoderMethod = "DecodeVLDST1Instruction"; 703 } 704 multiclass VLD1D3WB<bits<4> op7_4, string Dt> { 705 def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb), 706 (ins addrmode6:$Rn), IIC_VLD1x2u, 707 "vld1", Dt, "$Vd, $Rn!", 708 "$Rn.addr = $wb", []> { 709 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
710 let Inst{4} = Rn{4}; 711 let DecoderMethod = "DecodeVLDST1Instruction"; 712 } 713 def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb), 714 (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u, 715 "vld1", Dt, "$Vd, $Rn, $Rm", 716 "$Rn.addr = $wb", []> { 717 let Inst{4} = Rn{4}; 718 let DecoderMethod = "DecodeVLDST1Instruction"; 719 } 720 } 721 722 def VLD1d8T : VLD1D3<{0,0,0,?}, "8">; 723 def VLD1d16T : VLD1D3<{0,1,0,?}, "16">; 724 def VLD1d32T : VLD1D3<{1,0,0,?}, "32">; 725 def VLD1d64T : VLD1D3<{1,1,0,?}, "64">; 726 727 defm VLD1d8Twb : VLD1D3WB<{0,0,0,?}, "8">; 728 defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16">; 729 defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32">; 730 defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64">; 731 732 def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>; 733 734 // ...with 4 registers 735 class VLD1D4<bits<4> op7_4, string Dt> 736 : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd), 737 (ins addrmode6:$Rn), IIC_VLD1x4, "vld1", Dt, 738 "$Vd, $Rn", "", []> { 739 let Rm = 0b1111; 740 let Inst{5-4} = Rn{5-4}; 741 let DecoderMethod = "DecodeVLDST1Instruction"; 742 } 743 multiclass VLD1D4WB<bits<4> op7_4, string Dt> { 744 def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb), 745 (ins addrmode6:$Rn), IIC_VLD1x2u, 746 "vld1", Dt, "$Vd, $Rn!", 747 "$Rn.addr = $wb", []> { 748 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
749 let Inst{5-4} = Rn{5-4}; 750 let DecoderMethod = "DecodeVLDST1Instruction"; 751 } 752 def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb), 753 (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u, 754 "vld1", Dt, "$Vd, $Rn, $Rm", 755 "$Rn.addr = $wb", []> { 756 let Inst{5-4} = Rn{5-4}; 757 let DecoderMethod = "DecodeVLDST1Instruction"; 758 } 759 } 760 761 def VLD1d8Q : VLD1D4<{0,0,?,?}, "8">; 762 def VLD1d16Q : VLD1D4<{0,1,?,?}, "16">; 763 def VLD1d32Q : VLD1D4<{1,0,?,?}, "32">; 764 def VLD1d64Q : VLD1D4<{1,1,?,?}, "64">; 765 766 defm VLD1d8Qwb : VLD1D4WB<{0,0,?,?}, "8">; 767 defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16">; 768 defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32">; 769 defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64">; 770 771 def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>; 772 773 // VLD2 : Vector Load (multiple 2-element structures) 774 class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, 775 InstrItinClass itin> 776 : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd), 777 (ins addrmode6:$Rn), itin, 778 "vld2", Dt, "$Vd, $Rn", "", []> { 779 let Rm = 0b1111; 780 let Inst{5-4} = Rn{5-4}; 781 let DecoderMethod = "DecodeVLDST2Instruction"; 782 } 783 784 def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2>; 785 def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2>; 786 def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2>; 787 788 def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2>; 789 def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2>; 790 def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2>; 791 792 def VLD2q8Pseudo : VLDQQPseudo<IIC_VLD2x2>; 793 def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>; 794 def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>; 795 796 // ...with address register writeback: 797 multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt, 798 RegisterOperand VdTy, InstrItinClass itin> { 799 def _fixed : NLdSt<0, 0b10, op11_8, op7_4, 
                     (outs VdTy:$Vd, GPR:$wb),
                     (ins addrmode6:$Rn), itin,
                     "vld2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
  def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm), itin,
                        "vld2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
}

defm VLD2d8wb  : VLD2WB<0b1000, {0,0,?,?}, "8",  VecListDPair, IIC_VLD2u>;
defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u>;
defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u>;

defm VLD2q8wb  : VLD2WB<0b0011, {0,0,?,?}, "8",  VecListFourD, IIC_VLD2x2u>;
defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u>;
defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u>;

def VLD2q8PseudoWB_fixed     : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
def VLD2q16PseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
def VLD2q32PseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
def VLD2q8PseudoWB_register  : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;

// ...with double-spaced registers
def VLD2b8  : VLD2<0b1001, {0,0,?,?}, "8",  VecListDPairSpaced, IIC_VLD2>;
def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2>;
def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2>;
defm VLD2b8wb  : VLD2WB<0b1001, {0,0,?,?}, "8",  VecListDPairSpaced, IIC_VLD2u>;
defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u>;
defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u>;

// VLD3 : Vector Load (multiple 3-element structures)
class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
          (ins addrmode6:$Rn), IIC_VLD3,
          "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST3Instruction";
}

def VLD3d8  : VLD3D<0b0100, {0,0,0,?}, "8">;
def VLD3d16 : VLD3D<0b0100, {0,1,0,?}, "16">;
def VLD3d32 : VLD3D<0b0100, {1,0,0,?}, "32">;

def VLD3d8Pseudo  : VLDQQPseudo<IIC_VLD3>;
def VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>;
def VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>;

// ...with address register writeback:
class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u,
          "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST3Instruction";
}

def VLD3d8_UPD  : VLD3DWB<0b0100, {0,0,0,?}, "8">;
def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">;
def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">;

def VLD3d8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3u>;
def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;

// ...with double-spaced registers:
def VLD3q8      : VLD3D<0b0101, {0,0,0,?}, "8">;
def VLD3q16     : VLD3D<0b0101, {0,1,0,?}, "16">;
def VLD3q32     : VLD3D<0b0101, {1,0,0,?}, "32">;
def VLD3q8_UPD  : VLD3DWB<0b0101, {0,0,0,?}, "8">;
def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">;
def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">;

def VLD3q8Pseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3u>;
def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;

// ...alternate versions to be allocated odd register numbers:
def VLD3q8oddPseudo  : VLDQQQQPseudo<IIC_VLD3>;
def VLD3q16oddPseudo : VLDQQQQPseudo<IIC_VLD3>;
def VLD3q32oddPseudo : VLDQQQQPseudo<IIC_VLD3>;

def VLD3q8oddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3u>;
def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;

// VLD4 : Vector Load (multiple 4-element structures)
class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6:$Rn), IIC_VLD4,
          "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST4Instruction";
}

def VLD4d8  : VLD4D<0b0000, {0,0,?,?}, "8">;
def VLD4d16 : VLD4D<0b0000, {0,1,?,?}, "16">;
def VLD4d32 : VLD4D<0b0000, {1,0,?,?}, "32">;

def VLD4d8Pseudo  : VLDQQPseudo<IIC_VLD4>;
def VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>;
def VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>;

// ...with address register writeback:
class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u,
          "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST4Instruction";
}

def VLD4d8_UPD  : VLD4DWB<0b0000, {0,0,?,?}, "8">;
def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">;
def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">;

def VLD4d8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4u>;
def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;

// ...with double-spaced registers:
def VLD4q8  : VLD4D<0b0001, {0,0,?,?}, "8">;
def VLD4q16 : VLD4D<0b0001, {0,1,?,?}, "16">;
def VLD4q32 : VLD4D<0b0001, {1,0,?,?},
                          "32">;
def VLD4q8_UPD  : VLD4DWB<0b0001, {0,0,?,?}, "8">;
def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">;
def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">;

def VLD4q8Pseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4u>;
def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;

// ...alternate versions to be allocated odd register numbers:
def VLD4q8oddPseudo  : VLDQQQQPseudo<IIC_VLD4>;
def VLD4q16oddPseudo : VLDQQQQPseudo<IIC_VLD4>;
def VLD4q32oddPseudo : VLDQQQQPseudo<IIC_VLD4>;

def VLD4q8oddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4u>;
def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;

} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1

// Classes for VLD*LN pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
class VLDQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst),
                (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
                itin, "$src = $dst">;
class VLDQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
class VLDQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst),
                (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
                itin, "$src = $dst">;
class VLDQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
class VLDQQQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst),
                (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
                itin, "$src = $dst">;
class VLDQQQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;

// VLD1LN : Vector Load (single element to one lane)
class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
             PatFrag LoadOp>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
            (ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane),
            IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
            "$src = $Vd",
            [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
                                          (i32 (LoadOp addrmode6:$Rn)),
                                          imm:$lane))]> {
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVLD1LN";
}
// Same as VLD1LN but with the one-lane 32-bit addressing mode (different
// legal alignments for the 32-bit element form).
class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
               PatFrag LoadOp>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
            (ins addrmode6oneL32:$Rn, DPR:$src, nohash_imm:$lane),
            IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
            "$src = $Vd",
            [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
                                          (i32 (LoadOp addrmode6oneL32:$Rn)),
                                          imm:$lane))]> {
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVLD1LN";
}
class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln> {
  let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src),
                                               (i32 (LoadOp addrmode6:$addr)),
                                               imm:$lane))];
}

def VLD1LNd8  : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> {
  let Inst{7-5} = lane{2-0};
}
def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> {
  let Inst{7-6} = lane{1-0};
  let Inst{5-4} = Rn{5-4};
}
def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> {
  let Inst{7} = lane{0};
  let Inst{5-4} = Rn{5-4};
}

def VLD1LNq8Pseudo  : VLD1QLNPseudo<v16i8, extloadi8>;
def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>;
def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>;

def : Pat<(vector_insert (v2f32 DPR:$src),
                         (f32 (load addrmode6:$addr)), imm:$lane),
          (VLD1LNd32 addrmode6:$addr,
                     DPR:$src, imm:$lane)>;
def : Pat<(vector_insert (v4f32 QPR:$src),
                         (f32 (load addrmode6:$addr)), imm:$lane),
          (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;

let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {

// ...with address register writeback:
class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt,
            "\\{$Vd[$lane]\\}, $Rn$Rm",
            "$src = $Vd, $Rn.addr = $wb", []> {
  let DecoderMethod = "DecodeVLD1LN";
}

def VLD1LNd8_UPD  : VLD1LNWB<0b0000, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
  let Inst{4} = Rn{4};
}
def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  // NOTE(review): both index_align bits come from Rn{4} here, whereas the
  // non-writeback VLD1LNd32 above uses Inst{5-4} = Rn{5-4}. Confirm against
  // the ARM ARM index_align encoding for the size==32 case before changing.
  let Inst{5} = Rn{4};
  let Inst{4} = Rn{4};
}

def VLD1LNq8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD1lnu>;
def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;

// VLD2LN : Vector Load (single 2-element structure to one lane)
class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2),
            (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane),
            IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn",
            "$src1 = $Vd, $src2 = $dst2", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD2LN";
}

def VLD2LNd8  : VLD2LN<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNd8Pseudo  : VLDQLNPseudo<IIC_VLD2ln>;
def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>;

// ...with double-spaced registers:
def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;
def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;

// ...with address register writeback:
class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2lnu, "vld2", Dt,
            "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm",
            "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD2LN";
}

def VLD2LNd8_UPD  : VLD2LNWB<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNd8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD2lnu>;
def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;

def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;
def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;

// VLD3LN : Vector Load (single 3-element structure to one lane)
class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
            (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3,
             nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt,
            "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn",
            "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []> {
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVLD3LN";
}

def VLD3LNd8  : VLD3LN<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD3ln>;
def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;

// ...with double-spaced registers:
def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;
def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;

// ...with address register writeback:
class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
            IIC_VLD3lnu, "vld3", Dt,
            "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm",
            "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb",
            []> {
  let DecoderMethod = "DecodeVLD3LN";
}

def VLD3LNd8_UPD  : VLD3LNWB<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD3lnu>;
def VLD3LNd16Pseudo_UPD :
                          VLDQQLNWBPseudo<IIC_VLD3lnu>;
def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;

def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;

// VLD4LN : Vector Load (single 4-element structure to one lane)
class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
            (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
             nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt,
            "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn",
            "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4LN";
}

def VLD4LNd8  : VLD4LN<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD4ln>;
def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;

// ...with double-spaced registers:
def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;

// ...with address register writeback:
class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
            IIC_VLD4lnu, "vld4", Dt,
    "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm",
    "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb",
            []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4LN";
}

def VLD4LNd8_UPD  : VLD4LNWB<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD4lnu>;
def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;

def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;

} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1

// VLD1DUP : Vector Load (single element to all lanes)
class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
  : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd),
          (ins addrmode6dup:$Rn),
          IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "",
          [(set VecListOneDAllLanes:$Vd,
                (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD1DupInstruction";
}
def VLD1DUPd8  : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8>;
def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16",
                         v4i16, extloadi16>;
def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load>;

// NOTE(review): the pattern matches addrmode6dup:$addr but the result uses
// addrmode6:$addr — confirm the operand classes are interchangeable here.
def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
          (VLD1DUPd32 addrmode6:$addr)>;

class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
  : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd),
          (ins addrmode6dup:$Rn), IIC_VLD1dup,
          "vld1", Dt, "$Vd, $Rn", "",
          [(set VecListDPairAllLanes:$Vd,
                (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD1DupInstruction";
}

def VLD1DUPq8  : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8>;
def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16>;
def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load>;

def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
          (VLD1DUPq32 addrmode6:$addr)>;

let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
// ...with address register writeback:
multiclass VLD1DUPWB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
                     (outs VecListOneDAllLanes:$Vd, GPR:$wb),
                     (ins addrmode6dup:$Rn), IIC_VLD1dupu,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
  def _register : NLdSt<1, 0b10, 0b1100, op7_4,
                        (outs VecListOneDAllLanes:$Vd, GPR:$wb),
                        (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
}
multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
                     (outs VecListDPairAllLanes:$Vd, GPR:$wb),
                     (ins addrmode6dup:$Rn), IIC_VLD1dupu,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
  def _register : NLdSt<1, 0b10, 0b1100, op7_4,
                        (outs VecListDPairAllLanes:$Vd, GPR:$wb),
                        (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
}

defm VLD1DUPd8wb  : VLD1DUPWB<{0,0,0,0}, "8">;
defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16">;
defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32">;

defm VLD1DUPq8wb  : VLD1QDUPWB<{0,0,1,0}, "8">;
defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16">;
defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32">;

// VLD2DUP : Vector Load (single 2-element structure to all lanes)
class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy>
  : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd),
          (ins addrmode6dup:$Rn), IIC_VLD2dup,
          "vld2", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD2DupInstruction";
}

def VLD2DUPd8  : VLD2DUP<{0,0,0,?}, "8",  VecListDPairAllLanes>;
def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes>;
def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes>;

// ...with double-spaced registers
def VLD2DUPd8x2  : VLD2DUP<{0,0,1,?}, "8",  VecListDPairSpacedAllLanes>;
def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes>;
def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes>;

// ...with address register writeback:
multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> {
  def _fixed : NLdSt<1, 0b10, 0b1101, op7_4,
                     (outs VdTy:$Vd, GPR:$wb),
                     (ins addrmode6dup:$Rn), IIC_VLD2dupu,
                     "vld2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD2DupInstruction";
  }
  def _register : NLdSt<1, 0b10, 0b1101, op7_4,
                        (outs VdTy:$Vd, GPR:$wb),
                        (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD2dupu,
                        "vld2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD2DupInstruction";
  }
}

defm VLD2DUPd8wb  : VLD2DUPWB<{0,0,0,0}, "8",  VecListDPairAllLanes>;
defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes>;
defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes>;

defm VLD2DUPd8x2wb  : VLD2DUPWB<{0,0,1,0}, "8",  VecListDPairSpacedAllLanes>;
defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes>;
defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes>;

// VLD3DUP : Vector Load (single 3-element structure to all lanes)
class VLD3DUP<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
          (ins addrmode6dup:$Rn), IIC_VLD3dup,
          "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = 0;
  let DecoderMethod = "DecodeVLD3DupInstruction";
}

def VLD3DUPd8  :
                 VLD3DUP<{0,0,0,?}, "8">;
def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">;
def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">;

def VLD3DUPd8Pseudo  : VLDQQPseudo<IIC_VLD3dup>;
def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>;
def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>;

// ...with double-spaced registers (not used for codegen):
def VLD3DUPq8  : VLD3DUP<{0,0,1,?}, "8">;
def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">;
def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">;

// ...with address register writeback:
class VLD3DUPWB<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
          (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD3dupu,
          "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4} = 0;
  let DecoderMethod = "DecodeVLD3DupInstruction";
}

def VLD3DUPd8_UPD  : VLD3DUPWB<{0,0,0,0}, "8">;
def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16">;
def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32">;

def VLD3DUPq8_UPD  : VLD3DUPWB<{0,0,1,0}, "8">;
def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16">;
def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32">;

def VLD3DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3dupu>;
def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;

// VLD4DUP : Vector Load (single 4-element structure to all lanes)
class VLD4DUP<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1111, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6dup:$Rn), IIC_VLD4dup,
          "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4DupInstruction";
}

def VLD4DUPd8  : VLD4DUP<{0,0,0,?}, "8">;
def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">;
// 32-bit forms also route an alignment bit through Inst{6}.
def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }

def VLD4DUPd8Pseudo  : VLDQQPseudo<IIC_VLD4dup>;
def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>;
def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>;

// ...with double-spaced registers (not used for codegen):
def VLD4DUPq8  : VLD4DUP<{0,0,1,?}, "8">;
def VLD4DUPq16 : VLD4DUP<{0,1,1,?}, "16">;
def VLD4DUPq32 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }

// ...with address register writeback:
class VLD4DUPWB<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1111, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu,
          "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4DupInstruction";
}

def VLD4DUPd8_UPD  : VLD4DUPWB<{0,0,0,0}, "8">;
def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">;
def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }

def VLD4DUPq8_UPD  : VLD4DUPWB<{0,0,1,0}, "8">;
def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">;
def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }

def VLD4DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4dupu>;
def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;

} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1

let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {

// Classes for VST* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
class VSTQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), itin, "">;
class VSTQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, QPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset, QPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">;
class VSTQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, QQPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset, QQPR:$src), itin,
                "$addr.addr = $wb">;

class VSTQQQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">;
class VSTQQQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
                "$addr.addr = $wb">;

// VST1 : Vector Store (multiple single elements)
class VST1D<bits<4> op7_4, string Dt>
  : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$Rn, VecListOneD:$Vd),
          IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
class VST1Q<bits<4> op7_4, string Dt>
  : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$Rn, VecListDPair:$Vd),
          IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}

def VST1d8  : VST1D<{0,0,0,?}, "8">;
def VST1d16 : VST1D<{0,1,0,?}, "16">;
def VST1d32 : VST1D<{1,0,0,?}, "32">;
def VST1d64 : VST1D<{1,1,0,?}, "64">;

def VST1q8  : VST1Q<{0,0,?,?}, "8">;
def VST1q16 : VST1Q<{0,1,?,?}, "16">;
def VST1q32 : VST1Q<{1,0,?,?}, "32">;
def VST1q64 : VST1Q<{1,1,?,?}, "64">;

// ...with address register writeback:
// NOTE(review): the writeback store multiclasses below use load itineraries
// (IIC_VLD1u / IIC_VLD1x2u / IIC_VLD1x3u / IIC_VLD1x4u) rather than VST
// itineraries — confirm this is intentional before changing.
multiclass VST1DWB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb),
                     (ins addrmode6:$Rn, VecListOneD:$Vd), IIC_VLD1u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm, VecListOneD:$Vd),
                        IIC_VLD1u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}
multiclass VST1QWB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
                     (ins addrmode6:$Rn, VecListDPair:$Vd), IIC_VLD1x2u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm, VecListDPair:$Vd),
                        IIC_VLD1x2u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

defm VST1d8wb  : VST1DWB<{0,0,0,?}, "8">;
defm VST1d16wb : VST1DWB<{0,1,0,?}, "16">;
defm VST1d32wb : VST1DWB<{1,0,0,?}, "32">;
defm VST1d64wb : VST1DWB<{1,1,0,?}, "64">;

defm VST1q8wb  : VST1QWB<{0,0,?,?}, "8">;
defm VST1q16wb : VST1QWB<{0,1,?,?}, "16">;
defm VST1q32wb : VST1QWB<{1,0,?,?}, "32">;
defm VST1q64wb : VST1QWB<{1,1,?,?}, "64">;

// ...with 3 registers
class VST1D3<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0110, op7_4, (outs),
          (ins addrmode6:$Rn, VecListThreeD:$Vd),
          IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VST1D3WB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
                     (ins addrmode6:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm, VecListThreeD:$Vd),
                        IIC_VLD1x3u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VST1d8T  : VST1D3<{0,0,0,?}, "8">;
def VST1d16T : VST1D3<{0,1,0,?}, "16">;
def VST1d32T : VST1D3<{1,0,0,?}, "32">;
def VST1d64T : VST1D3<{1,1,0,?}, "64">;

defm VST1d8Twb  : VST1D3WB<{0,0,0,?}, "8">;
defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16">;
defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32">;
defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64">;

def VST1d64TPseudo            : VSTQQPseudo<IIC_VST1x3>;
def VST1d64TPseudoWB_fixed    : VSTQQWBPseudo<IIC_VST1x3u>;
def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>;

// ...with 4 registers
class VST1D4<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0010, op7_4, (outs),
          (ins addrmode6:$Rn, VecListFourD:$Vd),
          IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "",
          []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VST1D4WB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
                     (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1x4u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1693 let Inst{5-4} = Rn{5-4}; 1694 let DecoderMethod = "DecodeVLDST1Instruction"; 1695 } 1696 def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb), 1697 (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd), 1698 IIC_VLD1x4u, 1699 "vst1", Dt, "$Vd, $Rn, $Rm", 1700 "$Rn.addr = $wb", []> { 1701 let Inst{5-4} = Rn{5-4}; 1702 let DecoderMethod = "DecodeVLDST1Instruction"; 1703 } 1704 } 1705 1706 def VST1d8Q : VST1D4<{0,0,?,?}, "8">; 1707 def VST1d16Q : VST1D4<{0,1,?,?}, "16">; 1708 def VST1d32Q : VST1D4<{1,0,?,?}, "32">; 1709 def VST1d64Q : VST1D4<{1,1,?,?}, "64">; 1710 1711 defm VST1d8Qwb : VST1D4WB<{0,0,?,?}, "8">; 1712 defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16">; 1713 defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32">; 1714 defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64">; 1715 1716 def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>; 1717 def VST1d64QPseudoWB_fixed : VSTQQWBPseudo<IIC_VST1x4u>; 1718 def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>; 1719 1720 // VST2 : Vector Store (multiple 2-element structures) 1721 class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, 1722 InstrItinClass itin> 1723 : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$Rn, VdTy:$Vd), 1724 itin, "vst2", Dt, "$Vd, $Rn", "", []> { 1725 let Rm = 0b1111; 1726 let Inst{5-4} = Rn{5-4}; 1727 let DecoderMethod = "DecodeVLDST2Instruction"; 1728 } 1729 1730 def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2>; 1731 def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2>; 1732 def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2>; 1733 1734 def VST2q8 : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2>; 1735 def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2>; 1736 def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2>; 1737 1738 def VST2q8Pseudo : VSTQQPseudo<IIC_VST2x2>; 1739 def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>; 1740 def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>; 1741 1742 // ...with address 
register writeback: 1743 multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt, 1744 RegisterOperand VdTy> { 1745 def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), 1746 (ins addrmode6:$Rn, VdTy:$Vd), IIC_VLD1u, 1747 "vst2", Dt, "$Vd, $Rn!", 1748 "$Rn.addr = $wb", []> { 1749 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 1750 let Inst{5-4} = Rn{5-4}; 1751 let DecoderMethod = "DecodeVLDST2Instruction"; 1752 } 1753 def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), 1754 (ins addrmode6:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u, 1755 "vst2", Dt, "$Vd, $Rn, $Rm", 1756 "$Rn.addr = $wb", []> { 1757 let Inst{5-4} = Rn{5-4}; 1758 let DecoderMethod = "DecodeVLDST2Instruction"; 1759 } 1760 } 1761 multiclass VST2QWB<bits<4> op7_4, string Dt> { 1762 def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), 1763 (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1u, 1764 "vst2", Dt, "$Vd, $Rn!", 1765 "$Rn.addr = $wb", []> { 1766 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
1767 let Inst{5-4} = Rn{5-4}; 1768 let DecoderMethod = "DecodeVLDST2Instruction"; 1769 } 1770 def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), 1771 (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd), 1772 IIC_VLD1u, 1773 "vst2", Dt, "$Vd, $Rn, $Rm", 1774 "$Rn.addr = $wb", []> { 1775 let Inst{5-4} = Rn{5-4}; 1776 let DecoderMethod = "DecodeVLDST2Instruction"; 1777 } 1778 } 1779 1780 defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListDPair>; 1781 defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair>; 1782 defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair>; 1783 1784 defm VST2q8wb : VST2QWB<{0,0,?,?}, "8">; 1785 defm VST2q16wb : VST2QWB<{0,1,?,?}, "16">; 1786 defm VST2q32wb : VST2QWB<{1,0,?,?}, "32">; 1787 1788 def VST2q8PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>; 1789 def VST2q16PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>; 1790 def VST2q32PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>; 1791 def VST2q8PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>; 1792 def VST2q16PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>; 1793 def VST2q32PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>; 1794 1795 // ...with double-spaced registers 1796 def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VST2>; 1797 def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2>; 1798 def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2>; 1799 defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced>; 1800 defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced>; 1801 defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced>; 1802 1803 // VST3 : Vector Store (multiple 3-element structures) 1804 class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt> 1805 : NLdSt<0, 0b00, op11_8, op7_4, (outs), 1806 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3, 1807 "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> { 1808 let Rm = 0b1111; 1809 let 
Inst{4} = Rn{4}; 1810 let DecoderMethod = "DecodeVLDST3Instruction"; 1811 } 1812 1813 def VST3d8 : VST3D<0b0100, {0,0,0,?}, "8">; 1814 def VST3d16 : VST3D<0b0100, {0,1,0,?}, "16">; 1815 def VST3d32 : VST3D<0b0100, {1,0,0,?}, "32">; 1816 1817 def VST3d8Pseudo : VSTQQPseudo<IIC_VST3>; 1818 def VST3d16Pseudo : VSTQQPseudo<IIC_VST3>; 1819 def VST3d32Pseudo : VSTQQPseudo<IIC_VST3>; 1820 1821 // ...with address register writeback: 1822 class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt> 1823 : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), 1824 (ins addrmode6:$Rn, am6offset:$Rm, 1825 DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u, 1826 "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm", 1827 "$Rn.addr = $wb", []> { 1828 let Inst{4} = Rn{4}; 1829 let DecoderMethod = "DecodeVLDST3Instruction"; 1830 } 1831 1832 def VST3d8_UPD : VST3DWB<0b0100, {0,0,0,?}, "8">; 1833 def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">; 1834 def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">; 1835 1836 def VST3d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>; 1837 def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>; 1838 def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>; 1839 1840 // ...with double-spaced registers: 1841 def VST3q8 : VST3D<0b0101, {0,0,0,?}, "8">; 1842 def VST3q16 : VST3D<0b0101, {0,1,0,?}, "16">; 1843 def VST3q32 : VST3D<0b0101, {1,0,0,?}, "32">; 1844 def VST3q8_UPD : VST3DWB<0b0101, {0,0,0,?}, "8">; 1845 def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">; 1846 def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">; 1847 1848 def VST3q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>; 1849 def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>; 1850 def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>; 1851 1852 // ...alternate versions to be allocated odd register numbers: 1853 def VST3q8oddPseudo : VSTQQQQPseudo<IIC_VST3>; 1854 def VST3q16oddPseudo : VSTQQQQPseudo<IIC_VST3>; 1855 def VST3q32oddPseudo : VSTQQQQPseudo<IIC_VST3>; 1856 1857 def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>; 1858 def 
VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>; 1859 def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>; 1860 1861 // VST4 : Vector Store (multiple 4-element structures) 1862 class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt> 1863 : NLdSt<0, 0b00, op11_8, op7_4, (outs), 1864 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), 1865 IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn", 1866 "", []> { 1867 let Rm = 0b1111; 1868 let Inst{5-4} = Rn{5-4}; 1869 let DecoderMethod = "DecodeVLDST4Instruction"; 1870 } 1871 1872 def VST4d8 : VST4D<0b0000, {0,0,?,?}, "8">; 1873 def VST4d16 : VST4D<0b0000, {0,1,?,?}, "16">; 1874 def VST4d32 : VST4D<0b0000, {1,0,?,?}, "32">; 1875 1876 def VST4d8Pseudo : VSTQQPseudo<IIC_VST4>; 1877 def VST4d16Pseudo : VSTQQPseudo<IIC_VST4>; 1878 def VST4d32Pseudo : VSTQQPseudo<IIC_VST4>; 1879 1880 // ...with address register writeback: 1881 class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt> 1882 : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), 1883 (ins addrmode6:$Rn, am6offset:$Rm, 1884 DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u, 1885 "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm", 1886 "$Rn.addr = $wb", []> { 1887 let Inst{5-4} = Rn{5-4}; 1888 let DecoderMethod = "DecodeVLDST4Instruction"; 1889 } 1890 1891 def VST4d8_UPD : VST4DWB<0b0000, {0,0,?,?}, "8">; 1892 def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">; 1893 def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">; 1894 1895 def VST4d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>; 1896 def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>; 1897 def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>; 1898 1899 // ...with double-spaced registers: 1900 def VST4q8 : VST4D<0b0001, {0,0,?,?}, "8">; 1901 def VST4q16 : VST4D<0b0001, {0,1,?,?}, "16">; 1902 def VST4q32 : VST4D<0b0001, {1,0,?,?}, "32">; 1903 def VST4q8_UPD : VST4DWB<0b0001, {0,0,?,?}, "8">; 1904 def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">; 1905 def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">; 1906 
def VST4q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;

// ...alternate versions to be allocated odd register numbers:
def VST4q8oddPseudo  : VSTQQQQPseudo<IIC_VST4>;
def VST4q16oddPseudo : VSTQQQQPseudo<IIC_VST4>;
def VST4q32oddPseudo : VSTQQQQPseudo<IIC_VST4>;

def VST4q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;

} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1

// Classes for VST*LN pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
class VSTQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
class VSTQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
class VSTQQQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQQQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;

// VST1LN : Vector Store (single element from one lane)
// The selection pattern extracts lane $lane from a D register and stores it
// with StoreOp.  The lane-number encoding bits (Inst{7-5}) are assigned in
// the per-element-size defs below, since their position depends on the
// element size.
class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
             PatFrag StoreOp, SDNode ExtractOp, Operand AddrMode>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
            (ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane),
            IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
            [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]> {
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVST1LN";
}
// Q-register lane store: pseudo carrying the same store pattern, expanded
// to a D-register VST1LN after register allocation.
class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
  : VSTQLNPseudo<IIC_VST1ln> {
  let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
                          addrmode6:$addr)];
}

def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8,
                      NEONvgetlaneu, addrmode6> {
  let Inst{7-5} = lane{2-0};
}
def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
                       NEONvgetlaneu, addrmode6> {
  let Inst{7-6} = lane{1-0};
  let Inst{4}   = Rn{4};
}

def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt,
                       addrmode6oneL32> {
  let Inst{7}   = lane{0};
  let Inst{5-4} = Rn{5-4};
}

def VST1LNq8Pseudo  : VST1QLNPseudo<v16i8, truncstorei8, NEONvgetlaneu>;
def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, NEONvgetlaneu>;
def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>;

// Float lane stores reuse the 32-bit integer lane-store instructions.
def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
          (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
          (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;

// ...with address register writeback:
class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
               PatFrag StoreOp, SDNode ExtractOp, Operand AdrMode>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
            (ins AdrMode:$Rn, am6offset:$Rm,
             DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt,
            "\\{$Vd[$lane]\\}, $Rn$Rm",
            "$Rn.addr = $wb",
            [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
                                    AdrMode:$Rn, am6offset:$Rm))]> {
  let DecoderMethod = "DecodeVST1LN";
}
class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
  : VSTQLNWBPseudo<IIC_VST1lnu> {
  let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
                                        addrmode6:$addr, am6offset:$offset))];
}

def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
                            NEONvgetlaneu, addrmode6> {
  let Inst{7-5} = lane{2-0};
}
def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16,
                             NEONvgetlaneu, addrmode6> {
  let Inst{7-6} = lane{1-0};
  let Inst{4}   = Rn{4};
}
def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
                             extractelt, addrmode6oneL32> {
  let Inst{7}   = lane{0};
  let Inst{5-4} = Rn{5-4};
}

def VST1LNq8Pseudo_UPD  : VST1QLNWBPseudo<v16i8, post_truncsti8, NEONvgetlaneu>;
def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16, NEONvgetlaneu>;
def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>;

let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {

// VST2LN : Vector Store (single 2-element structure from one lane)
class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane),
            IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn",
            "", []> {
  let Rm = 0b1111;
  let Inst{4}   = Rn{4};
  let DecoderMethod = "DecodeVST2LN";
}

def VST2LNd8 : VST2LN<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VST2LNd8Pseudo  : VSTQLNPseudo<IIC_VST2ln>;
def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>;
def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>;

// ...with double-spaced registers:
def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
  let Inst{4}   = Rn{4};
}
def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> {
  let Inst{7}   = lane{0};
  let Inst{4}   = Rn{4};
}

def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>;

// ...with address register writeback:
class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
            "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm",
            "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVST2LN";
}

def VST2LNd8_UPD : VST2LNWB<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VST2LNd8Pseudo_UPD  : VSTQLNWBPseudo<IIC_VST2lnu>;
def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;

def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;
def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;

// VST3LN : Vector Store (single 3-element structure from one lane)
class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3,
             nohash_imm:$lane), IIC_VST3ln, "vst3", Dt,
            "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []> {
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVST3LN";
}

def VST3LNd8 : VST3LN<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VST3LNd8Pseudo  : VSTQQLNPseudo<IIC_VST3ln>;
def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>;

// ...with double-spaced registers:
def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VST3LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;

// ...with address register writeback:
class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane),
            IIC_VST3lnu, "vst3", Dt,
            "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm",
            "$Rn.addr = $wb", []> {
  let DecoderMethod = "DecodeVST3LN";
}

def VST3LNd8_UPD : VST3LNWB<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VST3LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST3lnu>;
def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;

def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;

// VST4LN : Vector Store (single 4-element structure from one lane)
class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4,
             nohash_imm:$lane), IIC_VST4ln, "vst4", Dt,
            "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn",
            "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVST4LN";
}

def VST4LNd8 : VST4LN<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNd8Pseudo  : VSTQQLNPseudo<IIC_VST4ln>;
def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>;

// ...with double-spaced registers:
def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;
def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;

// ...with address register writeback:
class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
            IIC_VST4lnu, "vst4", Dt,
            "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm",
            "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVST4LN";
}

def VST4LNd8_UPD : VST4LNWB<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST4lnu>;
def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;

// ...with double-spaced registers:
def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;

} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1

// Use vld1/vst1 for unaligned f64 load / store
// On little-endian, an f64 with only halfword/byte alignment is accessed
// via the matching element-size vld1/vst1; on big-endian, vld1.64/vst1.64
// is used for any non-word-aligned access instead.
def : Pat<(f64 (hword_alignedload addrmode6:$addr)),
          (VLD1d16 addrmode6:$addr)>, Requires<[IsLE]>;
def : Pat<(hword_alignedstore (f64 DPR:$value), addrmode6:$addr),
          (VST1d16 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>;
def : Pat<(f64 (byte_alignedload addrmode6:$addr)),
          (VLD1d8 addrmode6:$addr)>, Requires<[IsLE]>;
def : Pat<(byte_alignedstore (f64 DPR:$value), addrmode6:$addr),
          (VST1d8 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>;
def : Pat<(f64 (non_word_alignedload addrmode6:$addr)),
          (VLD1d64 addrmode6:$addr)>, Requires<[IsBE]>;
def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr),
          (VST1d64 addrmode6:$addr, DPR:$value)>, Requires<[IsBE]>;

// Use vld1/vst1 for Q and QQ.  Also use them for unaligned v2f64
// load / store if it's legal.
// v2f64 (Q register) load/store, selected by available alignment: the
// element size of the vld1/vst1 matches the alignment of the access.
// Sub-word-aligned forms are little-endian only.
def : Pat<(v2f64 (dword_alignedload addrmode6:$addr)),
          (VLD1q64 addrmode6:$addr)>;
def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q64 addrmode6:$addr, QPR:$value)>;
def : Pat<(v2f64 (word_alignedload addrmode6:$addr)),
          (VLD1q32 addrmode6:$addr)>;
def : Pat<(word_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q32 addrmode6:$addr, QPR:$value)>;
def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
          (VLD1q16 addrmode6:$addr)>, Requires<[IsLE]>;
def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q16 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>;
def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
          (VLD1q8 addrmode6:$addr)>, Requires<[IsLE]>;
def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q8 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>;

//===----------------------------------------------------------------------===//
// NEON pattern fragments
//===----------------------------------------------------------------------===//

// Extract D sub-registers of Q registers.
// Map an element index to the dsub_N sub-register index that contains it.
// The divisor is the number of elements of that size per D register
// (8 x i8, 4 x i16, 2 x i32, 1 x f64).
def DSubReg_i8_reg  : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/8, MVT::i32);
}]>;
def DSubReg_i16_reg : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/4, MVT::i32);
}]>;
def DSubReg_i32_reg : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/2, MVT::i32);
}]>;
def DSubReg_f64_reg : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue(), MVT::i32);
}]>;

// Extract S sub-registers of Q/D registers.
def SSubReg_f32_reg : SDNodeXForm<imm, [{
  assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue(), MVT::i32);
}]>;

// Translate lane numbers from Q registers to D subregs.
// Masks the Q-register lane index down to the index within one D register.
def SubReg_i8_lane  : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 7, MVT::i32);
}]>;
def SubReg_i16_lane : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 3, MVT::i32);
}]>;
def SubReg_i32_lane : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 1, MVT::i32);
}]>;

//===----------------------------------------------------------------------===//
// Instruction Classes
//===----------------------------------------------------------------------===//

// Basic 2-register operations: double- and quad-register.
// Unary SDNode operation on a D register: $Vd = OpNode($Vm).
class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
           string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VUNAD, OpcodeStr, Dt,"$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm))))]>;
// Unary SDNode operation on a Q register.
class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
           string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VUNAQ, OpcodeStr, Dt,"$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm))))]>;

// Basic 2-register intrinsics, both double- and quad-register.
// Same shape as N2VD/N2VQ, but matches an intrinsic (SDPatternOperator)
// and takes an explicit itinerary.
class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
        (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
        (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Same as above, but not predicated.
// Non-predicated (N2Vnp-based) 2-register intrinsics, D and Q forms.
class N2VDIntnp<bits<2> op17_16, bits<3> op10_8, bit op7,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<op17_16, op10_8, op7, 0, (outs DPR:$Vd), (ins DPR:$Vm),
          itin, OpcodeStr, Dt, ResTy, OpTy,
          [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;

class N2VQIntnp<bits<2> op17_16, bits<3> op10_8, bit op7,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<op17_16, op10_8, op7, 1, (outs QPR:$Vd), (ins QPR:$Vm),
          itin, OpcodeStr, Dt, ResTy, OpTy,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Narrow 2-register operations.
// Q-register source, D-register result: $Vd(D) = OpNode($Vm(Q)).
class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyD, ValueType TyQ, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
        (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (TyD (OpNode (TyQ QPR:$Vm))))]>;

// Narrow 2-register intrinsics.
class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyD, ValueType TyQ, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
        (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vm))))]>;

// Long 2-register operations (currently only used for VMOVL).
// Long 2-register operation: double-register source, quad-register result.
class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
        (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vm))))]>;

// Long 2-register intrinsics.
class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
        (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vm))))]>;

// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
// Both registers are read and written, so each output is tied to the
// corresponding source via the constraint string. No selection pattern.
class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$Vd, DPR:$Vm),
        (ins DPR:$src1, DPR:$src2), IIC_VPERMD,
        OpcodeStr, Dt, "$Vd, $Vm",
        "$src1 = $Vd, $src2 = $Vm", []>;
class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
                  InstrItinClass itin, string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$Vd, QPR:$Vm),
        (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$Vd, $Vm",
        "$src1 = $Vd, $src2 = $Vm", []>;

// Basic 3-register operations: double- and quad-register.
// 3-register operation, double-register form.
class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}
// Same as N3VD but no data type.
class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr,
            ValueType ResTy, ValueType OpTy,
            SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 0, op4,
         (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
         OpcodeStr, "$Vd, $Vn, $Vm", "",
         [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>{
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// Scalar ("by lane") form: $Vm is a DPR_VFP2 register indexed by a
// 32-bit lane immediate; the pattern duplicates the lane across $Vm.
class N3VDSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType Ty, SDNode ShOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$Vn),
                              (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}
// 16-bit-element lane form: $Vm restricted to DPR_8, 16-bit lane index.
class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
               string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane","",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$Vn),
                              (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}

// 3-register operation, quad-register form.
class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}
// Same as N3VQ but no data type.
class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr,
            ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 1, op4,
         (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
         OpcodeStr, "$Vd, $Vn, $Vm", "",
         [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>{
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}
// Quad-register "by lane" form (32-bit lane index).
class N3VQSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$Vn),
                                 (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                      imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}
// Quad-register "by lane" form (16-bit elements, DPR_8 scalar source).
class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$Vn),
                                 (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                      imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}

// Basic 3-register intrinsics, both double- and quad-register.
class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// Non-predicated 3-register intrinsic, double-register form.
class N3VDIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                bit op4, Format f, InstrItinClass itin, string OpcodeStr,
                string Dt, ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin, OpcodeStr, Dt,
          ResTy, OpTy, IntOp, Commutable,
          [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;

// Intrinsic "by lane" forms, double-register.
class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (Ty DPR:$Vd),
                    (Ty (IntOp (Ty DPR:$Vn),
                               (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
                                                 imm:$lane)))))]> {
  let isCommutable = 0;
}

class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (Ty DPR:$Vd),
                    (Ty (IntOp (Ty DPR:$Vn),
                               (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
  let isCommutable = 0;
}
// "Shifted" operand-order variant: note $Vm is listed (and printed) before
// $Vn, and the two-operand alias ties $Vm rather than $Vn to $Vd.
class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                Format f, InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vm, DPR:$Vn), f, itin,
        OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (OpTy DPR:$Vn))))]> {
  let TwoOperandAliasConstraint = "$Vm = $Vd";
  let isCommutable = 0;
}

class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// Non-predicated 3-register intrinsic, quad-register form.
class N3VQIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                bit op4, Format f, InstrItinClass itin, string OpcodeStr,
                string Dt, ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr, Dt,
          ResTy, OpTy, IntOp, Commutable,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;

// Intrinsic "by lane" forms, quad-register.
class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$Vn),
                                  (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                       imm:$lane)))))]> {
  let isCommutable = 0;
}
class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$Vn),
                                  (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                       imm:$lane)))))]> {
  let isCommutable = 0;
}
// Quad-register counterpart of N3VDIntSh ($Vm before $Vn).
class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                Format f, InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vm, QPR:$Vn), f, itin,
        OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (OpTy QPR:$Vn))))]> {
  let TwoOperandAliasConstraint = "$Vm = $Vd";
  let isCommutable = 0;
}

// Multiply-Add/Sub operations: double- and quad-register.
// $src1 is the accumulator, tied to the destination register.
class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDPatternOperator MulOp, SDPatternOperator OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
                             (Ty (MulOp DPR:$Vn, DPR:$Vm)))))]>;

class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd),
              (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$src1),
                              (Ty (MulOp DPR:$Vn,
                                         (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
                                                           imm:$lane)))))))]>;
class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType Ty, SDNode MulOp, SDNode ShOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd),
              (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$src1),
                              (Ty (MulOp DPR:$Vn,
                                         (Ty (NEONvduplane (Ty DPR_8:$Vm),
                                                           imm:$lane)))))))]>;

class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty,
                SDPatternOperator MulOp, SDPatternOperator OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
                             (Ty (MulOp QPR:$Vn, QPR:$Vm)))))]>;
class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
                  SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$src1),
                                 (ResTy (MulOp QPR:$Vn,
                                               (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                                    imm:$lane)))))))]>;
class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType ResTy, ValueType OpTy,
                    SDNode MulOp, SDNode ShOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$src1),
                                 (ResTy (MulOp QPR:$Vn,
                                               (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                                    imm:$lane)))))))]>;

// Neon Intrinsic-Op instructions (VABA): double- and quad-register.
// Intrinsic result combined with the tied accumulator via OpNode,
// double-register form.
class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
                             (Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>;
// Quad-register form.
class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
                             (Ty (IntOp (Ty QPR:$Vn), (Ty QPR:$Vm))))))]>;

// Neon 3-argument intrinsics, both double- and quad-register.
// The destination register is also used as the first source operand register.
// 3-argument intrinsic: the intrinsic itself takes the tied $src1
// accumulator as its first argument. Double-register form.
class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$src1),
                                     (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
// Quad-register form.
class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src1),
                                     (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;

// Long Multiply-Add/Sub operations.
// Double-register sources produce a quad-register product, combined with
// the tied quad-register accumulator $src1.
class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
                               (TyQ (MulOp (TyD DPR:$Vn),
                                           (TyD DPR:$Vm)))))]>;
class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set QPR:$Vd,
                    (OpNode (TyQ QPR:$src1),
                            (TyQ (MulOp (TyD DPR:$Vn),
                                        (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),
                                                           imm:$lane))))))]>;
class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set QPR:$Vd,
                    (OpNode (TyQ QPR:$src1),
                            (TyQ (MulOp (TyD DPR:$Vn),
                                        (TyD (NEONvduplane (TyD DPR_8:$Vm),
                                                           imm:$lane))))))]>;

// Long Intrinsic-Op vector operations with explicit extend (VABAL).
// The double-register intrinsic result is widened by ExtOp before being
// combined with the tied accumulator.
class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp,
                   SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
                               (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                                       (TyD DPR:$Vm)))))))]>;

// Neon Long 3-argument intrinsic. The destination register is
// a quad-register and is also used as the first source operand register.
// Long 3-argument intrinsic: quad-register tied accumulator plus two
// double-register sources.
class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
              (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$Vn), (TyD DPR:$Vm))))]>;
// "By lane" variants (32-bit and 16-bit lane index).
class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$src1),
                                  (OpTy DPR:$Vn),
                                  (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                      imm:$lane)))))]>;
class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$src1),
                                  (OpTy DPR:$Vn),
                                  (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                      imm:$lane)))))]>;

// Narrowing 3-register intrinsics.
// Narrowing 3-register intrinsic: quad-register sources, double-register
// result.
class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
              SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINi4D,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vn), (TyQ QPR:$Vm))))]> {
  let isCommutable = Commutable;
}

// Long 3-register operations.
class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  let isCommutable = Commutable;
}
// "By lane" variants of the long 3-register operation.
class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set QPR:$Vd,
                    (TyQ (OpNode (TyD DPR:$Vn),
                                 (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),imm:$lane)))))]>;
class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set QPR:$Vd,
                    (TyQ (OpNode (TyD DPR:$Vn),
                                 (TyD (NEONvduplane (TyD DPR_8:$Vm), imm:$lane)))))]>;

// Long 3-register operations with explicitly extended operands.
// Long 3-register operation where both double-register operands are
// widened by ExtOp before OpNode combines them.
class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDNode OpNode, SDNode ExtOp,
              bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (OpNode (TyQ (ExtOp (TyD DPR:$Vn))),
                               (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  let isCommutable = Commutable;
}

// Long 3-register intrinsics with explicit extend (VABDL).
// The double-register intrinsic result is widened by ExtOp.
class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                 InstrItinClass itin, string OpcodeStr, string Dt,
                 ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp,
                 bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                               (TyD DPR:$Vm))))))]> {
  let isCommutable = Commutable;
}

// Long 3-register intrinsics.
// Long 3-register intrinsic: double-register sources, quad-register result.
class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  let isCommutable = Commutable;
}
// "By lane" variants.
class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (OpTy DPR:$Vn),
                                  (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                      imm:$lane)))))]>;
class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (OpTy DPR:$Vn),
                                  (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                      imm:$lane)))))]>;

// Wide 3-register operations.
// Quad-register first operand; the double-register second operand is
// widened by ExtOp before OpNode combines them.
class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
           SDNode OpNode, SDNode ExtOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VSUBiD,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$Vn),
                               (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// Pairwise long 2-register intrinsics, both double- and quad-register.
class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Pairwise long 2-register accumulate intrinsics,
// both double- and quad-register.
// The destination register is also used as the first source operand register.
// Pairwise long accumulate: the tied $src1 accumulator is the intrinsic's
// first argument. Double-register form.
class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vm), IIC_VPALiD,
        OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$Vm))))]>;
// Quad-register form.
class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vm), IIC_VPALiQ,
        OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$Vm))))]>;

// Shift by immediate,
// both double- and quad-register.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin, Operand ImmTy,
             string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), f, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd, (Ty (OpNode (Ty DPR:$Vm), (i32 imm:$SIMM))))]>;
class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin, Operand ImmTy,
             string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), f, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd, (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>;
}

// Long shift by immediate.
// Long shift by immediate: double-register source, quad-register result.
class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, Operand ImmTy, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs QPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), N2RegVShLFrm,
           IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm),
                                         (i32 imm:$SIMM))))]>;

// Narrow shift by immediate.
// Quad-register source, double-register result.
class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, Operand ImmTy, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs DPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm),
                                         (i32 imm:$SIMM))))]>;

// Shift right by immediate and accumulate,
// both double- and quad-register.
// Shift-and-accumulate: the shifted $Vm is added to the tied $src1.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
           (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set DPR:$Vd, (Ty (add DPR:$src1,
                                   (Ty (ShOp DPR:$Vm, (i32 imm:$SIMM))))))]>;
class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
           (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set QPR:$Vd, (Ty (add QPR:$src1,
                                   (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>;
}

// Shift by immediate and insert,
// both double- and quad-register.
// ShOp takes the tied $src1, the shifted source, and the immediate.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, Format f, string OpcodeStr, string Dt,
                ValueType Ty,SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
           (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set DPR:$Vd, (Ty (ShOp DPR:$src1, DPR:$Vm, (i32 imm:$SIMM))))]>;
class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, Format f, string OpcodeStr, string Dt,
                ValueType Ty,SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
           (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiQ,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set QPR:$Vd, (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>;
}

// Convert, with fractional bits immediate,
// both double- and quad-register.
class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
              SDPatternOperator IntOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd), (ins DPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
           IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>;
class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
              SDPatternOperator IntOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd), (ins QPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
           IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>;

//===----------------------------------------------------------------------===//
// Multiclasses
//===----------------------------------------------------------------------===//

// Abbreviations used in multiclass suffixes:
//   Q = quarter int (8 bit) elements
//   H = half int (16 bit) elements
//   S = single int (32 bit) elements
//   D = double int (64 bit) elements

// Neon 2-register vector operations and intrinsics.

// Neon 2-register comparisons.
//   source operand element sizes of 8, 16 and 32 bits:
multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op4, string opc, string Dt,
                       string asm, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "8"), asm, "",
                  [(set DPR:$Vd, (v8i8 (OpNode (v8i8 DPR:$Vm))))]>;
  def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "16"), asm, "",
                  [(set DPR:$Vd, (v4i16 (OpNode (v4i16 DPR:$Vm))))]>;
  def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "32"), asm, "",
                  [(set DPR:$Vd, (v2i32 (OpNode (v2i32 DPR:$Vm))))]>;
  def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, "f32", asm, "",
                  [(set DPR:$Vd, (v2i32 (OpNode (v2f32 DPR:$Vm))))]> {
    let Inst{10} = 1; // overwrite F = 1
  }

  // 128-bit vector types.
  def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "8"), asm, "",
                  [(set QPR:$Vd, (v16i8 (OpNode (v16i8 QPR:$Vm))))]>;
  def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "16"), asm, "",
                  [(set QPR:$Vd, (v8i16 (OpNode (v8i16 QPR:$Vm))))]>;
  def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "32"), asm, "",
                  [(set QPR:$Vd, (v4i32 (OpNode (v4i32 QPR:$Vm))))]>;
  def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, "f32", asm, "",
                  [(set QPR:$Vd, (v4i32 (OpNode (v4f32 QPR:$Vm))))]> {
    let Inst{10} = 1; // overwrite F = 1
  }
}


// Neon 2-register vector intrinsics,
//   element sizes of 8, 16 and 32 bits:
multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                      bits<5> op11_7, bit op4,
                      InstrItinClass itinD, InstrItinClass itinQ,
                      string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
  def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
  def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
  def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
  def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
}


// Neon Narrowing 2-register vector operations,
//   source operand element sizes of 16, 32 and 64 bits:
multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                    bits<5> op11_7, bit op6, bit op4,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    SDNode OpNode> {
  def v8i8  : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, !strconcat(Dt, "16"),
                   v8i8, v8i16, OpNode>;
  def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, !strconcat(Dt, "32"),
                   v4i16, v4i32, OpNode>;
  def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, !strconcat(Dt, "64"),
                   v2i32, v2i64, OpNode>;
}

// Neon Narrowing 2-register vector intrinsics,
//   source operand element sizes of 16, 32 and 64 bits:
multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op6, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       SDPatternOperator IntOp> {
  def v8i8  : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "16"),
                      v8i8, v8i16, IntOp>;
  def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "32"),
                      v4i16, v4i32, IntOp>;
  def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "64"),
                      v2i32, v2i64, IntOp>;
}


// Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL).
//   source operand element sizes of 16, 32 and 64 bits:
multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
                    string OpcodeStr, string Dt, SDNode OpNode> {
  def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>;
  def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
  def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
}


// Neon 3-register vector operations.

// First with only element sizes of 8, 16 and 32 bits:
multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                   InstrItinClass itinD16, InstrItinClass itinD32,
                   InstrItinClass itinQ16, InstrItinClass itinQ32,
                   string OpcodeStr, string Dt,
                   SDNode OpNode, bit Commutable = 0> {
  // 64-bit vector types.
  def v8i8  : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i8, v8i8, OpNode, Commutable>;
  def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i16, v4i16, OpNode, Commutable>;
  def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i32, v2i32, OpNode, Commutable>;

  // 128-bit vector types.
  def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v16i8, v16i8, OpNode, Commutable>;
  def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v8i16, v8i16, OpNode, Commutable>;
  def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v4i32, v4i32, OpNode, Commutable>;
}

multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> {
  def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, "i16", v4i16, ShOp>;
  def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, "i32", v2i32, ShOp>;
  def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, "i16", v8i16, v4i16, ShOp>;
  def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, "i32",
                     v4i32, v2i32, ShOp>;
}

// ....then also with element size 64 bits:
multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itinD, InstrItinClass itinQ,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0>
  : N3V_QHS<op24, op23, op11_8, op4, itinD, itinD, itinQ, itinQ,
            OpcodeStr, Dt, OpNode, Commutable> {
  def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD,
                   OpcodeStr, !strconcat(Dt, "64"),
                   v1i64, v1i64, OpNode, Commutable>;
  def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ,
                   OpcodeStr, !strconcat(Dt, "64"),
                   v2i64, v2i64, OpNode, Commutable>;
}


// Neon 3-register vector intrinsics.
// First with only element sizes of 16 and 32 bits:
multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                     InstrItinClass itinD16, InstrItinClass itinD32,
                     InstrItinClass itinQ16, InstrItinClass itinQ32,
                     string OpcodeStr, string Dt,
                     SDPatternOperator IntOp, bit Commutable = 0> {
  // 64-bit vector types.
  def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i16, v4i16, IntOp, Commutable>;
  def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, f, itinD32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i32, v2i32, IntOp, Commutable>;

  // 128-bit vector types.
  def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, f, itinQ16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v8i16, v8i16, IntOp, Commutable>;
  def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, f, itinQ32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v4i32, v4i32, IntOp, Commutable>;
}

// Same as above, but for the "with shift" (Sh) instruction variants.
multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v4i16 : N3VDIntSh<op24, op23, 0b01, op11_8, op4, f, itinD16,
                        OpcodeStr, !strconcat(Dt, "16"),
                        v4i16, v4i16, IntOp>;
  def v2i32 : N3VDIntSh<op24, op23, 0b10, op11_8, op4, f, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v8i16 : N3VQIntSh<op24, op23, 0b01, op11_8, op4, f, itinQ16,
                        OpcodeStr, !strconcat(Dt, "16"),
                        v8i16, v8i16, IntOp>;
  def v4i32 : N3VQIntSh<op24, op23, 0b10, op11_8, op4, f, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v4i32, v4i32, IntOp>;
}

multiclass N3VIntSL_HS<bits<4> op11_8,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
                          OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>;
  def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>;
  def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
                          OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>;
  def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                      InstrItinClass itinD16, InstrItinClass itinD32,
                      InstrItinClass itinQ16, InstrItinClass itinQ32,
                      string OpcodeStr, string Dt,
                      SDPatternOperator IntOp, bit Commutable = 0>
  : N3VInt_HS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
              OpcodeStr, Dt, IntOp, Commutable> {
  def v8i8  : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i8, v8i8, IntOp, Commutable>;
  def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, f, itinQ16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v16i8, v16i8, IntOp, Commutable>;
}

multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt,
                        SDPatternOperator IntOp>
  : N3VInt_HSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
                OpcodeStr, Dt, IntOp> {
  def v8i8  : N3VDIntSh<op24, op23, 0b00, op11_8, op4, f, itinD16,
                        OpcodeStr, !strconcat(Dt, "8"),
                        v8i8, v8i8, IntOp>;
  def v16i8 : N3VQIntSh<op24, op23, 0b00, op11_8, op4, f, itinQ16,
                        OpcodeStr, !strconcat(Dt, "8"),
                        v16i8, v16i8, IntOp>;
}


// ....then also with element size of 64 bits:
multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0>
  : N3VInt_QHS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
               OpcodeStr, Dt, IntOp, Commutable> {
  def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v1i64, v1i64, IntOp, Commutable>;
  def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, f, itinQ32,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v2i64, v2i64, IntOp, Commutable>;
}

multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt,
                         SDPatternOperator IntOp>
  : N3VInt_QHSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
                 OpcodeStr, Dt, IntOp> {
  def v1i64 : N3VDIntSh<op24, op23, 0b11, op11_8, op4, f, itinD32,
                        OpcodeStr, !strconcat(Dt, "64"),
                        v1i64, v1i64, IntOp>;
  def v2i64 : N3VQIntSh<op24, op23, 0b11, op11_8, op4, f, itinQ32,
                        OpcodeStr, !strconcat(Dt, "64"),
                        v2i64, v2i64, IntOp>;
}

// Neon Narrowing 3-register vector intrinsics,
//   source operand element sizes of 16, 32 and 64 bits:
multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0> {
  def v8i8  : N3VNInt<op24, op23, 0b00, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v8i8, v8i16, IntOp, Commutable>;
  def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v4i16, v4i32, IntOp, Commutable>;
  def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v2i32, v2i64, IntOp, Commutable>;
}


// Neon Long 3-register vector operations.

multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itin16, InstrItinClass itin32,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0> {
  def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i16, v8i8, OpNode, Commutable>;
  def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i32, v4i16, OpNode, Commutable>;
  def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i64, v2i32, OpNode, Commutable>;
}

multiclass N3VLSL_HS<bit op24, bits<4> op11_8,
                     InstrItinClass itin, string OpcodeStr, string Dt,
                     SDNode OpNode> {
  def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin, OpcodeStr,
                       !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
  def v2i32 : N3VLSL<op24, 0b10, op11_8, itin, OpcodeStr,
                     !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
}

multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
  def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i16, v8i8, OpNode, ExtOp, Commutable>;
  def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i32, v4i16, OpNode, ExtOp, Commutable>;
  def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i64, v2i32, OpNode, ExtOp, Commutable>;
}

// Neon Long 3-register vector intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itin16, InstrItinClass itin32,
                      string OpcodeStr, string Dt,
                      SDPatternOperator IntOp, bit Commutable = 0> {
  def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i32, v4i16, IntOp, Commutable>;
  def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i64, v2i32, IntOp, Commutable>;
}

multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
                        InstrItinClass itin, string OpcodeStr, string Dt,
                        SDPatternOperator IntOp> {
  def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
                          OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0>
  : N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt,
               IntOp, Commutable> {
  def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i16, v8i8, IntOp, Commutable>;
}

// ....with explicit extend (VABDL).
multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                          InstrItinClass itin, string OpcodeStr, string Dt,
                          SDPatternOperator IntOp, SDNode ExtOp,
                          bit Commutable = 0> {
  def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "8"),
                         v8i16, v8i8, IntOp, ExtOp, Commutable>;
  def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "16"),
                         v4i32, v4i16, IntOp, ExtOp, Commutable>;
  def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "32"),
                         v2i64, v2i32, IntOp, ExtOp, Commutable>;
}


// Neon Wide 3-register vector intrinsics,
//   source operand element sizes of 8, 16 and 32 bits:
multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
  def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i16, v8i8, OpNode, ExtOp, Commutable>;
  def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i32, v4i16, OpNode, ExtOp, Commutable>;
  def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i64, v2i32, OpNode, ExtOp, Commutable>;
}


// Neon Multiply-Op vector operations,
//   element sizes of 8, 16 and 32 bits:
multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, mul, OpNode>;
  def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, OpNode>;
  def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, mul, OpNode>;
  def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, mul, OpNode>;
  def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, mul, OpNode>;
}

multiclass N3VMulOpSL_HS<bits<4> op11_8,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt, SDNode ShOp> {
  def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
                            OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>;
  def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
                          OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>;
  def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
                            OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16,
                            mul, ShOp>;
  def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
                          OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32,
                          mul, ShOp>;
}

// Neon Intrinsic-Op vector operations,
//   element sizes of 8, 16 and 32 bits:
multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD, InstrItinClass itinQ,
                        string OpcodeStr, string Dt, SDPatternOperator IntOp,
                        SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, IntOp, OpNode>;
  def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp, OpNode>;
  def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, IntOp, OpNode>;
  def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, IntOp, OpNode>;
  def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, IntOp, OpNode>;
}

// Neon 3-argument intrinsics,
//   element sizes of 8, 16 and 32 bits:
multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itinD, InstrItinClass itinQ,
                       string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD,
                       OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
  def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD,
                       OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
  def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD,
                       OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ,
                       OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
  def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ,
                       OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
  def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ,
                       OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
}


// Neon Long Multiply-Op vector operations,
//   element sizes of 8, 16 and 32 bits:
multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                         InstrItinClass itin16, InstrItinClass itin32,
                         string OpcodeStr, string Dt, SDNode MulOp,
                         SDNode OpNode> {
  def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
                        !strconcat(Dt, "8"), v8i16, v8i8, MulOp, OpNode>;
  def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
                        !strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>;
  def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
                        !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
}

multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr,
                          string Dt, SDNode MulOp, SDNode OpNode> {
  def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D, OpcodeStr,
                            !strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>;
  def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D, OpcodeStr,
                          !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
}


// Neon Long 3-argument intrinsics.
// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16,
                       OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32,
                       OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
                         string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D,
                           OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D,
                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itin16, InstrItinClass itin32,
                        string OpcodeStr, string Dt, SDPatternOperator IntOp>
  : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, IntOp> {
  def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16,
                       OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
}

// ....with explicit extend (VABAL).
multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                            InstrItinClass itin, string OpcodeStr, string Dt,
                            SDPatternOperator IntOp, SDNode ExtOp,
                            SDNode OpNode> {
  def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8,
                           IntOp, ExtOp, OpNode>;
  def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16,
                           IntOp, ExtOp, OpNode>;
  def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32,
                           IntOp, ExtOp, OpNode>;
}


// Neon Pairwise long 2-register intrinsics,
//   element sizes of 8, 16 and 32 bits:
multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                        bits<5> op11_7, bit op4,
                        string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}


// Neon Pairwise long 2-register accumulate intrinsics,
//   element sizes of 8, 16 and 32 bits:
multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                         bits<5> op11_7, bit op4,
                         string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}


// Neon 2-register vector shift by immediate,
//   with f of either N2RegVShLFrm or N2RegVShRFrm
//   element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
                             // imm6 = xxxxxx
}

multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       string baseOpc, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
                     OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
                             // imm6 = xxxxxx
}

// Neon Shift-Accumulate vector operations,
//   element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                         string OpcodeStr, string Dt, SDNode ShOp> {
  // 64-bit vector types.
  def v8i8  : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
                        OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
                        OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>;
                             // imm6 = xxxxxx
}

// Neon Shift-Insert vector operations,
//   with f of either N2RegVShLFrm or N2RegVShRFrm
//   element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShInsL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                          string OpcodeStr> {
  // 64-bit vector types.
  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsli> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsli> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsli> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsli>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsli> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsli> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsli> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsli>;
                             // imm6 = xxxxxx
}

multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                          string OpcodeStr> {
  // 64-bit vector types.
  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm8,
                        N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsri> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm16,
                        N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsri> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm32,
                        N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsri> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, shr_imm64,
                        N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsri>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm8,
                        N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsri> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm16,
                        N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsri> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm32,
                        N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsri> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, shr_imm64,
                        N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsri>;
                             // imm6 = xxxxxx
}

// Neon Shift Long operations,
//   element sizes of 8, 16, 32 bits:
multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                      bit op4, string OpcodeStr, string Dt, SDNode OpNode> {
  def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
              OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, imm1_7, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
              OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, imm1_15, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6,
op4, 3906 OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, imm1_31, OpNode> { 3907 let Inst{21} = 0b1; // imm6 = 1xxxxx 3908 } 3909 } 3910 3911 // Neon Shift Narrow operations, 3912 // element sizes of 16, 32, 64 bits: 3913 multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, 3914 bit op4, InstrItinClass itin, string OpcodeStr, string Dt, 3915 SDNode OpNode> { 3916 def v8i8 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin, 3917 OpcodeStr, !strconcat(Dt, "16"), 3918 v8i8, v8i16, shr_imm8, OpNode> { 3919 let Inst{21-19} = 0b001; // imm6 = 001xxx 3920 } 3921 def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin, 3922 OpcodeStr, !strconcat(Dt, "32"), 3923 v4i16, v4i32, shr_imm16, OpNode> { 3924 let Inst{21-20} = 0b01; // imm6 = 01xxxx 3925 } 3926 def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin, 3927 OpcodeStr, !strconcat(Dt, "64"), 3928 v2i32, v2i64, shr_imm32, OpNode> { 3929 let Inst{21} = 0b1; // imm6 = 1xxxxx 3930 } 3931 } 3932 3933 //===----------------------------------------------------------------------===// 3934 // Instruction Definitions. 3935 //===----------------------------------------------------------------------===// 3936 3937 // Vector Add Operations. 
// VADD : Vector Add (integer and floating-point)
defm VADD : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i",
                     add, 1>;
def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
                  v2f32, v2f32, fadd, 1>;
def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
                  v4f32, v4f32, fadd, 1>;
// VADDL : Vector Add Long (Q = D + D)
defm VADDLs : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
                          "vaddl", "s", add, sext, 1>;
defm VADDLu : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
                          "vaddl", "u", add, zext, 1>;
// VADDW : Vector Add Wide (Q = Q + D)
defm VADDWs : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>;
defm VADDWu : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zext, 0>;
// VHADD : Vector Halving Add
defm VHADDs : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
                         IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                         "vhadd", "s", int_arm_neon_vhadds, 1>;
defm VHADDu : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm,
                         IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                         "vhadd", "u", int_arm_neon_vhaddu, 1>;
// VRHADD : Vector Rounding Halving Add
defm VRHADDs : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm,
                          IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                          "vrhadd", "s", int_arm_neon_vrhadds, 1>;
defm VRHADDu : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm,
                          IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                          "vrhadd", "u", int_arm_neon_vrhaddu, 1>;
// VQADD : Vector Saturating Add
defm VQADDs : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm,
                          IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                          "vqadd", "s", int_arm_neon_vqadds, 1>;
defm VQADDu : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm,
                          IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                          "vqadd", "u", int_arm_neon_vqaddu, 1>;
// VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q)
defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i",
                          int_arm_neon_vaddhn, 1>;
// VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
                           int_arm_neon_vraddhn, 1>;

// Vector Multiply Operations.

// VMUL : Vector Multiply (integer, polynomial and floating-point)
defm VMUL : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D,
                    IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>;
def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul",
                     "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>;
def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul",
                     "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>;
def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
                  v2f32, v2f32, fmul, 1>;
def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
                  v4f32, v4f32, fmul, 1>;
defm VMULsl : N3VSL_HS<0b1000, "vmul", mul>;
def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
                      v2f32, fmul>;

// Fold a multiply by a duplicated Q-register lane into the lane-indexed
// multiply: extract the D subregister that holds the lane, then index
// the lane within that D register.
def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
                      (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
          (v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
                              (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (mul (v4i32 QPR:$src1),
                      (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
          (v4i32 (VMULslv4i32 (v4i32 QPR:$src1),
                              (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
                       (v4f32 (NEONvduplane (v4f32 QPR:$src2), imm:$lane)))),
          (v4f32 (VMULslfq (v4f32 QPR:$src1),
                           (v2f32 (EXTRACT_SUBREG QPR:$src2,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>;

// VQDMULH : Vector Saturating Doubling Multiply Returning High Half
defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
                         IIC_VMULi16Q, IIC_VMULi32Q,
                         "vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
                            IIC_VMULi16Q, IIC_VMULi32Q,
                            "vqdmulh", "s", int_arm_neon_vqdmulh>;
def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
                                       (v8i16 (NEONvduplane (v8i16 QPR:$src2),
                                               imm:$lane)))),
          (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
                                 (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                         (DSubReg_i16_reg imm:$lane))),
                                 (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
                                       (v4i32 (NEONvduplane (v4i32 QPR:$src2),
                                               imm:$lane)))),
          (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
                                 (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                         (DSubReg_i32_reg imm:$lane))),
                                 (SubReg_i32_lane imm:$lane)))>;

// VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
defm VQRDMULH : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm,
                          IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q,
                          "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>;
defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D,
                              IIC_VMULi16Q, IIC_VMULi32Q,
                              "vqrdmulh", "s", int_arm_neon_vqrdmulh>;
def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
                                        (v8i16 (NEONvduplane (v8i16 QPR:$src2),
                                                imm:$lane)))),
          (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
                                  (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i16_reg imm:$lane))),
                                  (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
                                        (v4i32 (NEONvduplane (v4i32 QPR:$src2),
                                                imm:$lane)))),
          (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
                                  (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i32_reg imm:$lane))),
                                  (SubReg_i32_lane imm:$lane)))>;

// VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D)
defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
                       "vmull", "s", NEONvmulls, 1>;
defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
                       "vmull", "u", NEONvmullu, 1>;
def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
                     v8i16, v8i8, int_arm_neon_vmullp, 1>;
defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>;
defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>;

// VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D)
defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D,
                          "vqdmull", "s", int_arm_neon_vqdmull, 1>;
defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D,
                             "vqdmull", "s", int_arm_neon_vqdmull>;

// Vector Multiply-Accumulate and Multiply-Subtract Operations.

// VMLA : Vector Multiply Accumulate (integer and floating-point)
defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                         IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
                       v2f32, fmul_su, fadd_mlx>,
             Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
                       v4f32, fmul_su, fadd_mlx>,
             Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
                            IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
                           v2f32, fmul_su, fadd_mlx>,
               Requires<[HasNEON, UseFPVMLx]>;
def VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
                           v4f32, v2f32, fmul_su, fadd_mlx>,
               Requires<[HasNEON, UseFPVMLx]>;

// Lane-folding patterns for multiply-accumulate (same EXTRACT_SUBREG
// scheme as the VMUL lane patterns above).
def : Pat<(v8i16 (add (v8i16 QPR:$src1),
                  (mul (v8i16 QPR:$src2),
                       (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
          (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4i32 (add (v4i32 QPR:$src1),
                  (mul (v4i32 QPR:$src2),
                       (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
          (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;

def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1),
                  (fmul_su (v4f32 QPR:$src2),
                           (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
          (v4f32 (VMLAslfq (v4f32 QPR:$src1),
                           (v4f32 QPR:$src2),
                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>,
          Requires<[HasNEON, UseFPVMLx]>;

// VMLAL : Vector Multiply Accumulate Long (Q += D * D)
defm VMLALs : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
                            "vmlal", "s", NEONvmulls, add>;
defm VMLALu : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
                            "vmlal", "u", NEONvmullu, add>;

defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>;
defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>;

// VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                           "vqdmlal", "s", int_arm_neon_vqdmlal>;
defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", int_arm_neon_vqdmlal>;

// VMLS : Vector Multiply Subtract (integer and floating-point)
defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                         IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
                       v2f32, fmul_su, fsub_mlx>,
             Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
                       v4f32, fmul_su, fsub_mlx>,
             Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
                            IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
                           v2f32, fmul_su, fsub_mlx>,
               Requires<[HasNEON, UseFPVMLx]>;
def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32",
                           v4f32, v2f32, fmul_su, fsub_mlx>,
               Requires<[HasNEON, UseFPVMLx]>;

// Lane-folding patterns for multiply-subtract.
def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
                  (mul (v8i16 QPR:$src2),
                       (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
          (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
                  (mul (v4i32 QPR:$src2),
                       (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
          (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;

def : Pat<(v4f32 (fsub_mlx (v4f32 QPR:$src1),
                  (fmul_su (v4f32 QPR:$src2),
                           (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
          (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2),
                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>,
          Requires<[HasNEON, UseFPVMLx]>;

// VMLSL : Vector Multiply Subtract Long (Q -= D * D)
defm VMLSLs : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
                            "vmlsl", "s", NEONvmulls, sub>;
defm VMLSLu : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
                            "vmlsl", "u", NEONvmullu, sub>;

defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", NEONvmulls, sub>;
defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>;

// VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
                           "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
// NOTE(review): op11_8 is written as the 3-bit literal 0b111 while the
// sibling VQDMLALsl uses a 4-bit literal (0b0011); TableGen zero-extends
// this into the bits<4> parameter (i.e. 0b0111) — confirm that is intended.
defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>;

// Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations.
def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
                       v2f32, fmul_su, fadd_mlx>,
             Requires<[HasVFP4,UseFusedMAC]>;

def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
                       v4f32, fmul_su, fadd_mlx>,
             Requires<[HasVFP4,UseFusedMAC]>;

// Fused Vector Multiply Subtract (floating-point)
def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
                       v2f32, fmul_su, fsub_mlx>,
             Requires<[HasVFP4,UseFusedMAC]>;
def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
                       v4f32, fmul_su, fsub_mlx>,
             Requires<[HasVFP4,UseFusedMAC]>;

// Match @llvm.fma.* intrinsics
def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
          (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
      Requires<[HasVFP4]>;
def : Pat<(v4f32 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
          (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
      Requires<[HasVFP4]>;
def : Pat<(v2f32 (fma (fneg DPR:$Vn), DPR:$Vm, DPR:$src1)),
          (VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
      Requires<[HasVFP4]>;
def : Pat<(v4f32 (fma (fneg QPR:$Vn), QPR:$Vm, QPR:$src1)),
          (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
      Requires<[HasVFP4]>;

// Vector Subtract Operations.
// VSUB : Vector Subtract (integer and floating-point)
defm VSUB : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ,
                     "vsub", "i", sub, 0>;
def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
                  v2f32, v2f32, fsub, 0>;
def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
                  v4f32, v4f32, fsub, 0>;
// VSUBL : Vector Subtract Long (Q = D - D)
defm VSUBLs : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
                          "vsubl", "s", sub, sext, 0>;
defm VSUBLu : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
                          "vsubl", "u", sub, zext, 0>;
// VSUBW : Vector Subtract Wide (Q = Q - D)
defm VSUBWs : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>;
defm VSUBWu : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>;
// VHSUB : Vector Halving Subtract
defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
                         IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                         "vhsub", "s", int_arm_neon_vhsubs, 0>;
defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm,
                         IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                         "vhsub", "u", int_arm_neon_vhsubu, 0>;
// VQSUB : Vector Saturating Subtract
defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm,
                          IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                          "vqsub", "s", int_arm_neon_vqsubs, 0>;
defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
                          IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                          "vqsub", "u", int_arm_neon_vqsubu, 0>;
// VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q)
defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i",
                          int_arm_neon_vsubhn, 0>;
// VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
                           int_arm_neon_vrsubhn, 0>;

// Vector Comparisons.
// VCEQ : Vector Compare Equal
defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                    IIC_VSUBi4Q, "vceq", "i", NEONvceq, 1>;
def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
                  NEONvceq, 1>;
def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
                  NEONvceq, 1>;

// Compare-against-zero forms accept the two-operand assembly syntax
// "vceq $Vd, #0" via the alias constraint.
let TwoOperandAliasConstraint = "$Vm = $Vd" in
defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
                         "$Vd, $Vm, #0", NEONvceqz>;

// VCGE : Vector Compare Greater Than or Equal
defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                     IIC_VSUBi4Q, "vcge", "s", NEONvcge, 0>;
defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                     IIC_VSUBi4Q, "vcge", "u", NEONvcgeu, 0>;
def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
                  NEONvcge, 0>;
def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
                  NEONvcge, 0>;

let TwoOperandAliasConstraint = "$Vm = $Vd" in {
defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
                         "$Vd, $Vm, #0", NEONvcgez>;
defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
                         "$Vd, $Vm, #0", NEONvclez>;
}

// VCGT : Vector Compare Greater Than
defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                     IIC_VSUBi4Q, "vcgt", "s", NEONvcgt, 0>;
defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                     IIC_VSUBi4Q, "vcgt", "u", NEONvcgtu, 0>;
def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
                  NEONvcgt, 0>;
def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
                  NEONvcgt, 0>;

let TwoOperandAliasConstraint = "$Vm = $Vd" in {
defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
                         "$Vd, $Vm, #0", NEONvcgtz>;
defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
                         "$Vd, $Vm, #0", NEONvcltz>;
}

// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
                     "f32", v2i32, v2f32, int_arm_neon_vacged, 0>;
def VACGEq : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
                     "f32", v4i32, v4f32, int_arm_neon_vacgeq, 0>;
// VACGT : Vector Absolute Compare Greater Than (aka VCAGT)
def VACGTd : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
                     "f32", v2i32, v2f32, int_arm_neon_vacgtd, 0>;
def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
                     "f32", v4i32, v4f32, int_arm_neon_vacgtq, 0>;
// VTST : Vector Test Bits
defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                    IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;

// vaclt/vacle assembly aliases: implemented as vacgt/vacge with the source
// operands swapped in the result instruction.
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
                   (VACGTd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
                   (VACGTq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
                   (VACGEd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
                   (VACGEq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;

// Two-operand forms: $Vd doubles as the second source.
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
                   (VACGTd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
                   (VACGTq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
                   (VACGEd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
                   (VACGEq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;

// Vector Bitwise Operations.
// Bitwise-NOT pattern fragments, expressed as XOR with all-ones, for the
// D- and Q-register widths.
def vnotd : PatFrag<(ops node:$in),
                    (xor node:$in, (bitconvert (v8i8  NEONimmAllOnesV)))>;
def vnotq : PatFrag<(ops node:$in),
                    (xor node:$in, (bitconvert (v16i8 NEONimmAllOnesV)))>;


// VAND : Vector Bitwise AND
def VANDd : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand",
                  v2i32, v2i32, and, 1>;
def VANDq : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand",
                  v4i32, v4i32, and, 1>;

// VEOR : Vector Bitwise Exclusive OR
def VEORd : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor",
                  v2i32, v2i32, xor, 1>;
def VEORq : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor",
                  v4i32, v4i32, xor, 1>;

// VORR : Vector Bitwise OR
def VORRd : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr",
                  v2i32, v2i32, or, 1>;
def VORRq : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
                  v4i32, v4i32, or, 1>;

// VORR (immediate): the modified-immediate bits that are not fixed by the
// cmode pattern are copied from the operand into the Inst overrides below.
def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
                          (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                                (v4i16 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1,
                          (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                                (v2i32 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
                          (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                                (v8i16 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1,
                          (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                                (v4i32 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}


// VBIC : Vector Bitwise Bit Clear (AND NOT)
let TwoOperandAliasConstraint = "$Vn = $Vd" in {
def VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
                 (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
                 "vbic", "$Vd, $Vn, $Vm", "",
                 [(set DPR:$Vd, (v2i32 (and DPR:$Vn,
                                            (vnotd DPR:$Vm))))]>;
def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
                 (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
                 "vbic", "$Vd, $Vn, $Vm", "",
                 [(set QPR:$Vd, (v4i32 (and QPR:$Vn,
                                            (vnotq QPR:$Vm))))]>;
}

// VBIC (immediate): same modified-immediate scheme as VORR (immediate).
def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
                          (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                                (v4i16 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1,
                          (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                                (v2i32 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1,
                          (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                                (v8i16 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1,
                          (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                                (v4i32 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

// VORN : Vector Bitwise OR NOT
def VORNd : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$Vd),
                 (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
                 "vorn", "$Vd, $Vn, $Vm", "",
                 [(set DPR:$Vd, (v2i32 (or DPR:$Vn,
                                           (vnotd DPR:$Vm))))]>;
def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$Vd),
                 (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
                 "vorn", "$Vd, $Vn, $Vm", "",
                 [(set QPR:$Vd, (v4i32 (or QPR:$Vn,
                                           (vnotq QPR:$Vm))))]>;

// VMVN : Vector Bitwise NOT (Immediate)

// These materialize a constant from an immediate, so they can be
// rematerialized instead of spilled.
let isReMaterializable = 1 in {

def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmvn", "i16", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v4i16 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmvn", "i16", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v8i16 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmvn", "i32", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v2i32 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmvn", "i32", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v4i32 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}
}

// VMVN : Vector Bitwise NOT
def VMVNd : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
                 (outs DPR:$Vd), (ins DPR:$Vm), IIC_VSUBiD,
                 "vmvn", "$Vd, $Vm", "",
                 [(set DPR:$Vd, (v2i32 (vnotd DPR:$Vm)))]>;
def VMVNq : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
                 (outs QPR:$Vd), (ins QPR:$Vm), IIC_VSUBiD,
                 "vmvn", "$Vd, $Vm", "",
                 [(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>;
def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>;
def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>;

// VBSL : Vector Bitwise Select
def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
                 (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                 N3RegFrm, IIC_VCNTiD,
                 "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                 [(set DPR:$Vd,
                       (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
// Map the vbsl intrinsic at every 64-bit element type onto the single
// v2i32-typed instruction definition above.
def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1),
                                   (v8i8 DPR:$Vn), (v8i8 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1),
                                    (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1),
                                    (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1),
                                    (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1),
                                    (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
        Requires<[HasNEON]>;

// Recognize the open-coded select (Vn & Vd) | (Vm & ~Vd) as VBSL.
def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
                     (and DPR:$Vm, (vnotd DPR:$Vd)))),
          (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>,
        Requires<[HasNEON]>;

def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd),
                     (and DPR:$Vm, (vnotd DPR:$Vd)))),
          (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>,
        Requires<[HasNEON]>;

def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
                 (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                 N3RegFrm, IIC_VCNTiQ,
                 "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                 [(set QPR:$Vd,
                       (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;

// Same intrinsic and open-coded-select mappings for the 128-bit types.
def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1),
                                    (v16i8 QPR:$Vn), (v16i8 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1),
                                    (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1),
                                    (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1),
                                    (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1),
                                    (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON]>;

def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
                     (and QPR:$Vm, (vnotq QPR:$Vd)))),
          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd),
                     (and QPR:$Vm, (vnotq QPR:$Vd)))),
          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON]>;

// VBIF : Vector Bitwise Insert if False
// like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
// FIXME: This instruction's encoding MAY NOT BE correct.
def VBIFd : N3VX<1, 0, 0b11, 0b0001, 0, 1,
                 (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                 N3RegFrm, IIC_VBINiD,
                 "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                 []>;
def VBIFq : N3VX<1, 0, 0b11, 0b0001, 1, 1,
                 (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                 N3RegFrm, IIC_VBINiQ,
                 "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                 []>;

// VBIT : Vector Bitwise Insert if True
// like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
// FIXME: This instruction's encoding MAY NOT BE correct.
def VBITd : N3VX<1, 0, 0b10, 0b0001, 0, 1,
                 (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                 N3RegFrm, IIC_VBINiD,
                 "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                 []>;
def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1,
                 (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                 N3RegFrm, IIC_VBINiQ,
                 "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                 []>;

// VBIT/VBIF are not yet implemented.  The TwoAddress pass will not go looking
// for equivalent operations with different register constraints; it just
// inserts copies.

// Vector Absolute Differences.

// VABD : Vector Absolute Difference
defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vabd", "s", int_arm_neon_vabds, 1>;
defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vabd", "u", int_arm_neon_vabdu, 1>;
def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
                     "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
                     "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;

// VABDL : Vector Absolute Difference Long (Q = | D - D |)
defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
                             "vabdl", "s", int_arm_neon_vabds, zext, 1>;
defm VABDLu : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
                             "vabdl", "u", int_arm_neon_vabdu, zext, 1>;

// VABA : Vector Absolute Difference and Accumulate
defm VABAs : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
                          "vaba", "s", int_arm_neon_vabds, add>;
defm VABAu : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
                          "vaba", "u", int_arm_neon_vabdu, add>;

// VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
defm VABALs : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD,
                               "vabal", "s", int_arm_neon_vabds, zext, add>;
defm VABALu : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,
                               "vabal", "u", int_arm_neon_vabdu, zext, add>;

// Vector Maximum and Minimum.

// VMAX : Vector Maximum
defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vmax", "s", int_arm_neon_vmaxs, 1>;
defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vmax", "u", int_arm_neon_vmaxu, 1>;
def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
                     "vmax", "f32",
                     v2f32, v2f32, int_arm_neon_vmaxs, 1>;
def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                     "vmax", "f32",
                     v4f32, v4f32, int_arm_neon_vmaxs, 1>;

// VMAXNM (ARMv8-only; decoded in the v8NEON namespace)
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
  def VMAXNMND : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1,
                           N3RegFrm, NoItinerary, "vmaxnm", "f32",
                           v2f32, v2f32, int_arm_neon_vmaxnm, 1>,
                 Requires<[HasV8, HasNEON]>;
  def VMAXNMNQ : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1,
                           N3RegFrm, NoItinerary, "vmaxnm", "f32",
                           v4f32, v4f32, int_arm_neon_vmaxnm, 1>,
                 Requires<[HasV8, HasNEON]>;
}

// VMIN : Vector Minimum
defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vmin", "s", int_arm_neon_vmins, 1>;
defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vmin", "u", int_arm_neon_vminu, 1>;
def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
                     "vmin", "f32",
                     v2f32, v2f32, int_arm_neon_vmins, 1>;
def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                     "vmin", "f32",
                     v4f32, v4f32, int_arm_neon_vmins, 1>;

// VMINNM
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
  def VMINNMND : N3VDIntnp<0b00110, 0b10,
0b1111, 0, 1, 4699 N3RegFrm, NoItinerary, "vminnm", "f32", 4700 v2f32, v2f32, int_arm_neon_vminnm, 1>, 4701 Requires<[HasV8, HasNEON]>; 4702 def VMINNMNQ : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1, 4703 N3RegFrm, NoItinerary, "vminnm", "f32", 4704 v4f32, v4f32, int_arm_neon_vminnm, 1>, 4705 Requires<[HasV8, HasNEON]>; 4706 } 4707 4708 // Vector Pairwise Operations. 4709 4710 // VPADD : Vector Pairwise Add 4711 def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD, 4712 "vpadd", "i8", 4713 v8i8, v8i8, int_arm_neon_vpadd, 0>; 4714 def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD, 4715 "vpadd", "i16", 4716 v4i16, v4i16, int_arm_neon_vpadd, 0>; 4717 def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD, 4718 "vpadd", "i32", 4719 v2i32, v2i32, int_arm_neon_vpadd, 0>; 4720 def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm, 4721 IIC_VPBIND, "vpadd", "f32", 4722 v2f32, v2f32, int_arm_neon_vpadd, 0>; 4723 4724 // VPADDL : Vector Pairwise Add Long 4725 defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s", 4726 int_arm_neon_vpaddls>; 4727 defm VPADDLu : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u", 4728 int_arm_neon_vpaddlu>; 4729 4730 // VPADAL : Vector Pairwise Add and Accumulate Long 4731 defm VPADALs : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s", 4732 int_arm_neon_vpadals>; 4733 defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u", 4734 int_arm_neon_vpadalu>; 4735 4736 // VPMAX : Vector Pairwise Maximum 4737 def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", 4738 "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>; 4739 def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", 4740 "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>; 4741 def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", 4742 "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>; 4743 def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, 
"vpmax", 4744 "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>; 4745 def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", 4746 "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>; 4747 def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", 4748 "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>; 4749 def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax", 4750 "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>; 4751 4752 // VPMIN : Vector Pairwise Minimum 4753 def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", 4754 "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>; 4755 def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", 4756 "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>; 4757 def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", 4758 "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>; 4759 def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", 4760 "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>; 4761 def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", 4762 "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>; 4763 def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", 4764 "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>; 4765 def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin", 4766 "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>; 4767 4768 // Vector Reciprocal and Reciprocal Square Root Estimate and Step. 

// VRECPE : Vector Reciprocal Estimate
// Integer ("u32") and f32 variants differ only in bits 11-7
// (0b01000 vs 0b01010).
def  VRECPEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
                        IIC_VUNAD, "vrecpe", "u32",
                        v2i32, v2i32, int_arm_neon_vrecpe>;
def  VRECPEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
                        IIC_VUNAQ, "vrecpe", "u32",
                        v4i32, v4i32, int_arm_neon_vrecpe>;
def  VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
                        IIC_VUNAD, "vrecpe", "f32",
                        v2f32, v2f32, int_arm_neon_vrecpe>;
def  VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
                        IIC_VUNAQ, "vrecpe", "f32",
                        v4f32, v4f32, int_arm_neon_vrecpe>;

// VRECPS : Vector Reciprocal Step
// Newton-Raphson refinement step used together with VRECPE.
def  VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
                        IIC_VRECSD, "vrecps", "f32",
                        v2f32, v2f32, int_arm_neon_vrecps, 1>;
def  VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
                        IIC_VRECSQ, "vrecps", "f32",
                        v4f32, v4f32, int_arm_neon_vrecps, 1>;

// VRSQRTE : Vector Reciprocal Square Root Estimate
def  VRSQRTEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
                         IIC_VUNAD, "vrsqrte", "u32",
                         v2i32, v2i32, int_arm_neon_vrsqrte>;
def  VRSQRTEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
                         IIC_VUNAQ, "vrsqrte", "u32",
                         v4i32, v4i32, int_arm_neon_vrsqrte>;
def  VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
                         IIC_VUNAD, "vrsqrte", "f32",
                         v2f32, v2f32, int_arm_neon_vrsqrte>;
def  VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
                         IIC_VUNAQ, "vrsqrte", "f32",
                         v4f32, v4f32, int_arm_neon_vrsqrte>;

// VRSQRTS : Vector Reciprocal Square Root Step
// Refinement step used together with VRSQRTE.
def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
                        IIC_VRECSD, "vrsqrts", "f32",
                        v2f32, v2f32, int_arm_neon_vrsqrts, 1>;
def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
                        IIC_VRECSQ, "vrsqrts", "f32",
                        v4f32, v4f32, int_arm_neon_vrsqrts, 1>;

// Vector Shifts.

// VSHL : Vector Shift
// Register-shift form: per-element shift amounts come from the second
// register operand (negative amounts shift right, per the vshift intrinsics).
defm VSHLs   : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm,
                            IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                            "vshl", "s", int_arm_neon_vshifts>;
defm VSHLu   : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
                            IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                            "vshl", "u", int_arm_neon_vshiftu>;

// VSHL : Vector Shift Left (Immediate)
defm VSHLi   : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>;

// VSHR : Vector Shift Right (Immediate)
defm VSHRs   : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", "VSHRs",
                           NEONvshrs>;
defm VSHRu   : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu",
                           NEONvshru>;

// VSHLL : Vector Shift Left Long
defm VSHLLs  : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", NEONvshlls>;
defm VSHLLu  : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u", NEONvshllu>;

// VSHLL : Vector Shift Left Long (with maximum shift count)
// The shift amount equals the source element width, so it is encoded in
// Inst{21-16} rather than taken from an immediate field; a dedicated
// decoder method disambiguates these from the generic VSHLL encodings.
class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
                bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy,
                ValueType OpTy, Operand ImmTy, SDNode OpNode>
  : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
           ResTy, OpTy, ImmTy, OpNode> {
  let Inst{21-16} = op21_16;
  let DecoderMethod = "DecodeVSHLMaxInstruction";
}
def  VSHLLi8  : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
                          v8i16, v8i8, imm8, NEONvshlli>;
def  VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
                          v4i32, v4i16, imm16, NEONvshlli>;
def  VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
                          v2i64, v2i32, imm32, NEONvshlli>;

// VSHRN : Vector Shift Right and Narrow
defm VSHRN   : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
                          NEONvshrn>;

// VRSHL : Vector Rounding Shift
defm VRSHLs  : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vrshl", "s", int_arm_neon_vrshifts>;
defm VRSHLu  : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vrshl", "u", int_arm_neon_vrshiftu>;
// VRSHR : Vector Rounding Shift Right
defm VRSHRs  : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", "VRSHRs",
                           NEONvrshrs>;
defm VRSHRu  : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", "VRSHRu",
                           NEONvrshru>;

// VRSHRN : Vector Rounding Shift Right and Narrow
defm VRSHRN  : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
                          NEONvrshrn>;

// VQSHL : Vector Saturating Shift
defm VQSHLs  : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqshl", "s", int_arm_neon_vqshifts>;
defm VQSHLu  : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqshl", "u", int_arm_neon_vqshiftu>;
// VQSHL : Vector Saturating Shift Left (Immediate)
defm VQSHLsi : N2VShL_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshls>;
defm VQSHLui : N2VShL_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshlu>;

// VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned)
// Signed input, unsigned saturated output — hence the "s" type suffix.
defm VQSHLsu : N2VShL_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsu>;

// VQSHRN : Vector Saturating Shift Right and Narrow
defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
                          NEONvqshrns>;
defm VQSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u",
                          NEONvqshrnu>;

// VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned)
defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
                          NEONvqshrnsu>;

// VQRSHL : Vector Saturating Rounding Shift
defm VQRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqrshl", "s", int_arm_neon_vqrshifts>;
defm VQRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqrshl", "u", int_arm_neon_vqrshiftu>;

// VQRSHRN : Vector Saturating Rounding Shift Right and Narrow
defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
                           NEONvqrshrns>;
defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u",
                           NEONvqrshrnu>;

// VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
                           NEONvqrshrnsu>;

// VSRA : Vector Shift Right and Accumulate
defm VSRAs   : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrs>;
defm VSRAu   : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", NEONvshru>;
// VRSRA : Vector Rounding Shift Right and Accumulate
defm VRSRAs  : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>;
defm VRSRAu  : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>;

// VSLI : Vector Shift Left and Insert
defm VSLI    : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">;

// VSRI : Vector Shift Right and Insert
defm VSRI    : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">;

// Vector Absolute and Saturating Absolute.

// VABS : Vector Absolute Value
defm VABS    : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
                          IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s",
                          int_arm_neon_vabs>;
def  VABSfd  : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
                    "vabs", "f32",
                    v2f32, v2f32, fabs>;
def  VABSfq  : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
                    "vabs", "f32",
                    v4f32, v4f32, fabs>;

// Recognize the branch-free abs idiom (x + (x >>s N-1)) ^ (x >>s N-1)
// — where N-1 is the element width minus one — and select a single VABS.
// The bitconverts appear because the xor is performed in the i32 lane type.
def : Pat<(xor (v2i32 (bitconvert (v8i8 (NEONvshrs DPR:$src, (i32 7))))),
               (v2i32 (bitconvert (v8i8 (add DPR:$src,
                                             (NEONvshrs DPR:$src, (i32 7))))))),
          (VABSv8i8 DPR:$src)>;
def : Pat<(xor (v2i32 (bitconvert (v4i16 (NEONvshrs DPR:$src, (i32 15))))),
               (v2i32 (bitconvert (v4i16 (add DPR:$src,
                                            (NEONvshrs DPR:$src, (i32 15))))))),
          (VABSv4i16 DPR:$src)>;
def : Pat<(xor (v2i32 (NEONvshrs DPR:$src, (i32 31))),
               (v2i32 (add DPR:$src, (NEONvshrs DPR:$src, (i32 31))))),
          (VABSv2i32 DPR:$src)>;
def : Pat<(xor (v4i32 (bitconvert (v16i8 (NEONvshrs QPR:$src, (i32 7))))),
               (v4i32 (bitconvert (v16i8 (add QPR:$src,
                                             (NEONvshrs QPR:$src, (i32 7))))))),
          (VABSv16i8 QPR:$src)>;
def : Pat<(xor (v4i32 (bitconvert (v8i16 (NEONvshrs QPR:$src, (i32 15))))),
               (v4i32 (bitconvert (v8i16 (add QPR:$src,
                                            (NEONvshrs QPR:$src, (i32 15))))))),
          (VABSv8i16 QPR:$src)>;
def : Pat<(xor (v4i32 (NEONvshrs QPR:$src, (i32 31))),
               (v4i32 (add QPR:$src, (NEONvshrs QPR:$src, (i32 31))))),
          (VABSv4i32 QPR:$src)>;

// The float VABS defs match ISD::FABS; also map the NEON intrinsic to them.
def : Pat<(v2f32 (int_arm_neon_vabs (v2f32 DPR:$src))), (VABSfd DPR:$src)>;
def : Pat<(v4f32 (int_arm_neon_vabs (v4f32 QPR:$src))), (VABSfq QPR:$src)>;

// VQABS : Vector Saturating Absolute Value
defm VQABS   : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
                          IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s",
                          int_arm_neon_vqabs>;

// Vector Negate.

// Negation matched as (0 - x); the all-zeros immediate is bitconverted so a
// single PatFrag covers every element type of the given register width.
def vnegd  : PatFrag<(ops node:$in),
                     (sub (bitconvert (v2i32 NEONimmAllZerosV)), node:$in)>;
def vnegq  : PatFrag<(ops node:$in),
                     (sub (bitconvert (v4i32 NEONimmAllZerosV)), node:$in)>;

class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$Vd), (ins DPR:$Vm),
        IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (vnegd DPR:$Vm)))]>;
class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$Vd), (ins QPR:$Vm),
        IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (vnegq QPR:$Vm)))]>;

// VNEG : Vector Negate (integer)
def  VNEGs8d  : VNEGD<0b00, "vneg", "s8", v8i8>;
def  VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
def  VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
def  VNEGs8q  : VNEGQ<0b00, "vneg", "s8", v16i8>;
def  VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
def  VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;

// VNEG : Vector Negate (floating-point)
def  VNEGfd   : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
                    (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
                    "vneg", "f32", "$Vd, $Vm", "",
                    [(set DPR:$Vd, (v2f32 (fneg DPR:$Vm)))]>;
def  VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
                    (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
                    "vneg", "f32", "$Vd, $Vm", "",
                    [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>;

// Explicit per-type patterns for the vnegd/vnegq PatFrags above.
def : Pat<(v8i8  (vnegd  DPR:$src)), (VNEGs8d DPR:$src)>;
def : Pat<(v4i16 (vnegd  DPR:$src)), (VNEGs16d DPR:$src)>;
def : Pat<(v2i32 (vnegd  DPR:$src)), (VNEGs32d DPR:$src)>;
def : Pat<(v16i8 (vnegq  QPR:$src)), (VNEGs8q QPR:$src)>;
def : Pat<(v8i16 (vnegq  QPR:$src)), (VNEGs16q QPR:$src)>;
def : Pat<(v4i32 (vnegq  QPR:$src)), (VNEGs32q QPR:$src)>;

// VQNEG : Vector Saturating Negate
defm VQNEG   : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
                          IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s",
                          int_arm_neon_vqneg>;

// Vector Bit Counting Operations.

// VCLS : Vector Count Leading Sign Bits
defm VCLS    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
                          IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s",
                          int_arm_neon_vcls>;
// VCLZ : Vector Count Leading Zeros
defm VCLZ    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
                          IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
                          ctlz>;
// VCNT : Vector Count One Bits
// Only an 8-bit-element form exists in the ISA.
def  VCNTd   : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                       IIC_VCNTiD, "vcnt", "8",
                       v8i8, v8i8, ctpop>;
def  VCNTq   : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                       IIC_VCNTiQ, "vcnt", "8",
                       v16i8, v16i8, ctpop>;

// Vector Swap
// Both registers are read and written, so both appear as tied outs/ins.
// No selection pattern: assembler/disassembler use only.
def  VSWPd   : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
                    (outs DPR:$Vd, DPR:$Vm), (ins DPR:$in1, DPR:$in2),
                    NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
                    []>;
def  VSWPq   : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
                    (outs QPR:$Vd, QPR:$Vm), (ins QPR:$in1, QPR:$in2),
                    NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
                    []>;

// Vector Move Operations.

// VMOV : Vector Move (Register)
// Register-to-register vmov is just VORR with both sources the same.
def : InstAlias<"vmov${p} $Vd, $Vm",
                (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
def : InstAlias<"vmov${p} $Vd, $Vm",
                (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;

// VMOV : Vector Move (Immediate)

// Immediate moves can be rematerialized instead of spilled/reloaded.
let isReMaterializable = 1 in {
def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
                         "vmov", "i8", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v8i8 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
                         "vmov", "i8", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v16i8 (NEONvmovImm timm:$SIMM)))]>;

// i16/i32 forms leave "cmode" bits unspecified ('?'); the relevant bits are
// filled in from the encoded modified-immediate value $SIMM below.
def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmov", "i16", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v4i16 (NEONvmovImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmov", "i16", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v8i16 (NEONvmovImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmov", "i32", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v2i32 (NEONvmovImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmov", "i32", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v4i32 (NEONvmovImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd),
                         (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
                         "vmov", "i64", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v1i64 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd),
                         (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
                         "vmov", "i64", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v2i64 (NEONvmovImm timm:$SIMM)))]>;

def VMOVv2f32 : N1ModImm<1, 0b000, 0b1111, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
                         "vmov", "f32", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v2f32 (NEONvmovFPImm timm:$SIMM)))]>;
def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
                         "vmov", "f32", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v4f32 (NEONvmovFPImm timm:$SIMM)))]>;
} // isReMaterializable

// VMOV : Vector Get Lane (move scalar to ARM core register)
// 8/16-bit extracts have signed (s8/s16) and zero-extending (u8/u16) forms;
// the lane index is scattered into Inst{21} and Inst{6-5}/Inst{6}.

def VGETLNs8 : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
                        (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
                        IIC_VMOVSI, "vmov", "s8", "$R, $V$lane",
                        [(set GPR:$R, (NEONvgetlanes (v8i8 DPR:$V),
                                           imm:$lane))]> {
  let Inst{21}  = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
                        (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
                        IIC_VMOVSI, "vmov", "s16", "$R, $V$lane",
                        [(set GPR:$R, (NEONvgetlanes (v4i16 DPR:$V),
                                           imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6}  = lane{0};
}
def VGETLNu8 : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
                        (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
                        IIC_VMOVSI, "vmov", "u8", "$R, $V$lane",
                        [(set GPR:$R, (NEONvgetlaneu (v8i8 DPR:$V),
                                           imm:$lane))]> {
  let Inst{21}  = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
                        (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
                        IIC_VMOVSI, "vmov", "u16", "$R, $V$lane",
                        [(set GPR:$R, (NEONvgetlaneu (v4i16 DPR:$V),
                                           imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6}  = lane{0};
}
def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
                        (outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane),
                        IIC_VMOVSI, "vmov", "32", "$R, $V$lane",
                        [(set GPR:$R, (extractelt (v2i32 DPR:$V),
                                           imm:$lane))]>,
                Requires<[HasNEON, HasFastVGETLNi32]> {
  let Inst{21} = lane{0};
}
// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
// Q-register extracts: pick the containing D subregister, then extract the
// lane from it with the D-form instruction.
def : Pat<(NEONvgetlanes (v16i8 QPR:$src), imm:$lane),
          (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src,
                                          (DSubReg_i8_reg imm:$lane))),
                    (SubReg_i8_lane imm:$lane))>;
def : Pat<(NEONvgetlanes (v8i16 QPR:$src), imm:$lane),
          (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                                            (DSubReg_i16_reg imm:$lane))),
                     (SubReg_i16_lane imm:$lane))>;
def : Pat<(NEONvgetlaneu (v16i8 QPR:$src), imm:$lane),
          (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src,
                                          (DSubReg_i8_reg imm:$lane))),
                    (SubReg_i8_lane imm:$lane))>;
def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane),
          (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                                            (DSubReg_i16_reg imm:$lane))),
                     (SubReg_i16_lane imm:$lane))>;
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
          (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
                                            (DSubReg_i32_reg imm:$lane))),
                     (SubReg_i32_lane imm:$lane))>,
      Requires<[HasNEON, HasFastVGETLNi32]>;
// On targets where VMOV-from-lane is slow, extract 32-bit lanes through the
// overlapping S registers instead.
def : Pat<(extractelt (v2i32 DPR:$src), imm:$lane),
          (COPY_TO_REGCLASS
            (i32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
      Requires<[HasNEON, HasSlowVGETLNi32]>;
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
          (COPY_TO_REGCLASS
            (i32 (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
      Requires<[HasNEON, HasSlowVGETLNi32]>;
def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
          (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)),
                          (SSubReg_f32_reg imm:$src2))>;
def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1),QPR_VFP2)),
                          (SSubReg_f32_reg imm:$src2))>;
//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
//          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;


// VMOV : Vector Set Lane (move ARM core register to scalar)

// Inserting into a lane preserves the other lanes, so the old vector value
// is tied to the destination.
let Constraints = "$src1 = $V" in {
def VSETLNi8 : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V),
                        (ins DPR:$src1, GPR:$R, VectorIndex8:$lane),
                        IIC_VMOVISL, "vmov", "8", "$V$lane, $R",
                        [(set DPR:$V, (vector_insert (v8i8 DPR:$src1),
                                           GPR:$R, imm:$lane))]> {
  let Inst{21}  = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V),
                        (ins DPR:$src1, GPR:$R, VectorIndex16:$lane),
                        IIC_VMOVISL, "vmov", "16", "$V$lane, $R",
                        [(set DPR:$V, (vector_insert (v4i16 DPR:$src1),
                                           GPR:$R, imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6}  = lane{0};
}
def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
                        (ins DPR:$src1, GPR:$R, VectorIndex32:$lane),
                        IIC_VMOVISL, "vmov", "32", "$V$lane, $R",
                        [(set DPR:$V, (insertelt (v2i32 DPR:$src1),
                                           GPR:$R, imm:$lane))]> {
  let Inst{21} = lane{0};
}
}
// Q-register inserts: set the lane in the containing D subregister, then
// re-insert that D register into the Q value.
def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
          (v16i8 (INSERT_SUBREG QPR:$src1,
                  (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
                                   (DSubReg_i8_reg imm:$lane))),
                            GPR:$src2, (SubReg_i8_lane imm:$lane))),
                  (DSubReg_i8_reg imm:$lane)))>;
def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
          (v8i16 (INSERT_SUBREG QPR:$src1,
                  (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
                                     (DSubReg_i16_reg imm:$lane))),
                             GPR:$src2, (SubReg_i16_lane imm:$lane))),
                  (DSubReg_i16_reg imm:$lane)))>;
def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
          (v4i32 (INSERT_SUBREG QPR:$src1,
                  (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
                                     (DSubReg_i32_reg imm:$lane))),
                             GPR:$src2, (SubReg_i32_lane imm:$lane))),
                  (DSubReg_i32_reg imm:$lane)))>;

// Float lanes live in the overlapping S registers; insert via subregisters.
def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
                         SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)),
                         SPR:$src2, (SSubReg_f32_reg imm:$src3))>;

//def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
//          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;

// scalar_to_vector only defines lane 0; the remaining lanes are undef,
// hence the IMPLICIT_DEF base.
def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;

def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
          (VSETLNi8  (v8i8  (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v4i16 (scalar_to_vector GPR:$src)),
          (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
          (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;

def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                         (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;
def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
                         (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;
def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                         (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;

// VDUP : Vector Duplicate (from ARM core register to all elements)

class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$V), (ins GPR:$R),
          IIC_VMOVIS, "vdup", Dt, "$V, $R",
          [(set DPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;
class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$V), (ins GPR:$R),
          IIC_VMOVIS, "vdup", Dt, "$V, $R",
          [(set QPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;

// Only the 32-bit D-form is predicated on HasFastVDUP32; the slow-path
// patterns below use VMOVDRR for that case instead.
def  VDUP8d   : VDUPD<0b11101100, 0b00, "8", v8i8>;
def  VDUP16d  : VDUPD<0b11101000, 0b01, "16", v4i16>;
def  VDUP32d  : VDUPD<0b11101000, 0b00, "32", v2i32>,
                Requires<[HasNEON, HasFastVDUP32]>;
def  VDUP8q   : VDUPQ<0b11101110, 0b00, "8", v16i8>;
def  VDUP16q  : VDUPQ<0b11101010, 0b01, "16", v8i16>;
def  VDUP32q  : VDUPQ<0b11101010, 0b00, "32", v4i32>;

// NEONvdup patterns for uarchs with fast VDUP.32.
def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>,
      Requires<[HasNEON,HasFastVDUP32]>;
def : Pat<(v4f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>;

// NEONvdup patterns for uarchs with slow VDUP.32 - use VMOVDRR instead.
def : Pat<(v2i32 (NEONvdup (i32 GPR:$R))), (VMOVDRR GPR:$R, GPR:$R)>,
      Requires<[HasNEON,HasSlowVDUP32]>;
def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VMOVDRR GPR:$R, GPR:$R)>,
      Requires<[HasNEON,HasSlowVDUP32]>;

// VDUP : Vector Duplicate Lane (from scalar to all elements)

class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
              ValueType Ty, Operand IdxTy>
  : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
              IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm$lane",
              [(set DPR:$Vd, (Ty (NEONvduplane (Ty DPR:$Vm), imm:$lane)))]>;

class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Operand IdxTy>
  : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
              IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm$lane",
              [(set QPR:$Vd, (ResTy (NEONvduplane (OpTy DPR:$Vm),
                                      VectorIndex32:$lane)))]>;
// NOTE(review): VDUPLNQ's pattern uses VectorIndex32:$lane for every element
// size while the asm operand is the per-size IdxTy — confirm intentional.

// Inst{19-16} is partially specified depending on the element size.
// Concrete VDUP (lane) instructions. Each def fixes the element-size marker
// bits of Inst{19-16} in its template argument and maps the lane index into
// the remaining bits.
def VDUPLN8d : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8, VectorIndex8> {
  bits<3> lane;
  let Inst{19-17} = lane{2-0};
}
def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16, VectorIndex16> {
  bits<2> lane;
  let Inst{19-18} = lane{1-0};
}
def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32, VectorIndex32> {
  bits<1> lane;
  let Inst{19} = lane{0};
}
def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8, VectorIndex8> {
  bits<3> lane;
  let Inst{19-17} = lane{2-0};
}
def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16, VectorIndex16> {
  bits<2> lane;
  let Inst{19-18} = lane{1-0};
}
def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32, VectorIndex32> {
  bits<1> lane;
  let Inst{19} = lane{0};
}

// f32 duplane is the same operation as i32 duplane; reuse the .32 forms.
def : Pat<(v2f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
          (VDUPLN32d DPR:$Vm, imm:$lane)>;

def : Pat<(v4f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
          (VDUPLN32q DPR:$Vm, imm:$lane)>;

// Duplicating a lane of a Q register: extract the D half holding the lane
// (DSubReg_*_reg), then dup from the lane index within that half
// (SubReg_*_lane).
def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)),
          (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
                                    (DSubReg_i8_reg imm:$lane))),
                           (SubReg_i8_lane imm:$lane)))>;
def : Pat<(v8i16 (NEONvduplane (v8i16 QPR:$src), imm:$lane)),
          (v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src,
                                      (DSubReg_i16_reg imm:$lane))),
                            (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)),
          (v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src,
                                      (DSubReg_i32_reg imm:$lane))),
                            (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)),
          (v4f32 (VDUPLN32q (v2f32 (EXTRACT_SUBREG QPR:$src,
                                      (DSubReg_i32_reg imm:$lane))),
                            (SubReg_i32_lane imm:$lane)))>;

// Pseudos for duplicating an f32 held in an S register; expanded later.
def VDUPfdf : PseudoNeonI<(outs DPR:$dst), (ins SPR:$src), IIC_VMOVD, "",
                    [(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]>;
def VDUPfqf : PseudoNeonI<(outs QPR:$dst), (ins SPR:$src), IIC_VMOVD, "",
                    [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>;

// VMOVN : Vector Narrowing Move
defm VMOVN    : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN,
                         "vmovn", "i", trunc>;
// VQMOVN : Vector Saturating Narrowing Move
defm VQMOVNs  : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
                            "vqmovn", "s", int_arm_neon_vqmovns>;
defm VQMOVNu  : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD,
                            "vqmovn", "u", int_arm_neon_vqmovnu>;
defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
                            "vqmovun", "s", int_arm_neon_vqmovnsu>;
// VMOVL : Vector Lengthening Move
defm VMOVLs   : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
defm VMOVLu   : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;
// anyext can use either form; pick the unsigned one.
def : Pat<(v8i16 (anyext (v8i8 DPR:$Vm))), (VMOVLuv8i16 DPR:$Vm)>;
def : Pat<(v4i32 (anyext (v4i16 DPR:$Vm))), (VMOVLuv4i32 DPR:$Vm)>;
def : Pat<(v2i64 (anyext (v2i32 DPR:$Vm))), (VMOVLuv2i64 DPR:$Vm)>;

// Vector Conversions.
// VCVT : Vector Convert Between Floating-Point and Integers
// Round-towards-zero conversions, 64-bit (D) forms...
def  VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
                     v2i32, v2f32, fp_to_sint>;
def  VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
                     v2i32, v2f32, fp_to_uint>;
def  VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
                     v2f32, v2i32, sint_to_fp>;
def  VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
                     v2f32, v2i32, uint_to_fp>;

// ...and the 128-bit (Q) forms.
def  VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
                     v4i32, v4f32, fp_to_sint>;
def  VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
                     v4i32, v4f32, fp_to_uint>;
def  VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
                     v4f32, v4i32, sint_to_fp>;
def  VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
                     v4f32, v4i32, uint_to_fp>;

// VCVT{A, N, P, M}
// ARMv8 directed-rounding conversions; op10_8 selects the rounding mode and
// the last template bit selects signed (0) vs. unsigned (1) results.
multiclass VCVT_FPI<string op, bits<3> op10_8, SDPatternOperator IntS,
                    SDPatternOperator IntU> {
  let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
    def SD : N2VDIntnp<0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
                       "s32.f32", v2i32, v2f32, IntS>, Requires<[HasV8, HasNEON]>;
    def SQ : N2VQIntnp<0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
                       "s32.f32", v4i32, v4f32, IntS>, Requires<[HasV8, HasNEON]>;
    def UD : N2VDIntnp<0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
                       "u32.f32", v2i32, v2f32, IntU>, Requires<[HasV8, HasNEON]>;
    def UQ : N2VQIntnp<0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
                       "u32.f32", v4i32, v4f32, IntU>, Requires<[HasV8, HasNEON]>;
  }
}

defm VCVTAN : VCVT_FPI<"a", 0b000, int_arm_neon_vcvtas, int_arm_neon_vcvtau>;
defm VCVTNN : VCVT_FPI<"n", 0b001, int_arm_neon_vcvtns, int_arm_neon_vcvtnu>;
defm VCVTPN : VCVT_FPI<"p", 0b010, int_arm_neon_vcvtps, int_arm_neon_vcvtpu>;
defm VCVTMN : VCVT_FPI<"m", 0b011, int_arm_neon_vcvtms, int_arm_neon_vcvtmu>;

// VCVT : Vector Convert Between Floating-Point and Fixed-Point.
let DecoderMethod = "DecodeVCVTD" in {
def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
                        v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
                        v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
                        v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
                        v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
}

let DecoderMethod = "DecodeVCVTQ" in {
def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
                        v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
                        v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
                        v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
                        v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
}

// VCVT : Vector Convert Between Half-Precision and Single-Precision.
def  VCVTf2h  : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
                        IIC_VUNAQ, "vcvt", "f16.f32",
                        v4i16, v4f32, int_arm_neon_vcvtfp2hf>,
                Requires<[HasNEON, HasFP16]>;
def  VCVTh2f  : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0,
                        IIC_VUNAQ, "vcvt", "f32.f16",
                        v4f32, v4i16, int_arm_neon_vcvthf2fp>,
                Requires<[HasNEON, HasFP16]>;

// Vector Reverse.
// VREV64 : Vector Reverse elements within 64-bit doublewords

class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VMOVD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (NEONvrev64 (Ty DPR:$Vm))))]>;
class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VMOVQ,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (NEONvrev64 (Ty QPR:$Vm))))]>;

def VREV64d8  : VREV64D<0b00, "vrev64", "8", v8i8>;
def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
// f32 reversal is bit-identical to i32 reversal; reuse the .32 instruction.
def : Pat<(v2f32 (NEONvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>;

def VREV64q8  : VREV64Q<0b00, "vrev64", "8", v16i8>;
def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
def : Pat<(v4f32 (NEONvrev64 (v4f32 QPR:$Vm))), (VREV64q32 QPR:$Vm)>;

// VREV32 : Vector Reverse elements within 32-bit words

class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VMOVD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (NEONvrev32 (Ty DPR:$Vm))))]>;
class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VMOVQ,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (NEONvrev32 (Ty QPR:$Vm))))]>;

// Only 8- and 16-bit element sizes are meaningful within a 32-bit word.
def VREV32d8  : VREV32D<0b00, "vrev32", "8", v8i8>;
def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;

def VREV32q8  : VREV32Q<0b00, "vrev32", "8", v16i8>;
def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;

// VREV16 : Vector Reverse elements within 16-bit halfwords

class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VMOVD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (NEONvrev16 (Ty DPR:$Vm))))]>;
class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VMOVQ,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (NEONvrev16 (Ty QPR:$Vm))))]>;

// Only byte elements fit inside a halfword.
def VREV16d8  : VREV16D<0b00, "vrev16", "8", v8i8>;
def VREV16q8  : VREV16Q<0b00, "vrev16", "8", v16i8>;

// Other Vector Shuffles.

// Aligned extractions: really just dropping registers

// Extracting an aligned subvector of a Q register is just a subregister
// read; LaneCVT maps the start lane to the right D subregister index.
class AlignedVEXTq<ValueType DestTy, ValueType SrcTy, SDNodeXForm LaneCVT>
  : Pat<(DestTy (vector_extract_subvec (SrcTy QPR:$src), (i32 imm:$start))),
        (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>;

def : AlignedVEXTq<v8i8, v16i8, DSubReg_i8_reg>;

def : AlignedVEXTq<v4i16, v8i16, DSubReg_i16_reg>;

def : AlignedVEXTq<v2i32, v4i32, DSubReg_i32_reg>;

def : AlignedVEXTq<v1i64, v2i64, DSubReg_f64_reg>;

def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;


// VEXT : Vector Extract


// All of these have a two-operand InstAlias.
let TwoOperandAliasConstraint = "$Vn = $Vd" in {
// 64-bit VEXT: concatenate $Vn:$Vm and extract 8 bytes starting at $index.
class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
  : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm,
        IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
        [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
                                     (Ty DPR:$Vm), imm:$index)))]> {
  bits<3> index;
  let Inst{11} = 0b0;
  let Inst{10-8} = index{2-0};
}

// 128-bit VEXT.
// NOTE(review): the immTy parameter is not referenced here — the ins list
// hard-codes imm0_15 for all Q forms; the per-size operand shows up only in
// the VEXTq* instantiations below. Confirm whether this is intentional.
class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
  : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd),
        (ins QPR:$Vn, QPR:$Vm, imm0_15:$index), NVExtFrm,
        IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
        [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
                                     (Ty QPR:$Vm), imm:$index)))]> {
  bits<4> index;
  let Inst{11-8} = index{3-0};
}
}

// The element index is scaled into the byte-index field; unused low bits
// are forced to zero for the wider element sizes.
def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> {
  let Inst{10-8} = index{2-0};
}
def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> {
  let Inst{10-9} = index{1-0};
  let Inst{8}    = 0b0;
}
def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
  let Inst{10}   = index{0};
  let Inst{9-8}  = 0b00;
}
def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn),
                           (v2f32 DPR:$Vm),
                           (i32 imm:$index))),
          (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;

def VEXTq8 : VEXTq<"vext", "8", v16i8, imm0_15> {
  let Inst{11-8} = index{3-0};
}
def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> {
  let Inst{11-9} = index{2-0};
  let Inst{8}    = 0b0;
}
def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> {
  let Inst{11-10} = index{1-0};
  let Inst{9-8}   = 0b00;
}
def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> {
  let Inst{11}   = index{0};
  let Inst{10-8} = 0b000;
}
def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn),
                           (v4f32 QPR:$Vm),
                           (i32 imm:$index))),
          (VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>;

// VTRN : Vector Transpose
def  VTRNd8   : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
def  VTRNd16  : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
def  VTRNd32  : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;

def  VTRNq8   : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
def  VTRNq16  : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
def  VTRNq32  : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;

// VUZP : Vector Unzip (Deinterleave)

def  VUZPd8   : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
def  VUZPd16  : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
// vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
// (There is deliberately no VUZPd32 def; the alias covers it.)
def : NEONInstAlias<"vuzp${p}.32 $Dd, $Dm",
                    (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;

def  VUZPq8   : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
def  VUZPq16  : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
def  VUZPq32  : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;

// VZIP : Vector Zip (Interleave)

def  VZIPd8   : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
def  VZIPd16  : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
// vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
def : NEONInstAlias<"vzip${p}.32 $Dd, $Dm",
                    (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;

def  VZIPq8   : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
def  VZIPq16  : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
def  VZIPq32  : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;

// Vector Table Lookup and Table Extension.
// VTBL : Vector Table Lookup
let DecoderMethod = "DecodeTBLInstruction" in {
def  VTBL1
  : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd),
        (ins VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
        "vtbl", "8", "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbl1 VecListOneD:$Vn, DPR:$Vm)))]>;

// Multi-register table forms; the register lists must be consecutive, hence
// hasExtraSrcRegAllocReq.
let hasExtraSrcRegAllocReq = 1 in {
def  VTBL2
  : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
        (ins VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def  VTBL3
  : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd),
        (ins VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB3,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def  VTBL4
  : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd),
        (ins VecListFourD:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTB4,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
} // hasExtraSrcRegAllocReq = 1

// Pseudos taking the table as a QQ register tuple; expanded after regalloc.
def  VTBL3Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>;
def  VTBL4Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB4, "", []>;

// VTBX : Vector Table Extension
// Like VTBL but out-of-range indices leave the corresponding $orig byte
// unchanged, hence the "$orig = $Vd" tie.
def  VTBX1
  : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1,
        "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd",
        [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1
                               DPR:$orig, VecListOneD:$Vn, DPR:$Vm)))]>;
let hasExtraSrcRegAllocReq = 1 in {
def  VTBX2
  : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
        "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>;
def  VTBX3
  : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListThreeD:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTBX3,
        "vtbx", "8", "$Vd, $Vn, $Vm",
        "$orig = $Vd", []>;
def  VTBX4
  : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
        "vtbx", "8", "$Vd, $Vn, $Vm",
        "$orig = $Vd", []>;
} // hasExtraSrcRegAllocReq = 1

def  VTBX3Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
                IIC_VTBX3, "$orig = $dst", []>;
def  VTBX4Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
                IIC_VTBX4, "$orig = $dst", []>;
} // DecoderMethod = "DecodeTBLInstruction"

// VRINT : Vector Rounding
// ARMv8 directed-rounding float->float rounding; op9_7 encodes the mode.
multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> {
  let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
    def D : N2VDIntnp<0b10, 0b100, 0, NoItinerary,
                      !strconcat("vrint", op), "f32",
                      v2f32, v2f32, Int>, Requires<[HasV8, HasNEON]> {
      let Inst{9-7} = op9_7;
    }
    def Q : N2VQIntnp<0b10, 0b100, 0, NoItinerary,
                      !strconcat("vrint", op), "f32",
                      v4f32, v4f32, Int>, Requires<[HasV8, HasNEON]> {
      let Inst{9-7} = op9_7;
    }
  }

  // Accept the redundant two-type spelling "vrintX.f32.f32".
  def : InstAlias<!strconcat("vrint", op, ".f32.f32\t$Dd, $Dm"),
                  (!cast<Instruction>(NAME#"D") DPR:$Dd, DPR:$Dm)>;
  def : InstAlias<!strconcat("vrint", op, ".f32.f32\t$Qd, $Qm"),
                  (!cast<Instruction>(NAME#"Q") QPR:$Qd, QPR:$Qm)>;
}

defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>;
defm VRINTXN : VRINT_FPI<"x", 0b001, int_arm_neon_vrintx>;
defm VRINTAN : VRINT_FPI<"a", 0b010, int_arm_neon_vrinta>;
defm VRINTZN : VRINT_FPI<"z", 0b011, int_arm_neon_vrintz>;
defm VRINTMN : VRINT_FPI<"m", 0b101, int_arm_neon_vrintm>;
defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>;

//===----------------------------------------------------------------------===//
// NEON instructions for single-precision FP math
//===----------------------------------------------------------------------===//

// Scalar f32 ops performed on NEON: insert the scalar into lane 0 of a
// D register (restricted to the VFP2-overlapping half, DPR_VFP2), run the
// vector instruction, then extract lane 0.
class N2VSPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$a)),
              (EXTRACT_SUBREG
               (v2f32 (COPY_TO_REGCLASS (Inst
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$a, ssub_0)), DPR_VFP2)), ssub_0)>;

class N3VSPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
              (EXTRACT_SUBREG
               (v2f32 (COPY_TO_REGCLASS (Inst
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$a, ssub_0),
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;

class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
              (EXTRACT_SUBREG
               (v2f32 (COPY_TO_REGCLASS (Inst
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$acc, ssub_0),
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$a, ssub_0),
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;

def : N3VSPat<fadd, VADDfd>;
def : N3VSPat<fsub, VSUBfd>;
def : N3VSPat<fmul, VMULfd>;
def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
      Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>;
def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
      Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>;
def : N3VSMulOpPat<fmul, fadd, VFMAfd>,
      Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
def : N3VSMulOpPat<fmul, fsub, VFMSfd>,
      Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
def : N2VSPat<fabs, VABSfd>;
def : N2VSPat<fneg, VNEGfd>;
def : N3VSPat<NEONfmax, VMAXfd>;
def : N3VSPat<NEONfmin, VMINfd>;
def : N2VSPat<arm_ftosi, VCVTf2sd>;
def : N2VSPat<arm_ftoui, VCVTf2ud>;
def : N2VSPat<arm_sitof, VCVTs2fd>;
def : N2VSPat<arm_uitof, VCVTu2fd>;

// Prefer VMOVDRR for i32 -> f32 bitcasts, it can write all DPR registers.
def : Pat<(f32 (bitconvert GPR:$a)),
          (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>,
      Requires<[HasNEON, DontUseVMOVSR]>;

//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//

// bit_convert
// All 64-bit bitcasts are register-preserving no-ops.
def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v8i8  DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (f64   DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v8i8  DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (f64   DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v8i8  DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (f64   DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v1i64 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v2i32 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v4i16 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (f64   DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v2f32 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(f64   (bitconvert (v1i64 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v2i32 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v4i16 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v8i8  DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v2f32 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(v2f32 (bitconvert (f64   DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v8i8  DPR:$src))), (v2f32 DPR:$src)>;

// Likewise for the 128-bit (Q register) types.
def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;

// Fold extracting an element out of a v2i32 into a vfp register.
def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))),
          (f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;

// Vector lengthening move with load, matching extending loads.

// extload, zextload and sextload for a standard lengthening load. Example:
// Lengthen_Single<"8", "i16", "8"> =
//     Pat<(v8i16 (extloadvi8 addrmode6:$addr))
//         (VMOVLuv8i16 (VLD1d8 addrmode6:$addr,
//                              (f64 (IMPLICIT_DEF)), (i32 0)))>;
multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> {
  let AddedComplexity = 10 in {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("extloadvi" # SrcTy) addrmode6:$addr)),
                 (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
                    (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;

  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("zextloadvi" # SrcTy) addrmode6:$addr)),
               (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
                  (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;

  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("sextloadvi" # SrcTy) addrmode6:$addr)),
               (!cast<Instruction>("VMOVLsv" # DestLanes # DestTy)
                  (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;
  }
}

// extload, zextload and sextload for a lengthening load which only uses
// half the lanes available. Example:
// Lengthen_HalfSingle<"4", "i16", "8", "i16", "i8"> =
//     Pat<(v4i16 (extloadvi8 addrmode6oneL32:$addr)),
//         (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
//                                                 (f64 (IMPLICIT_DEF)), (i32 0))),
//                         dsub_0)>;
multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy,
                               string InsnLanes, string InsnTy> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
         (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
         dsub_0)>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
         (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
         dsub_0)>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
         (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
         dsub_0)>;
}

// extload, zextload and sextload for a lengthening load followed by another
// lengthening load, to quadruple the initial length.
//
// Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32"> =
//     Pat<(v4i32 (extloadvi8 addrmode6oneL32:$addr))
//         (EXTRACT_SUBREG (VMOVLuv4i32
//           (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
//                                                   (f64 (IMPLICIT_DEF)),
//                                                   (i32 0))),
//                           dsub_0)),
//           dsub_0)>;
// Two chained VMOVLs: load 32 bits into one lane, widen once (Insn1), take
// the low D half, widen again (Insn2) to the full Q result.
multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
                           string Insn2Ty> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
         (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0))>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
         (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0))>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
         (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0))>;
}

// extload, zextload and sextload for a lengthening load followed by another
// lengthening load, to quadruple the initial length, but which ends up only
// requiring half the available lanes (a 64-bit outcome instead of a 128-bit).
//
// Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32"> =
//     Pat<(v2i32 (extloadvi8 addrmode6:$addr))
//         (EXTRACT_SUBREG (VMOVLuv4i32
//           (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd16 addrmode6:$addr,
//                                                   (f64 (IMPLICIT_DEF)), (i32 0))),
//                           dsub_0)),
//           dsub_0)>;
// As Lengthen_Double, but only 16 bits are loaded and the final result is
// the low D half of the second widen.
multiclass Lengthen_HalfDouble<string DestLanes, string DestTy, string SrcTy,
                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
                           string Insn2Ty> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0)),
           dsub_0)>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0)),
           dsub_0)>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0)),
           dsub_0)>;
}

defm : Lengthen_Single<"8", "i16", "8">; // v8i8 -> v8i16
defm : Lengthen_Single<"4", "i32", "16">; // v4i16 -> v4i32
defm : Lengthen_Single<"2", "i64", "32">; // v2i32 -> v2i64

defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16
defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32

// Double lengthening - v4i8 -> v4i16 -> v4i32
defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">;
// v2i8 -> v2i16 -> v2i32
defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">;
// v2i16 -> v2i32 -> v2i64
defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">;

// Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64
def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
      (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
         (VLD1LNd16 addrmode6:$addr,
                    (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
      (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
         (VLD1LNd16 addrmode6:$addr,
                    (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
      (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
         (VLD1LNd16 addrmode6:$addr,
                    (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;

//===----------------------------------------------------------------------===//
// Assembler aliases
//

// Legacy FPA-style mnemonics for moving a core register into a D-register
// half (lane 1 = high, lane 0 = low).
def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn",
                    (VSETLNi32 DPR:$Dd, GPR:$Rn, 1, pred:$p)>;
def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn",
                    (VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>;

// VAND/VBIC/VEOR/VORR accept but do not require a type suffix.
// Three-operand forms, any data-type suffix accepted.
defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
                         (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
                         (VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
                         (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
                         (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
                         (VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
                         (VEORq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
                         (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
                         (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
// ... two-operand aliases
defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
                         (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
                         (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
// NOTE(review): the two-operand vbic aliases were missing even though
// vand/veor/vorr all have them and vbic appears in the three-operand group
// above; added for consistency. The register operand classes keep these
// distinct from the vbic-with-immediate encodings.
defm : NEONDTAnyInstAlias<"vbic${p}", "$Vdn, $Vm",
                         (VBICd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbic${p}", "$Vdn, $Vm",
                         (VBICq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
                         (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
                         (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
                         (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
                         (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;

// VLD1 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// No writeback.
def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr",
               (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr",
               (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr",
               (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;

// Fixed-increment writeback form ("$addr!").
def VLD1LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr!",
               (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD1LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr!",
               (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD1LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr!",
               (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Register-offset writeback form ("$addr, $Rm").
def VLD1LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr, $Rm",
               (ins VecListOneDByteIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD1LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr, $Rm",
               (ins VecListOneDHWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD1LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr, $Rm",
               (ins VecListOneDWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;


// VST1 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// No writeback.
def VST1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr",
               (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr",
               (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr",
               (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;

// Fixed-increment writeback form ("$addr!").
def VST1LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr!",
               (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST1LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr!",
               (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST1LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr!",
               (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Register-offset writeback form ("$addr, $Rm").
def VST1LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr, $Rm",
               (ins VecListOneDByteIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST1LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr, $Rm",
               (ins VecListOneDHWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST1LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr, $Rm",
               (ins VecListOneDWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;

// VLD2 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// No writeback. The "q" variants use the even/odd (two-register-spaced)
// lists; there is no q byte form.
def VLD2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr",
               (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
               (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
               (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
               (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
               (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>;

// Fixed-increment writeback form ("$addr!").
def VLD2LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr!",
               (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD2LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
               (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD2LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
               (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD2LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
               (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD2LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
               (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Register-offset writeback form ("$addr, $Rm").
def VLD2LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr, $Rm",
               (ins VecListTwoDByteIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD2LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
               (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD2LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
               (ins VecListTwoDWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD2LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
               (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD2LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
               (ins VecListTwoQWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;


// VST2 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// No writeback.
def VST2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr",
               (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
               (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
               (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
               (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
               (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>;

// Fixed-increment writeback form ("$addr!").
def VST2LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr!",
               (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST2LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
               (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST2LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
               (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST2LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
               (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST2LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
               (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Register-offset writeback form ("$addr, $Rm").
def VST2LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr, $Rm",
               (ins VecListTwoDByteIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST2LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr, $Rm",
               (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST2LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
               (ins VecListTwoDWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST2LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr, $Rm",
               (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST2LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
               (ins VecListTwoQWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;

// VLD3 all-lanes pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// No writeback.
def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
               (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
               (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
               (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
               (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
               (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
               (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;

// Fixed-increment writeback form ("$addr!").
def VLD3DUPdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
               (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
               (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
               (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPqWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
               (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
               (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
               (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
// Register-offset writeback form ("$addr, $Rm").
def VLD3DUPdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
               (ins VecListThreeDAllLanes:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3DUPdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeDAllLanes:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3DUPdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeDAllLanes:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
               (ins VecListThreeQAllLanes:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeQAllLanes:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeQAllLanes:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;


// VLD3 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// No writeback. No q byte form (lane loads of bytes only use D lists).
def VLD3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
               (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
               (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
               (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
               (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
               (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>;

// Fixed-increment writeback form ("$addr!").
def VLD3LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
               (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD3LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
               (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD3LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
               (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD3LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
               (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD3LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
               (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Register-offset writeback form ("$addr, $Rm").
def VLD3LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
               (ins VecListThreeDByteIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeDWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeQWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;

// VLD3 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
// No writeback.
def VLD3dAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
               (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VLD3dAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
               (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VLD3dAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
               (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VLD3qAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
               (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
def VLD3qAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
               (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
def VLD3qAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
               (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;

// Fixed-increment writeback form ("$addr!").
def VLD3dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
               (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VLD3dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
               (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VLD3dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
               (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
               (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
               (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
               (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
// Register-offset writeback form ("$addr, $Rm").
def VLD3dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
               (ins VecListThreeD:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeD:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeD:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
               (ins VecListThreeQ:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeQ:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeQ:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;

// VST3 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// No writeback. No q byte form (lane stores of bytes only use D lists).
def VST3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
               (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
               (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
               (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
               (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
               (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>;

// Fixed-increment writeback form ("$addr!").
def VST3LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
               (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST3LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
               (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST3LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
               (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST3LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
               (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST3LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
               (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Register-offset writeback form ("$addr, $Rm").
def VST3LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
               (ins VecListThreeDByteIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST3LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST3LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeDWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST3LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST3LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeQWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;


// VST3 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
// No writeback.
def VST3dAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
               (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VST3dAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
               (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VST3dAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
               (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VST3qAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
               (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
def VST3qAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
               (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
def VST3qAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
               (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;

// Fixed-increment writeback form ("$addr!").
def VST3dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
               (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VST3dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
               (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VST3dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
               (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
               (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
               (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
               (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
// Register-offset writeback form ("$addr, $Rm").
def VST3dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
               (ins VecListThreeD:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST3dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeD:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST3dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeD:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
               (ins VecListThreeQ:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeQ:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeQ:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;

// VLD4 all-lanes pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// No writeback.
def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD4DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD4DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD4DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD4DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;

// Fixed-increment writeback form ("$addr!").
def VLD4DUPdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD4DUPdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD4DUPdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD4DUPqWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD4DUPqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD4DUPqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
// Register-offset writeback form ("$addr, $Rm").
def VLD4DUPdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourDAllLanes:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4DUPdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourDAllLanes:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4DUPdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourDAllLanes:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourQAllLanes:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourQAllLanes:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourQAllLanes:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;


// VLD4 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// No writeback. No q byte form (lane loads of bytes only use D lists).
def VLD4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;

// Fixed-increment writeback form ("$addr!").
def VLD4LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD4LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD4LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD4LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD4LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Register-offset writeback form ("$addr, $Rm").
def VLD4LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourDByteIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourDHWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourDWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourQHWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourQWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;



// VLD4 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
// No writeback.
def VLD4dAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VLD4dAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VLD4dAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VLD4qAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
def VLD4qAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
def VLD4qAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;

// Fixed-increment writeback form ("$addr!").
def VLD4dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VLD4dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VLD4dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VLD4qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
def VLD4qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
def VLD4qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
// Register-offset writeback form ("$addr, $Rm").
def VLD4dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourD:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourD:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourD:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourQ:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourQ:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourQ:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;

// VST4 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VST4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
        (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
        (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
        (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Note: no 8-bit Q-register-spaced variant is defined; only the 16- and
// 32-bit element sizes have even-spaced (Q) list forms here.
def VST4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
        (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
        (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;

// Writeback variants with a fixed post-increment ("$addr!").
def VST4LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
        (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST4LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
        (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST4LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
        (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST4LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
        (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST4LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
        (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Writeback variants with a register post-increment ("$addr, $Rm").
def VST4LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
        (ins VecListFourDByteIndexed:$list, addrmode6:$addr,
             rGPR:$Rm, pred:$p)>;
def VST4LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
        (ins VecListFourDHWordIndexed:$list, addrmode6:$addr,
             rGPR:$Rm, pred:$p)>;
def VST4LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
        (ins VecListFourDWordIndexed:$list, addrmode6:$addr,
             rGPR:$Rm, pred:$p)>;
def VST4LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
        (ins VecListFourQHWordIndexed:$list, addrmode6:$addr,
             rGPR:$Rm, pred:$p)>;
def VST4LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
        (ins VecListFourQWordIndexed:$list, addrmode6:$addr,
             rGPR:$Rm, pred:$p)>;


// VST4 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
def VST4dAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
        (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VST4dAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
        (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VST4dAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
        (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VST4qAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
        (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
def VST4qAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
        (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
def VST4qAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
        (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;

// Writeback variants with a fixed post-increment ("$addr!").
def VST4dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
        (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VST4dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
        (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VST4dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
        (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VST4qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
        (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
def VST4qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
        (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
def VST4qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
        (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
// Writeback variants with a register post-increment ("$addr, $Rm").
def VST4dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
        (ins VecListFourD:$list, addrmode6:$addr,
             rGPR:$Rm, pred:$p)>;
def VST4dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
        (ins VecListFourD:$list, addrmode6:$addr,
             rGPR:$Rm, pred:$p)>;
def VST4dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
        (ins VecListFourD:$list, addrmode6:$addr,
             rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
        (ins VecListFourQ:$list, addrmode6:$addr,
             rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
        (ins VecListFourQ:$list, addrmode6:$addr,
             rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
        (ins VecListFourQ:$list, addrmode6:$addr,
             rGPR:$Rm, pred:$p)>;

// VMOV/VMVN takes an optional datatype suffix
// (register-to-register VMOV is encoded as VORR with identical source
// operands).
defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
                          (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
                          (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
                          (VMVNd DPR:$Vd, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
                          (VMVNq QPR:$Vd, QPR:$Vm, pred:$p)>;

// VCLE (register) is an assembler alias for VCGE w/ the operands reversed.
// (The original comment said VCLT/VCGT here, copy-pasted from the section
// below; these defs clearly map vcle onto VCGE.)
// D-register versions.
def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm",
                    (VCGEsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s16 $Dd, $Dn, $Dm",
                    (VCGEsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s32 $Dd, $Dn, $Dm",
                    (VCGEsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u8 $Dd, $Dn, $Dm",
                    (VCGEuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u16 $Dd, $Dn, $Dm",
                    (VCGEuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm",
                    (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm",
                    (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// Q-register versions.
def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm",
                    (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s16 $Qd, $Qn, $Qm",
                    (VCGEsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s32 $Qd, $Qn, $Qm",
                    (VCGEsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u8 $Qd, $Qn, $Qm",
                    (VCGEuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u16 $Qd, $Qn, $Qm",
                    (VCGEuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm",
                    (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm",
                    (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;

// VCLT (register) is an assembler alias for VCGT w/ the operands reversed.
// D-register versions.
def : NEONInstAlias<"vclt${p}.s8 $Dd, $Dn, $Dm",
                    (VCGTsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s16 $Dd, $Dn, $Dm",
                    (VCGTsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s32 $Dd, $Dn, $Dm",
                    (VCGTsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u8 $Dd, $Dn, $Dm",
                    (VCGTuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u16 $Dd, $Dn, $Dm",
                    (VCGTuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm",
                    (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm",
                    (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// Q-register versions.
def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm",
                    (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s16 $Qd, $Qn, $Qm",
                    (VCGTsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s32 $Qd, $Qn, $Qm",
                    (VCGTsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u8 $Qd, $Qn, $Qm",
                    (VCGTuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u16 $Qd, $Qn, $Qm",
                    (VCGTuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm",
                    (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm",
                    (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;

// VSWP allows, but does not require, a type suffix.
defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
                          (VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
                          (VSWPq QPR:$Vd, QPR:$Vm, pred:$p)>;

// VBIF, VBIT, and VBSL allow, but do not require, a type suffix.
defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
                          (VBIFd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
                          (VBITd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
                          (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
                          (VBIFq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
                          (VBITq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
                          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;

// "vmov Rd, #-imm" can be handled via "vmvn".
// The nImmVMOVI32Neg operand matches an immediate whose bitwise complement
// is encodable, so each mnemonic maps onto the opposite instruction.
def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
                    (VMVNv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
                    (VMVNv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
                    (VMOVv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
                    (VMOVv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;

// 'gas' compatibility aliases for quad-word instructions. Strictly speaking,
// these should restrict to just the Q register variants, but the register
// classes are enough to match correctly regardless, so we keep it simple
// and just use MnemonicAlias.
def : NEONMnemonicAlias<"vbicq", "vbic">;
def : NEONMnemonicAlias<"vandq", "vand">;
def : NEONMnemonicAlias<"veorq", "veor">;
def : NEONMnemonicAlias<"vorrq", "vorr">;

def : NEONMnemonicAlias<"vmovq", "vmov">;
def : NEONMnemonicAlias<"vmvnq", "vmvn">;
// Explicit versions for floating point so that the FPImm variants get
// handled early. The parser gets confused otherwise.
def : NEONMnemonicAlias<"vmovq.f32", "vmov.f32">;
def : NEONMnemonicAlias<"vmovq.f64", "vmov.f64">;

def : NEONMnemonicAlias<"vaddq", "vadd">;
def : NEONMnemonicAlias<"vsubq", "vsub">;

def : NEONMnemonicAlias<"vminq", "vmin">;
def : NEONMnemonicAlias<"vmaxq", "vmax">;

def : NEONMnemonicAlias<"vmulq", "vmul">;

def : NEONMnemonicAlias<"vabsq", "vabs">;

def : NEONMnemonicAlias<"vshlq", "vshl">;
def : NEONMnemonicAlias<"vshrq", "vshr">;

def : NEONMnemonicAlias<"vcvtq", "vcvt">;

def : NEONMnemonicAlias<"vcleq", "vcle">;
def : NEONMnemonicAlias<"vceqq", "vceq">;

def : NEONMnemonicAlias<"vzipq", "vzip">;
def : NEONMnemonicAlias<"vswpq", "vswp">;

def : NEONMnemonicAlias<"vrecpeq.f32", "vrecpe.f32">;
def : NEONMnemonicAlias<"vrecpeq.u32", "vrecpe.u32">;


// Alias for loading floating point immediates that aren't representable
// using the vmov.f32 encoding but the bitpattern is representable using
// the .i32 encoding.
def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
                    (VMOVv4i32 QPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
                    (VMOVv2i32 DPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;