//===-- ARMInstrNEON.td - NEON support for ARM -------------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the ARM NEON instruction set.
//
//===----------------------------------------------------------------------===//


//===----------------------------------------------------------------------===//
// NEON-specific Operands.
//===----------------------------------------------------------------------===//

// Generic NEON modified-immediate operand; printed via the shared
// modified-immediate printer. No parser match class (not directly parsed).
def nModImm : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
}

// Modified-immediate splat operands, one per element size. Each pairs the
// shared modified-immediate printer with a size-specific asm match class.
def nImmSplatI8AsmOperand : AsmOperandClass { let Name = "NEONi8splat"; }
def nImmSplatI8 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI8AsmOperand;
}
def nImmSplatI16AsmOperand : AsmOperandClass { let Name = "NEONi16splat"; }
def nImmSplatI16 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI16AsmOperand;
}
def nImmSplatI32AsmOperand : AsmOperandClass { let Name = "NEONi32splat"; }
def nImmSplatI32 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI32AsmOperand;
}
// VMOV.i32-style modified immediates (plain and negated forms).
def nImmVMOVI32AsmOperand : AsmOperandClass { let Name = "NEONi32vmov"; }
def nImmVMOVI32 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVMOVI32AsmOperand;
}
def nImmVMOVI32NegAsmOperand : AsmOperandClass { let Name = "NEONi32vmovNeg"; }
def nImmVMOVI32Neg : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVMOVI32NegAsmOperand;
}
// Floating-point immediate for VMOV.f32; uses the FP immediate printer.
def nImmVMOVF32 : Operand<i32> {
  let PrintMethod = "printFPImmOperand";
  let ParserMatchClass = FPImmOperand;
}
def nImmSplatI64AsmOperand : AsmOperandClass { let Name = "NEONi64splat"; }
def nImmSplatI64 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI64AsmOperand;
}

// Vector lane-index operands. The ImmLeaf predicate bounds the index by the
// lane count for the element size (8 x i8, 4 x i16, 2 x i32 per D register).
def VectorIndex8Operand  : AsmOperandClass { let Name = "VectorIndex8"; }
def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; }
def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; }
def VectorIndex8 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 8;
}]> {
  let ParserMatchClass = VectorIndex8Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex16 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 4;
}]> {
  let ParserMatchClass = VectorIndex16Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 2;
}]> {
  let ParserMatchClass = VectorIndex32Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}

// Register list of one D register.
def VecListOneDAsmOperand : AsmOperandClass {
  let Name = "VecListOneD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> {
  let ParserMatchClass = VecListOneDAsmOperand;
}
// Register list of two sequential D registers.
def VecListDPairAsmOperand : AsmOperandClass {
  let Name = "VecListDPair";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPair : RegisterOperand<DPair, "printVectorListTwo"> {
  let ParserMatchClass = VecListDPairAsmOperand;
}
// Register list of three sequential D registers.
def VecListThreeDAsmOperand : AsmOperandClass {
  let Name = "VecListThreeD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> {
  let ParserMatchClass = VecListThreeDAsmOperand;
}
// Register list of four sequential D registers.
def VecListFourDAsmOperand : AsmOperandClass {
  let Name = "VecListFourD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> {
  let ParserMatchClass = VecListFourDAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers).
def VecListDPairSpacedAsmOperand : AsmOperandClass {
  let Name = "VecListDPairSpaced";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListTwoSpaced"> {
  let ParserMatchClass = VecListDPairSpacedAsmOperand;
}
// Register list of three D registers spaced by 2 (three Q registers).
def VecListThreeQAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQ";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeQ : RegisterOperand<DPR, "printVectorListThreeSpaced"> {
  let ParserMatchClass = VecListThreeQAsmOperand;
}
// Register list of four D registers spaced by 2 (four Q registers).
def VecListFourQAsmOperand : AsmOperandClass {
  let Name = "VecListFourQ";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourQ : RegisterOperand<DPR, "printVectorListFourSpaced"> {
  let ParserMatchClass = VecListFourQAsmOperand;
}

// Register list of one D register, with "all lanes" subscripting.
def VecListOneDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListOneDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> {
  let ParserMatchClass = VecListOneDAllLanesAsmOperand;
}
// Register list of two D registers, with "all lanes" subscripting.
def VecListDPairAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListDPairAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairAllLanes : RegisterOperand<DPair,
                                           "printVectorListTwoAllLanes"> {
  let ParserMatchClass = VecListDPairAllLanesAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers),
// with "all lanes" subscripting.
def VecListDPairSpacedAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListDPairSpacedAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairSpacedAllLanes : RegisterOperand<DPair,
                                         "printVectorListTwoSpacedAllLanes"> {
  let ParserMatchClass = VecListDPairSpacedAllLanesAsmOperand;
}
// Register list of three D registers, with "all lanes" subscripting.
def VecListThreeDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeDAllLanes : RegisterOperand<DPR,
                                            "printVectorListThreeAllLanes"> {
  let ParserMatchClass = VecListThreeDAllLanesAsmOperand;
}
// Register list of three D registers spaced by 2 (three sequential Q regs).
def VecListThreeQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeQAllLanes : RegisterOperand<DPR,
                                        "printVectorListThreeSpacedAllLanes"> {
  let ParserMatchClass = VecListThreeQAllLanesAsmOperand;
}
// Register list of four D registers, with "all lanes" subscripting.
def VecListFourDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListFourDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourDAllLanes : RegisterOperand<DPR, "printVectorListFourAllLanes"> {
  let ParserMatchClass = VecListFourDAllLanesAsmOperand;
}
// Register list of four D registers spaced by 2 (four sequential Q regs).
def VecListFourQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListFourQAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourQAllLanes : RegisterOperand<DPR,
                                         "printVectorListFourSpacedAllLanes"> {
  let ParserMatchClass = VecListFourQAllLanesAsmOperand;
}


// Lane-indexed vector lists: the MI operand is a (register, lane index) pair,
// rendered together by addVecListIndexedOperands.
// Register list of one D register, with byte lane subscripting.
def VecListOneDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListOneDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListOneDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// Register list of two D registers with byte lane subscripting.
def VecListTwoDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListTwoDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListTwoDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of two Q registers with half-word lane subscripting.
def VecListTwoQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListTwoQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}


// Register list of three D registers with byte lane subscripting.
def VecListThreeDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListThreeDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListThreeDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of three Q registers with half-word lane subscripting.
def VecListThreeQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListThreeQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// Register list of four D registers with byte lane subscripting.
def VecListFourDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListFourDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListFourDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of four Q registers with half-word lane subscripting.
def VecListFourQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListFourQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// Load/store pattern fragments predicated on the access's known alignment
// (in bytes), so patterns can be restricted by alignment class.
def dword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() >= 8;
}]>;
def dword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                 (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() >= 8;
}]>;
def word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 4;
}]>;
def word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 4;
}]>;
def hword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 2;
}]>;
def hword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                 (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 2;
}]>;
def byte_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 1;
}]>;
def byte_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 1;
}]>;
def non_word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() < 4;
}]>;
def non_word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                    (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() < 4;
}]>;

//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//

// Vector compare: result is an integer vector; the two inputs must match
// each other. The "Z" profile is the compare-against-zero form (one input).
def SDTARMVCMP : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
def SDTARMVCMPZ : SDTypeProfile<1, 1, []>;

def NEONvceq : SDNode<"ARMISD::VCEQ", SDTARMVCMP>;
def NEONvceqz : SDNode<"ARMISD::VCEQZ", SDTARMVCMPZ>;
def NEONvcge : SDNode<"ARMISD::VCGE", SDTARMVCMP>;
def NEONvcgez : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>;
def NEONvclez : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>;
def NEONvcgeu : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
def NEONvcgt : SDNode<"ARMISD::VCGT", SDTARMVCMP>;
def NEONvcgtz : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>;
def NEONvcltz : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>;
def NEONvcgtu : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVCMP>;

// Types for vector shift by immediates. The "SHX" version is for long and
// narrow operations where the source and destination vectors have different
// types. The "SHINS" version is for shift and insert operations.
def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                     SDTCisVT<2, i32>]>;
def SDTARMVSHX : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                      SDTCisVT<2, i32>]>;
def SDTARMVSHINS : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                        SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;

// Vector shifts by immediate: plain, long ("ll"), and narrowing ("rn").
def NEONvshl : SDNode<"ARMISD::VSHL", SDTARMVSH>;
def NEONvshrs : SDNode<"ARMISD::VSHRs", SDTARMVSH>;
def NEONvshru : SDNode<"ARMISD::VSHRu", SDTARMVSH>;
def NEONvshlls : SDNode<"ARMISD::VSHLLs", SDTARMVSHX>;
def NEONvshllu : SDNode<"ARMISD::VSHLLu", SDTARMVSHX>;
def NEONvshlli : SDNode<"ARMISD::VSHLLi", SDTARMVSHX>;
def NEONvshrn : SDNode<"ARMISD::VSHRN", SDTARMVSHX>;

// Rounding shift variants.
def NEONvrshrs : SDNode<"ARMISD::VRSHRs", SDTARMVSH>;
def NEONvrshru : SDNode<"ARMISD::VRSHRu", SDTARMVSH>;
def NEONvrshrn : SDNode<"ARMISD::VRSHRN", SDTARMVSHX>;

// Saturating shift variants.
def NEONvqshls : SDNode<"ARMISD::VQSHLs", SDTARMVSH>;
def NEONvqshlu : SDNode<"ARMISD::VQSHLu", SDTARMVSH>;
def NEONvqshlsu : SDNode<"ARMISD::VQSHLsu", SDTARMVSH>;
def NEONvqshrns : SDNode<"ARMISD::VQSHRNs", SDTARMVSHX>;
def NEONvqshrnu : SDNode<"ARMISD::VQSHRNu", SDTARMVSHX>;
def NEONvqshrnsu : SDNode<"ARMISD::VQSHRNsu", SDTARMVSHX>;

// Saturating rounding narrowing shifts.
def NEONvqrshrns : SDNode<"ARMISD::VQRSHRNs", SDTARMVSHX>;
def NEONvqrshrnu : SDNode<"ARMISD::VQRSHRNu", SDTARMVSHX>;
def NEONvqrshrnsu : SDNode<"ARMISD::VQRSHRNsu", SDTARMVSHX>;

// Shift-and-insert.
def NEONvsli : SDNode<"ARMISD::VSLI", SDTARMVSHINS>;
def NEONvsri : SDNode<"ARMISD::VSRI", SDTARMVSHINS>;

// Extract a lane to an i32, with unsigned or signed extension semantics.
def SDTARMVGETLN : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
                                        SDTCisVT<2, i32>]>;
def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;

// Build a vector from an encoded modified-immediate (i32 operand).
def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
def NEONvmovImm : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
def NEONvmvnImm : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
def NEONvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>;

// VORR/VBIC with a modified-immediate second operand.
def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisVT<2, i32>]>;
def NEONvorrImm : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>;
def NEONvbicImm : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>;

// Bitwise select: all four vectors (result + 3 inputs) share one type.
def NEONvbsl : SDNode<"ARMISD::VBSL",
                      SDTypeProfile<1, 3, [SDTCisVec<0>,
                                           SDTCisSameAs<0, 1>,
                                           SDTCisSameAs<0, 2>,
                                           SDTCisSameAs<0, 3>]>>;

def NEONvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;

// VDUPLANE can produce a quad-register result from a double-register source,
// so the result is not constrained to match the source.
def NEONvduplane : SDNode<"ARMISD::VDUPLANE",
                          SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
                                               SDTCisVT<2, i32>]>>;

def SDTARMVEXT : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                      SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
def NEONvext : SDNode<"ARMISD::VEXT", SDTARMVEXT>;

// Single-operand vector shuffles (element reversal within groups).
def SDTARMVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
def NEONvrev64 : SDNode<"ARMISD::VREV64", SDTARMVSHUF>;
def NEONvrev32 : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
def NEONvrev16 : SDNode<"ARMISD::VREV16", SDTARMVSHUF>;

// Two-result shuffles (zip/unzip/transpose): two inputs, two outputs, all of
// one type.
def SDTARMVSHUF2 : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                        SDTCisSameAs<0, 2>,
                                        SDTCisSameAs<0, 3>]>;
def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;

// Widening multiplies: the two (matching) inputs are narrower than the result.
def SDTARMVMULL : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                       SDTCisSameAs<1, 2>]>;
def NEONvmulls : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
def NEONvmullu : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;

// Scalar f32 max/min.
def SDTARMFMAX : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>,
                                      SDTCisSameAs<0, 2>]>;
def NEONfmax : SDNode<"ARMISD::FMAX", SDTARMFMAX>;
def NEONfmin : SDNode<"ARMISD::FMIN", SDTARMFMAX>;

// Matches a VMOVIMM whose decoded modified-immediate is an all-zeros vector
// (32-bit elements, value 0).
def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{
  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
  unsigned EltBits = 0;
  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
  return (EltBits == 32 && EltVal == 0);
}]>;

// Matches a VMOVIMM whose decoded modified-immediate is an all-ones vector
// (8-bit elements, value 0xff).
def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{
  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
  unsigned EltBits = 0;
  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
  return (EltBits == 8 && EltVal == 0xff);
}]>;

//===----------------------------------------------------------------------===//
// NEON load / store instructions
//===----------------------------------------------------------------------===//

// Use VLDM to load a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VLDMD after reg alloc.
def VLDMQIA
  : PseudoVFPLdStM<(outs DPair:$dst), (ins GPR:$Rn),
                   IIC_fpLoad_m, "",
                   [(set DPair:$dst, (v2f64 (load GPR:$Rn)))]>;

// Use VSTM to store a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VSTMD after reg alloc.
def VSTMQIA
  : PseudoVFPLdStM<(outs), (ins DPair:$src, GPR:$Rn),
                   IIC_fpStore_m, "",
                   [(store (v2f64 DPair:$src), GPR:$Rn)]>;

// Classes for VLD* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
// Q-register (2 x D) load pseudos: plain, post-indexed writeback, and the
// fixed/register writeback split used by the asm matcher.
class VLDQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">;
class VLDQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset), itin,
                "$addr.addr = $wb">;
class VLDQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr), itin,
                "$addr.addr = $wb">;
class VLDQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset), itin,
                "$addr.addr = $wb">;

// QQ-register (4 x D) load pseudos, same writeback variants.
class VLDQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">;
class VLDQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset), itin,
                "$addr.addr = $wb">;
class VLDQQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr), itin,
                "$addr.addr = $wb">;
class VLDQQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset), itin,
                "$addr.addr = $wb">;


// QQQQ-register (8 x D) load pseudos; $src is tied to $dst so only part of
// the register group can be updated by a single expansion step.
class VLDQQQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin,
                "$src = $dst">;
class VLDQQQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
                "$addr.addr = $wb, $src = $dst">;

// All VLD* definitions below are loads with extra register-allocation
// constraints on their defs.
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {

// VLD1 : Vector Load (multiple single elements)
class VLD1D<bits<4> op7_4, string Dt>
  : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd),
          (ins addrmode6:$Rn), IIC_VLD1,
          "vld1", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;  // No writeback (Rm == PC encoding).
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDInstruction";
}
class VLD1Q<bits<4> op7_4, string Dt>
  : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd),
          (ins addrmode6:$Rn), IIC_VLD1x2,
          "vld1", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;  // No writeback.
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDInstruction";
}

def VLD1d8  : VLD1D<{0,0,0,?}, "8">;
def VLD1d16 : VLD1D<{0,1,0,?}, "16">;
def VLD1d32 : VLD1D<{1,0,0,?}, "32">;
def VLD1d64 : VLD1D<{1,1,0,?}, "64">;

def VLD1q8  : VLD1Q<{0,0,?,?}, "8">;
def VLD1q16 : VLD1Q<{0,1,?,?}, "16">;
def VLD1q32 : VLD1Q<{1,0,?,?}, "32">;
def VLD1q64 : VLD1Q<{1,1,?,?}, "64">;

// ...with address register writeback:
multiclass VLD1DWB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
                     (ins addrmode6:$Rn), IIC_VLD1u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDInstruction";
    let AsmMatchConverter = "cvtVLDwbFixed";
  }
  def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDInstruction";
    let AsmMatchConverter = "cvtVLDwbRegister";
  }
}
multiclass VLD1QWB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
                     (ins addrmode6:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDInstruction";
    let AsmMatchConverter = "cvtVLDwbFixed";
  }
  def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDInstruction";
    let AsmMatchConverter = "cvtVLDwbRegister";
  }
}

defm VLD1d8wb  : VLD1DWB<{0,0,0,?}, "8">;
defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16">;
defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32">;
defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64">;
defm VLD1q8wb  : VLD1QWB<{0,0,?,?}, "8">;
defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16">;
defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32">;
defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64">;

// ...with 3 registers
class VLD1D3<bits<4> op7_4, string Dt>
  : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd),
          (ins addrmode6:$Rn), IIC_VLD1x3, "vld1", Dt,
          "$Vd, $Rn", "", []> {
  let Rm = 0b1111;  // No writeback.
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDInstruction";
}
multiclass VLD1D3WB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
                     (ins addrmode6:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDInstruction";
    let AsmMatchConverter = "cvtVLDwbFixed";
  }
  def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDInstruction";
    let AsmMatchConverter = "cvtVLDwbRegister";
  }
}

def VLD1d8T  : VLD1D3<{0,0,0,?}, "8">;
def VLD1d16T : VLD1D3<{0,1,0,?}, "16">;
def VLD1d32T : VLD1D3<{1,0,0,?}, "32">;
def VLD1d64T : VLD1D3<{1,1,0,?}, "64">;

defm VLD1d8Twb  : VLD1D3WB<{0,0,0,?}, "8">;
defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16">;
defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32">;
defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64">;

def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>;

// ...with 4 registers
class VLD1D4<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd),
          (ins addrmode6:$Rn), IIC_VLD1x4, "vld1", Dt,
          "$Vd, $Rn", "", []> {
  let Rm = 0b1111;  // No writeback.
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDInstruction";
}
multiclass VLD1D4WB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb),
                     (ins addrmode6:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDInstruction";
    let AsmMatchConverter = "cvtVLDwbFixed";
  }
  def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDInstruction";
    let AsmMatchConverter = "cvtVLDwbRegister";
  }
}

def VLD1d8Q  : VLD1D4<{0,0,?,?}, "8">;
def VLD1d16Q : VLD1D4<{0,1,?,?}, "16">;
def VLD1d32Q : VLD1D4<{1,0,?,?}, "32">;
def VLD1d64Q : VLD1D4<{1,1,?,?}, "64">;

defm VLD1d8Qwb  : VLD1D4WB<{0,0,?,?}, "8">;
defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16">;
defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32">;
defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64">;

def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>;

// VLD2 : Vector Load (multiple 2-element structures)
class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
           InstrItinClass itin>
  : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd),
          (ins addrmode6:$Rn), itin,
          "vld2", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;  // No writeback.
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDInstruction";
}

def VLD2d8  : VLD2<0b1000, {0,0,?,?}, "8",  VecListDPair, IIC_VLD2>;
def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2>;
def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2>;

def VLD2q8  : VLD2<0b0011, {0,0,?,?}, "8",  VecListFourD, IIC_VLD2x2>;
def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2>;
def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2>;

def VLD2q8Pseudo  : VLDQQPseudo<IIC_VLD2x2>;
def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>;
def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>;

// ...with address register writeback:
multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
                  RegisterOperand VdTy, InstrItinClass itin> {
  def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
                     (ins addrmode6:$Rn), itin,
                     "vld2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDInstruction";
    let AsmMatchConverter = "cvtVLDwbFixed";
  }
  def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm), itin,
                        "vld2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDInstruction";
    let AsmMatchConverter = "cvtVLDwbRegister";
  }
}

defm VLD2d8wb  : VLD2WB<0b1000, {0,0,?,?}, "8",  VecListDPair, IIC_VLD2u>;
defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u>;
defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u>;

defm VLD2q8wb  : VLD2WB<0b0011, {0,0,?,?}, "8",  VecListFourD, IIC_VLD2x2u>;
defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u>;
defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u>;

def VLD2q8PseudoWB_fixed     : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
def VLD2q16PseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
def VLD2q32PseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
def VLD2q8PseudoWB_register  : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;

// ...with double-spaced registers
def VLD2b8  : VLD2<0b1001, {0,0,?,?}, "8",  VecListDPairSpaced, IIC_VLD2>;
def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2>;
def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2>;
defm VLD2b8wb  : VLD2WB<0b1001, {0,0,?,?}, "8",  VecListDPairSpaced, IIC_VLD2u>;
defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u>;
defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u>;

// VLD3 : Vector Load (multiple 3-element structures)
class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
          (ins addrmode6:$Rn), IIC_VLD3,
          "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> {
  let Rm = 0b1111;  // No writeback.
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDInstruction";
}

def VLD3d8  : VLD3D<0b0100, {0,0,0,?}, "8">;
def VLD3d16 : VLD3D<0b0100, {0,1,0,?}, "16">;
def VLD3d32 : VLD3D<0b0100, {1,0,0,?}, "32">;

def VLD3d8Pseudo  : VLDQQPseudo<IIC_VLD3>;
def VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>;
def VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>;

// ...with address register writeback:
class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u,
          "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDInstruction";
}

def VLD3d8_UPD  : VLD3DWB<0b0100, {0,0,0,?}, "8">;
def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">;
def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">;

def VLD3d8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3u>;
def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;

// ...with double-spaced registers:
def VLD3q8      : VLD3D<0b0101, {0,0,0,?}, "8">;
def VLD3q16     : VLD3D<0b0101, {0,1,0,?}, "16">;
def VLD3q32     : VLD3D<0b0101, {1,0,0,?}, "32">;
def VLD3q8_UPD  : VLD3DWB<0b0101, {0,0,0,?}, "8">;
def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">;
def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">;

def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
def
VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>; 896 def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>; 897 898 // ...alternate versions to be allocated odd register numbers: 899 def VLD3q8oddPseudo : VLDQQQQPseudo<IIC_VLD3>; 900 def VLD3q16oddPseudo : VLDQQQQPseudo<IIC_VLD3>; 901 def VLD3q32oddPseudo : VLDQQQQPseudo<IIC_VLD3>; 902 903 def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>; 904 def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>; 905 def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>; 906 907 // VLD4 : Vector Load (multiple 4-element structures) 908 class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt> 909 : NLdSt<0, 0b10, op11_8, op7_4, 910 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4), 911 (ins addrmode6:$Rn), IIC_VLD4, 912 "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> { 913 let Rm = 0b1111; 914 let Inst{5-4} = Rn{5-4}; 915 let DecoderMethod = "DecodeVLDInstruction"; 916 } 917 918 def VLD4d8 : VLD4D<0b0000, {0,0,?,?}, "8">; 919 def VLD4d16 : VLD4D<0b0000, {0,1,?,?}, "16">; 920 def VLD4d32 : VLD4D<0b0000, {1,0,?,?}, "32">; 921 922 def VLD4d8Pseudo : VLDQQPseudo<IIC_VLD4>; 923 def VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>; 924 def VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>; 925 926 // ...with address register writeback: 927 class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt> 928 : NLdSt<0, 0b10, op11_8, op7_4, 929 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), 930 (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u, 931 "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", 932 "$Rn.addr = $wb", []> { 933 let Inst{5-4} = Rn{5-4}; 934 let DecoderMethod = "DecodeVLDInstruction"; 935 } 936 937 def VLD4d8_UPD : VLD4DWB<0b0000, {0,0,?,?}, "8">; 938 def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">; 939 def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">; 940 941 def VLD4d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>; 942 def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>; 943 def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>; 944 945 // ...with 
double-spaced registers: 946 def VLD4q8 : VLD4D<0b0001, {0,0,?,?}, "8">; 947 def VLD4q16 : VLD4D<0b0001, {0,1,?,?}, "16">; 948 def VLD4q32 : VLD4D<0b0001, {1,0,?,?}, "32">; 949 def VLD4q8_UPD : VLD4DWB<0b0001, {0,0,?,?}, "8">; 950 def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">; 951 def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">; 952 953 def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>; 954 def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>; 955 def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>; 956 957 // ...alternate versions to be allocated odd register numbers: 958 def VLD4q8oddPseudo : VLDQQQQPseudo<IIC_VLD4>; 959 def VLD4q16oddPseudo : VLDQQQQPseudo<IIC_VLD4>; 960 def VLD4q32oddPseudo : VLDQQQQPseudo<IIC_VLD4>; 961 962 def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>; 963 def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>; 964 def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>; 965 966 } // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 967 968 // Classes for VLD*LN pseudo-instructions with multi-register operands. 969 // These are expanded to real instructions after register allocation. 
// Pseudo-instruction classes for lane loads whose register list spans a
// Q / QQ / QQQQ super-register.  Each carries the $src = $dst tie so the
// untouched lanes are preserved; the WB variants additionally tie the
// updated base address ($addr.addr = $wb).
class VLDQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst),
                (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
                itin, "$src = $dst">;
class VLDQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
class VLDQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst),
                (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
                itin, "$src = $dst">;
class VLDQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
class VLDQQQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst),
                (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
                itin, "$src = $dst">;
class VLDQQQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;

// VLD1LN : Vector Load (single element to one lane)
// The ISel pattern inserts the loaded scalar into lane $lane of $src.
class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
             PatFrag LoadOp>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
            (ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane),
            IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
            "$src = $Vd",
            [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
                                          (i32 (LoadOp addrmode6:$Rn)),
                                          imm:$lane))]> {
  let Rm = 0b1111;   // no writeback
  let DecoderMethod = "DecodeVLD1LN";
}
// Same as VLD1LN but the 32-bit form uses the one-lane-of-32 address mode.
class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
               PatFrag LoadOp>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
            (ins addrmode6oneL32:$Rn, DPR:$src, nohash_imm:$lane),
            IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
            "$src = $Vd",
            [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
                                          (i32 (LoadOp addrmode6oneL32:$Rn)),
                                          imm:$lane))]> {
  let Rm = 0b1111;   // no writeback
  let DecoderMethod = "DecodeVLD1LN";
}
class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln> {
  let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src),
                                               (i32 (LoadOp addrmode6:$addr)),
                                               imm:$lane))];
}

// Derived defs place the lane number into the element-size-dependent bits
// of Inst{7-4}; remaining low bits carry addrmode6 alignment from Rn{5-4}.
def VLD1LNd8  : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> {
  let Inst{7-5} = lane{2-0};
}
def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> {
  let Inst{7-6} = lane{1-0};
  let Inst{5-4} = Rn{5-4};
}
def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> {
  let Inst{7}   = lane{0};
  let Inst{5-4} = Rn{5-4};
}

def VLD1LNq8Pseudo  : VLD1QLNPseudo<v16i8, extloadi8>;
def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>;
def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>;

// f32 lane inserts reuse the 32-bit integer lane-load forms.
def : Pat<(vector_insert (v2f32 DPR:$src),
                         (f32 (load addrmode6:$addr)), imm:$lane),
          (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(vector_insert (v4f32 QPR:$src),
                         (f32 (load addrmode6:$addr)), imm:$lane),
          (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;

let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {

// ...with address register writeback:
class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt,
          "\\{$Vd[$lane]\\}, $Rn$Rm",
          "$src = $Vd, $Rn.addr = $wb", []> {
  let DecoderMethod = "DecodeVLD1LN";
}

def VLD1LNd8_UPD  : VLD1LNWB<0b0000, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
  let Inst{4}   = Rn{4};
}
def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  // NOTE(review): both encoding bits take Rn{4}, unlike the non-writeback
  // VLD1LNd32 which uses Rn{5-4} — confirm against the ARM ARM alignment
  // field for VLD1 (single element to one lane).
  let Inst{5} = Rn{4};
  let Inst{4} = Rn{4};
}

def VLD1LNq8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD1lnu>;
def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;

// VLD2LN : Vector Load (single 2-element structure to one lane)
class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2),
          (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane),
          IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn",
          "$src1 = $Vd, $src2 = $dst2", []> {
  let Rm = 0b1111;   // no writeback
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD2LN";
}

def VLD2LNd8  : VLD2LN<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNd8Pseudo  : VLDQLNPseudo<IIC_VLD2ln>;
def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>;

// ...with double-spaced registers:
def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;
def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;

// ...with address register writeback:
class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2lnu, "vld2", Dt,
          "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm",
          "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD2LN";
}

def VLD2LNd8_UPD  : VLD2LNWB<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNd8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD2lnu>;
def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;

def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;
def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;

// VLD3LN : Vector Load (single 3-element structure to one lane)
class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
          (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3,
           nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt,
          "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn",
          "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []> {
  let Rm = 0b1111;   // no writeback
  let DecoderMethod = "DecodeVLD3LN";
}

def VLD3LNd8  : VLD3LN<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD3ln>;
def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;

// ...with double-spaced registers:
def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;
def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;

// ...with address register writeback:
class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
          IIC_VLD3lnu, "vld3", Dt,
          "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm",
          "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb",
          []> {
  let DecoderMethod = "DecodeVLD3LN";
}

def VLD3LNd8_UPD  : VLD3LNWB<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD3lnu>;
def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;

def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;

// VLD4LN : Vector Load (single 4-element structure to one lane)
class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
           nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt,
          "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn",
          "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []> {
  let Rm = 0b1111;   // no writeback
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4LN";
}

def VLD4LNd8  : VLD4LN<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD4ln>;
def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;

// ...with double-spaced registers:
def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;

// ...with address register writeback:
class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
          IIC_VLD4lnu, "vld4", Dt,
          "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm",
          "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb",
          []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4LN" ;
}

def VLD4LNd8_UPD  : VLD4LNWB<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD4lnu>;
def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;

def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;

} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1

// VLD1DUP : Vector Load (single element to all lanes)
class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
  : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd),
          (ins addrmode6dup:$Rn),
          IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "",
          [(set VecListOneDAllLanes:$Vd,
                (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
  let Rm = 0b1111;   // no writeback
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD1DupInstruction";
}
def VLD1DUPd8  : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8>;
def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16>;
def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load>;

// f32 splat-load reuses the 32-bit integer dup form.
def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
          (VLD1DUPd32 addrmode6:$addr)>;

class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
  : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd),
          (ins addrmode6dup:$Rn), IIC_VLD1dup,
          "vld1", Dt, "$Vd, $Rn", "",
          [(set VecListDPairAllLanes:$Vd,
                (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
  let Rm = 0b1111;   // no writeback
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD1DupInstruction";
}

def VLD1DUPq8  : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8>;
def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16>;
def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load>;

def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
          (VLD1DUPq32 addrmode6:$addr)>;

let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
// ...with address register writeback:
// _fixed encodes the post-increment-by-size form ("$Rn!"); _register takes
// the increment in Rm.  Matched assembly is rewritten by the named
// AsmMatchConverter.
multiclass VLD1DUPWB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
                     (outs VecListOneDAllLanes:$Vd, GPR:$wb),
                     (ins addrmode6dup:$Rn), IIC_VLD1dupu,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
    let AsmMatchConverter = "cvtVLDwbFixed";
  }
  def _register : NLdSt<1, 0b10, 0b1100, op7_4,
                        (outs VecListOneDAllLanes:$Vd, GPR:$wb),
                        (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
    let AsmMatchConverter = "cvtVLDwbRegister";
  }
}
multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
                     (outs VecListDPairAllLanes:$Vd, GPR:$wb),
                     (ins addrmode6dup:$Rn), IIC_VLD1dupu,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
    let AsmMatchConverter = "cvtVLDwbFixed";
  }
  def _register : NLdSt<1, 0b10, 0b1100, op7_4,
                        (outs VecListDPairAllLanes:$Vd, GPR:$wb),
                        (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
    let AsmMatchConverter = "cvtVLDwbRegister";
  }
}

defm VLD1DUPd8wb  : VLD1DUPWB<{0,0,0,0}, "8">;
defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16">;
defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32">;

defm VLD1DUPq8wb  : VLD1QDUPWB<{0,0,1,0}, "8">;
defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16">;
defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32">;

// VLD2DUP : Vector Load (single 2-element structure to all lanes)
class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy>
  : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd),
          (ins addrmode6dup:$Rn), IIC_VLD2dup,
          "vld2", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;   // no writeback
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD2DupInstruction";
}

def VLD2DUPd8  : VLD2DUP<{0,0,0,?}, "8",  VecListDPairAllLanes>;
def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes>;
def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes>;

// ...with double-spaced registers
def VLD2DUPd8x2  : VLD2DUP<{0,0,1,?}, "8",  VecListDPairSpacedAllLanes>;
def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes>;
def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes>;

// ...with address register writeback:
multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> {
  def _fixed : NLdSt<1, 0b10, 0b1101, op7_4,
                     (outs VdTy:$Vd, GPR:$wb),
                     (ins addrmode6dup:$Rn), IIC_VLD2dupu,
                     "vld2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD2DupInstruction";
    let AsmMatchConverter = "cvtVLDwbFixed";
  }
  def _register : NLdSt<1, 0b10, 0b1101, op7_4,
                        (outs VdTy:$Vd, GPR:$wb),
                        (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD2dupu,
                        "vld2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD2DupInstruction";
    let AsmMatchConverter = "cvtVLDwbRegister";
  }
}

defm VLD2DUPd8wb    : VLD2DUPWB<{0,0,0,0}, "8",  VecListDPairAllLanes>;
defm VLD2DUPd16wb   : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes>;
defm VLD2DUPd32wb   : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes>;

defm VLD2DUPd8x2wb  : VLD2DUPWB<{0,0,1,0}, "8",  VecListDPairSpacedAllLanes>;
defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes>;
defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes>;

// VLD3DUP : Vector Load (single 3-element structure to all lanes)
class VLD3DUP<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
          (ins addrmode6dup:$Rn), IIC_VLD3dup,
          "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []> {
  let Rm = 0b1111;   // no writeback
  let Inst{4} = 0;   // alignment bit is forced to zero for vld3-dup
  let DecoderMethod = "DecodeVLD3DupInstruction";
}

def VLD3DUPd8  : VLD3DUP<{0,0,0,?}, "8">;
def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">;
def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">;

def VLD3DUPd8Pseudo  : VLDQQPseudo<IIC_VLD3dup>;
def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>;
def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>;

// ...with double-spaced registers (not used for codegen):
def VLD3DUPq8  : VLD3DUP<{0,0,1,?}, "8">;
def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">;
def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">;

// ...with address register writeback:
class VLD3DUPWB<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
          (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD3dupu,
          "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4} = 0;   // alignment bit is forced to zero for vld3-dup
  let DecoderMethod = "DecodeVLD3DupInstruction";
}

def VLD3DUPd8_UPD  : VLD3DUPWB<{0,0,0,0}, "8">;
def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16">;
def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32">;

def VLD3DUPq8_UPD  : VLD3DUPWB<{0,0,1,0}, "8">;
def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16">;
def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32">;

def VLD3DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3dupu>;
def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;

// VLD4DUP : Vector Load (single 4-element structure to all lanes)
class VLD4DUP<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1111, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6dup:$Rn), IIC_VLD4dup,
          "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> {
  let Rm = 0b1111;   // no writeback
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4DupInstruction";
}

def VLD4DUPd8  : VLD4DUP<{0,0,0,?}, "8">;
def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">;
// 32-bit forms route an extra alignment bit (Rn{5}) into Inst{6}.
def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }

def VLD4DUPd8Pseudo  : VLDQQPseudo<IIC_VLD4dup>;
def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>;
def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>;

// ...with double-spaced registers (not used for codegen):
def VLD4DUPq8  : VLD4DUP<{0,0,1,?}, "8">;
def VLD4DUPq16 : VLD4DUP<{0,1,1,?}, "16">;
def VLD4DUPq32 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }

// ...with address register writeback:
class VLD4DUPWB<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1111, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu,
          "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4DupInstruction";
}

def VLD4DUPd8_UPD  : VLD4DUPWB<{0,0,0,0}, "8">;
def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">;
def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }

def VLD4DUPq8_UPD  : VLD4DUPWB<{0,0,1,0}, "8">;
def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">;
def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }

def VLD4DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4dupu>;
def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;

} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1

let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {

// Classes for VST* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
1541 class VSTQPseudo<InstrItinClass itin> 1542 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), itin, "">; 1543 class VSTQWBPseudo<InstrItinClass itin> 1544 : PseudoNLdSt<(outs GPR:$wb), 1545 (ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin, 1546 "$addr.addr = $wb">; 1547 class VSTQWBfixedPseudo<InstrItinClass itin> 1548 : PseudoNLdSt<(outs GPR:$wb), 1549 (ins addrmode6:$addr, QPR:$src), itin, 1550 "$addr.addr = $wb">; 1551 class VSTQWBregisterPseudo<InstrItinClass itin> 1552 : PseudoNLdSt<(outs GPR:$wb), 1553 (ins addrmode6:$addr, rGPR:$offset, QPR:$src), itin, 1554 "$addr.addr = $wb">; 1555 class VSTQQPseudo<InstrItinClass itin> 1556 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">; 1557 class VSTQQWBPseudo<InstrItinClass itin> 1558 : PseudoNLdSt<(outs GPR:$wb), 1559 (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin, 1560 "$addr.addr = $wb">; 1561 class VSTQQWBfixedPseudo<InstrItinClass itin> 1562 : PseudoNLdSt<(outs GPR:$wb), 1563 (ins addrmode6:$addr, QQPR:$src), itin, 1564 "$addr.addr = $wb">; 1565 class VSTQQWBregisterPseudo<InstrItinClass itin> 1566 : PseudoNLdSt<(outs GPR:$wb), 1567 (ins addrmode6:$addr, rGPR:$offset, QQPR:$src), itin, 1568 "$addr.addr = $wb">; 1569 1570 class VSTQQQQPseudo<InstrItinClass itin> 1571 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">; 1572 class VSTQQQQWBPseudo<InstrItinClass itin> 1573 : PseudoNLdSt<(outs GPR:$wb), 1574 (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin, 1575 "$addr.addr = $wb">; 1576 1577 // VST1 : Vector Store (multiple single elements) 1578 class VST1D<bits<4> op7_4, string Dt> 1579 : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$Rn, VecListOneD:$Vd), 1580 IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []> { 1581 let Rm = 0b1111; 1582 let Inst{4} = Rn{4}; 1583 let DecoderMethod = "DecodeVSTInstruction"; 1584 } 1585 class VST1Q<bits<4> op7_4, string Dt> 1586 : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$Rn, VecListDPair:$Vd), 1587 
IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> { 1588 let Rm = 0b1111; 1589 let Inst{5-4} = Rn{5-4}; 1590 let DecoderMethod = "DecodeVSTInstruction"; 1591 } 1592 1593 def VST1d8 : VST1D<{0,0,0,?}, "8">; 1594 def VST1d16 : VST1D<{0,1,0,?}, "16">; 1595 def VST1d32 : VST1D<{1,0,0,?}, "32">; 1596 def VST1d64 : VST1D<{1,1,0,?}, "64">; 1597 1598 def VST1q8 : VST1Q<{0,0,?,?}, "8">; 1599 def VST1q16 : VST1Q<{0,1,?,?}, "16">; 1600 def VST1q32 : VST1Q<{1,0,?,?}, "32">; 1601 def VST1q64 : VST1Q<{1,1,?,?}, "64">; 1602 1603 // ...with address register writeback: 1604 multiclass VST1DWB<bits<4> op7_4, string Dt> { 1605 def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb), 1606 (ins addrmode6:$Rn, VecListOneD:$Vd), IIC_VLD1u, 1607 "vst1", Dt, "$Vd, $Rn!", 1608 "$Rn.addr = $wb", []> { 1609 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 1610 let Inst{4} = Rn{4}; 1611 let DecoderMethod = "DecodeVSTInstruction"; 1612 let AsmMatchConverter = "cvtVSTwbFixed"; 1613 } 1614 def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb), 1615 (ins addrmode6:$Rn, rGPR:$Rm, VecListOneD:$Vd), 1616 IIC_VLD1u, 1617 "vst1", Dt, "$Vd, $Rn, $Rm", 1618 "$Rn.addr = $wb", []> { 1619 let Inst{4} = Rn{4}; 1620 let DecoderMethod = "DecodeVSTInstruction"; 1621 let AsmMatchConverter = "cvtVSTwbRegister"; 1622 } 1623 } 1624 multiclass VST1QWB<bits<4> op7_4, string Dt> { 1625 def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb), 1626 (ins addrmode6:$Rn, VecListDPair:$Vd), IIC_VLD1x2u, 1627 "vst1", Dt, "$Vd, $Rn!", 1628 "$Rn.addr = $wb", []> { 1629 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
1630 let Inst{5-4} = Rn{5-4}; 1631 let DecoderMethod = "DecodeVSTInstruction"; 1632 let AsmMatchConverter = "cvtVSTwbFixed"; 1633 } 1634 def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb), 1635 (ins addrmode6:$Rn, rGPR:$Rm, VecListDPair:$Vd), 1636 IIC_VLD1x2u, 1637 "vst1", Dt, "$Vd, $Rn, $Rm", 1638 "$Rn.addr = $wb", []> { 1639 let Inst{5-4} = Rn{5-4}; 1640 let DecoderMethod = "DecodeVSTInstruction"; 1641 let AsmMatchConverter = "cvtVSTwbRegister"; 1642 } 1643 } 1644 1645 defm VST1d8wb : VST1DWB<{0,0,0,?}, "8">; 1646 defm VST1d16wb : VST1DWB<{0,1,0,?}, "16">; 1647 defm VST1d32wb : VST1DWB<{1,0,0,?}, "32">; 1648 defm VST1d64wb : VST1DWB<{1,1,0,?}, "64">; 1649 1650 defm VST1q8wb : VST1QWB<{0,0,?,?}, "8">; 1651 defm VST1q16wb : VST1QWB<{0,1,?,?}, "16">; 1652 defm VST1q32wb : VST1QWB<{1,0,?,?}, "32">; 1653 defm VST1q64wb : VST1QWB<{1,1,?,?}, "64">; 1654 1655 // ...with 3 registers 1656 class VST1D3<bits<4> op7_4, string Dt> 1657 : NLdSt<0, 0b00, 0b0110, op7_4, (outs), 1658 (ins addrmode6:$Rn, VecListThreeD:$Vd), 1659 IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []> { 1660 let Rm = 0b1111; 1661 let Inst{4} = Rn{4}; 1662 let DecoderMethod = "DecodeVSTInstruction"; 1663 } 1664 multiclass VST1D3WB<bits<4> op7_4, string Dt> { 1665 def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb), 1666 (ins addrmode6:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u, 1667 "vst1", Dt, "$Vd, $Rn!", 1668 "$Rn.addr = $wb", []> { 1669 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
1670 let Inst{5-4} = Rn{5-4}; 1671 let DecoderMethod = "DecodeVSTInstruction"; 1672 let AsmMatchConverter = "cvtVSTwbFixed"; 1673 } 1674 def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb), 1675 (ins addrmode6:$Rn, rGPR:$Rm, VecListThreeD:$Vd), 1676 IIC_VLD1x3u, 1677 "vst1", Dt, "$Vd, $Rn, $Rm", 1678 "$Rn.addr = $wb", []> { 1679 let Inst{5-4} = Rn{5-4}; 1680 let DecoderMethod = "DecodeVSTInstruction"; 1681 let AsmMatchConverter = "cvtVSTwbRegister"; 1682 } 1683 } 1684 1685 def VST1d8T : VST1D3<{0,0,0,?}, "8">; 1686 def VST1d16T : VST1D3<{0,1,0,?}, "16">; 1687 def VST1d32T : VST1D3<{1,0,0,?}, "32">; 1688 def VST1d64T : VST1D3<{1,1,0,?}, "64">; 1689 1690 defm VST1d8Twb : VST1D3WB<{0,0,0,?}, "8">; 1691 defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16">; 1692 defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32">; 1693 defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64">; 1694 1695 def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>; 1696 def VST1d64TPseudoWB_fixed : VSTQQWBPseudo<IIC_VST1x3u>; 1697 def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>; 1698 1699 // ...with 4 registers 1700 class VST1D4<bits<4> op7_4, string Dt> 1701 : NLdSt<0, 0b00, 0b0010, op7_4, (outs), 1702 (ins addrmode6:$Rn, VecListFourD:$Vd), 1703 IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "", 1704 []> { 1705 let Rm = 0b1111; 1706 let Inst{5-4} = Rn{5-4}; 1707 let DecoderMethod = "DecodeVSTInstruction"; 1708 } 1709 multiclass VST1D4WB<bits<4> op7_4, string Dt> { 1710 def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb), 1711 (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1x4u, 1712 "vst1", Dt, "$Vd, $Rn!", 1713 "$Rn.addr = $wb", []> { 1714 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
1715 let Inst{5-4} = Rn{5-4}; 1716 let DecoderMethod = "DecodeVSTInstruction"; 1717 let AsmMatchConverter = "cvtVSTwbFixed"; 1718 } 1719 def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb), 1720 (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd), 1721 IIC_VLD1x4u, 1722 "vst1", Dt, "$Vd, $Rn, $Rm", 1723 "$Rn.addr = $wb", []> { 1724 let Inst{5-4} = Rn{5-4}; 1725 let DecoderMethod = "DecodeVSTInstruction"; 1726 let AsmMatchConverter = "cvtVSTwbRegister"; 1727 } 1728 } 1729 1730 def VST1d8Q : VST1D4<{0,0,?,?}, "8">; 1731 def VST1d16Q : VST1D4<{0,1,?,?}, "16">; 1732 def VST1d32Q : VST1D4<{1,0,?,?}, "32">; 1733 def VST1d64Q : VST1D4<{1,1,?,?}, "64">; 1734 1735 defm VST1d8Qwb : VST1D4WB<{0,0,?,?}, "8">; 1736 defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16">; 1737 defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32">; 1738 defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64">; 1739 1740 def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>; 1741 def VST1d64QPseudoWB_fixed : VSTQQWBPseudo<IIC_VST1x4u>; 1742 def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>; 1743 1744 // VST2 : Vector Store (multiple 2-element structures) 1745 class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, 1746 InstrItinClass itin> 1747 : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$Rn, VdTy:$Vd), 1748 itin, "vst2", Dt, "$Vd, $Rn", "", []> { 1749 let Rm = 0b1111; 1750 let Inst{5-4} = Rn{5-4}; 1751 let DecoderMethod = "DecodeVSTInstruction"; 1752 } 1753 1754 def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2>; 1755 def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2>; 1756 def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2>; 1757 1758 def VST2q8 : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2>; 1759 def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2>; 1760 def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2>; 1761 1762 def VST2q8Pseudo : VSTQQPseudo<IIC_VST2x2>; 1763 def VST2q16Pseudo : 
VSTQQPseudo<IIC_VST2x2>; 1764 def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>; 1765 1766 // ...with address register writeback: 1767 multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt, 1768 RegisterOperand VdTy> { 1769 def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), 1770 (ins addrmode6:$Rn, VdTy:$Vd), IIC_VLD1u, 1771 "vst2", Dt, "$Vd, $Rn!", 1772 "$Rn.addr = $wb", []> { 1773 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 1774 let Inst{5-4} = Rn{5-4}; 1775 let DecoderMethod = "DecodeVSTInstruction"; 1776 let AsmMatchConverter = "cvtVSTwbFixed"; 1777 } 1778 def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), 1779 (ins addrmode6:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u, 1780 "vst2", Dt, "$Vd, $Rn, $Rm", 1781 "$Rn.addr = $wb", []> { 1782 let Inst{5-4} = Rn{5-4}; 1783 let DecoderMethod = "DecodeVSTInstruction"; 1784 let AsmMatchConverter = "cvtVSTwbRegister"; 1785 } 1786 } 1787 multiclass VST2QWB<bits<4> op7_4, string Dt> { 1788 def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), 1789 (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1u, 1790 "vst2", Dt, "$Vd, $Rn!", 1791 "$Rn.addr = $wb", []> { 1792 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
1793 let Inst{5-4} = Rn{5-4}; 1794 let DecoderMethod = "DecodeVSTInstruction"; 1795 let AsmMatchConverter = "cvtVSTwbFixed"; 1796 } 1797 def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), 1798 (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd), 1799 IIC_VLD1u, 1800 "vst2", Dt, "$Vd, $Rn, $Rm", 1801 "$Rn.addr = $wb", []> { 1802 let Inst{5-4} = Rn{5-4}; 1803 let DecoderMethod = "DecodeVSTInstruction"; 1804 let AsmMatchConverter = "cvtVSTwbRegister"; 1805 } 1806 } 1807 1808 defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListDPair>; 1809 defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair>; 1810 defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair>; 1811 1812 defm VST2q8wb : VST2QWB<{0,0,?,?}, "8">; 1813 defm VST2q16wb : VST2QWB<{0,1,?,?}, "16">; 1814 defm VST2q32wb : VST2QWB<{1,0,?,?}, "32">; 1815 1816 def VST2q8PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>; 1817 def VST2q16PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>; 1818 def VST2q32PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>; 1819 def VST2q8PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>; 1820 def VST2q16PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>; 1821 def VST2q32PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>; 1822 1823 // ...with double-spaced registers 1824 def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VST2>; 1825 def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2>; 1826 def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2>; 1827 defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced>; 1828 defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced>; 1829 defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced>; 1830 1831 // VST3 : Vector Store (multiple 3-element structures) 1832 class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt> 1833 : NLdSt<0, 0b00, op11_8, op7_4, (outs), 1834 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3, 
          "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
  let Rm = 0b1111; // Non-writeback form.
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVSTInstruction";
}

def VST3d8  : VST3D<0b0100, {0,0,0,?}, "8">;
def VST3d16 : VST3D<0b0100, {0,1,0,?}, "16">;
def VST3d32 : VST3D<0b0100, {1,0,0,?}, "32">;

def VST3d8Pseudo  : VSTQQPseudo<IIC_VST3>;
def VST3d16Pseudo : VSTQQPseudo<IIC_VST3>;
def VST3d32Pseudo : VSTQQPseudo<IIC_VST3>;

// ...with address register writeback:
class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u,
          "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVSTInstruction";
}

def VST3d8_UPD  : VST3DWB<0b0100, {0,0,0,?}, "8">;
def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">;
def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">;

def VST3d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST3u>;
def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;

// ...with double-spaced registers:
def VST3q8  : VST3D<0b0101, {0,0,0,?}, "8">;
def VST3q16 : VST3D<0b0101, {0,1,0,?}, "16">;
def VST3q32 : VST3D<0b0101, {1,0,0,?}, "32">;
def VST3q8_UPD  : VST3DWB<0b0101, {0,0,0,?}, "8">;
def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">;
def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">;

def VST3q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;

// ...alternate versions to be allocated odd register numbers:
def VST3q8oddPseudo  : VSTQQQQPseudo<IIC_VST3>;
def VST3q16oddPseudo : VSTQQQQPseudo<IIC_VST3>;
def VST3q32oddPseudo : VSTQQQQPseudo<IIC_VST3>;

def VST3q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;

// VST4 : Vector Store (multiple 4-element structures)
class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
          "", []> {
  let Rm = 0b1111; // Non-writeback form.
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVSTInstruction";
}

def VST4d8  : VST4D<0b0000, {0,0,?,?}, "8">;
def VST4d16 : VST4D<0b0000, {0,1,?,?}, "16">;
def VST4d32 : VST4D<0b0000, {1,0,?,?}, "32">;

def VST4d8Pseudo  : VSTQQPseudo<IIC_VST4>;
def VST4d16Pseudo : VSTQQPseudo<IIC_VST4>;
def VST4d32Pseudo : VSTQQPseudo<IIC_VST4>;

// ...with address register writeback:
class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u,
          "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVSTInstruction";
}

def VST4d8_UPD  : VST4DWB<0b0000, {0,0,?,?}, "8">;
def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">;
def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">;

def VST4d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST4u>;
def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;

// ...with double-spaced registers:
def VST4q8  : VST4D<0b0001, {0,0,?,?}, "8">;
def VST4q16 : VST4D<0b0001, {0,1,?,?}, "16">;
def VST4q32 : VST4D<0b0001, {1,0,?,?}, "32">;
def VST4q8_UPD  : VST4DWB<0b0001, {0,0,?,?}, "8">;
def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">;
def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">;

def VST4q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;

// ...alternate versions to be allocated odd register numbers:
def VST4q8oddPseudo  : VSTQQQQPseudo<IIC_VST4>;
def VST4q16oddPseudo : VSTQQQQPseudo<IIC_VST4>;
def VST4q32oddPseudo : VSTQQQQPseudo<IIC_VST4>;

def VST4q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;

} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1

// Classes for VST*LN pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
class VSTQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
class VSTQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
class VSTQQQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQQQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;

// VST1LN : Vector Store (single element from one lane)
class VST1LN<bits<4> op11_8, bits<4>
             op7_4, string Dt, ValueType Ty,
             PatFrag StoreOp, SDNode ExtractOp, Operand AddrMode>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
            (ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane),
            IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
            [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]> {
  let Rm = 0b1111; // Non-writeback form.
  let DecoderMethod = "DecodeVST1LN";
}
// Lane store from a Q register; expanded after regalloc (see VSTQLNPseudo).
class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
  : VSTQLNPseudo<IIC_VST1ln> {
  let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
                          addrmode6:$addr)];
}

def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8,
                      NEONvgetlaneu, addrmode6> {
  let Inst{7-5} = lane{2-0};
}
def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
                       NEONvgetlaneu, addrmode6> {
  let Inst{7-6} = lane{1-0};
  let Inst{4} = Rn{4};
}

def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt,
                       addrmode6oneL32> {
  let Inst{7} = lane{0};
  let Inst{5-4} = Rn{5-4};
}

def VST1LNq8Pseudo  : VST1QLNPseudo<v16i8, truncstorei8, NEONvgetlaneu>;
def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, NEONvgetlaneu>;
def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>;

// Float lane stores reuse the 32-bit integer lane-store instructions.
def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
          (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
          (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;

// ...with address register writeback:
class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
               PatFrag StoreOp, SDNode ExtractOp, Operand AdrMode>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
            (ins AdrMode:$Rn, am6offset:$Rm,
             DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt,
            "\\{$Vd[$lane]\\}, $Rn$Rm",
            "$Rn.addr = $wb",
            [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
                                    AdrMode:$Rn, am6offset:$Rm))]> {
  let DecoderMethod = "DecodeVST1LN";
}
class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
  : VSTQLNWBPseudo<IIC_VST1lnu> {
  let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
                                        addrmode6:$addr, am6offset:$offset))];
}

def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
                            NEONvgetlaneu, addrmode6> {
  let Inst{7-5} = lane{2-0};
}
def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16,
                             NEONvgetlaneu, addrmode6> {
  let Inst{7-6} = lane{1-0};
  let Inst{4} = Rn{4};
}
def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
                             extractelt, addrmode6oneL32> {
  let Inst{7} = lane{0};
  let Inst{5-4} = Rn{5-4};
}

def VST1LNq8Pseudo_UPD  : VST1QLNWBPseudo<v16i8, post_truncsti8, NEONvgetlaneu>;
def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16,NEONvgetlaneu>;
def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>;

let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {

// VST2LN : Vector Store (single 2-element structure from one lane)
class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane),
            IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn",
            "", []> {
  let Rm = 0b1111; // Non-writeback form.
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVST2LN";
}

def VST2LNd8 : VST2LN<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VST2LNd8Pseudo :
                     VSTQLNPseudo<IIC_VST2ln>;
def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>;
def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>;

// ...with double-spaced registers:
def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
  let Inst{4} = Rn{4};
}
def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{4} = Rn{4};
}

def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>;

// ...with address register writeback:
class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
            "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm",
            "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVST2LN";
}

def VST2LNd8_UPD : VST2LNWB<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VST2LNd8Pseudo_UPD  : VSTQLNWBPseudo<IIC_VST2lnu>;
def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;

def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;
def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;

// VST3LN : Vector Store (single 3-element structure from one lane)
class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3,
             nohash_imm:$lane), IIC_VST3ln, "vst3", Dt,
            "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []> {
  let Rm = 0b1111; // Non-writeback form.
  let DecoderMethod = "DecodeVST3LN";
}

def VST3LNd8 : VST3LN<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VST3LNd8Pseudo  : VSTQQLNPseudo<IIC_VST3ln>;
def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>;

// ...with double-spaced registers:
def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VST3LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;

// ...with address register writeback:
class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane),
            IIC_VST3lnu, "vst3", Dt,
            "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm",
            "$Rn.addr = $wb", []> {
  let DecoderMethod = "DecodeVST3LN";
}

def VST3LNd8_UPD : VST3LNWB<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VST3LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST3lnu>;
def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;

def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;

// VST4LN : Vector Store (single 4-element structure from one lane)
class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4,
             nohash_imm:$lane), IIC_VST4ln, "vst4", Dt,
            "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn",
            "", []> {
  let Rm = 0b1111; // Non-writeback form.
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVST4LN";
}

def VST4LNd8 : VST4LN<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNd8Pseudo  : VSTQQLNPseudo<IIC_VST4ln>;
def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>;

// ...with double-spaced registers:
def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;
def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;

// ...with address register writeback:
class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
            IIC_VST4lnu, "vst4", Dt,
            "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm",
            "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVST4LN";
}

def VST4LNd8_UPD : VST4LNWB<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST4lnu>;
def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;

def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;

} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1

// Use vld1/vst1 for unaligned f64 load / store
def : Pat<(f64 (hword_alignedload addrmode6:$addr)),
          (VLD1d16 addrmode6:$addr)>, Requires<[IsLE]>;
def : Pat<(hword_alignedstore (f64 DPR:$value), addrmode6:$addr),
          (VST1d16 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>;
def : Pat<(f64 (byte_alignedload addrmode6:$addr)),
          (VLD1d8 addrmode6:$addr)>, Requires<[IsLE]>;
def : Pat<(byte_alignedstore (f64 DPR:$value), addrmode6:$addr),
          (VST1d8 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>;
def : Pat<(f64 (non_word_alignedload addrmode6:$addr)),
          (VLD1d64 addrmode6:$addr)>, Requires<[IsBE]>;
def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr),
          (VST1d64 addrmode6:$addr, DPR:$value)>, Requires<[IsBE]>;

// Use vld1/vst1 for Q and QQ. Also use them for unaligned v2f64
// load / store if it's legal.
def : Pat<(v2f64 (dword_alignedload addrmode6:$addr)),
          (VLD1q64 addrmode6:$addr)>;
def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q64 addrmode6:$addr, QPR:$value)>;
def : Pat<(v2f64 (word_alignedload addrmode6:$addr)),
          (VLD1q32 addrmode6:$addr)>;
def : Pat<(word_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q32 addrmode6:$addr, QPR:$value)>;
def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
          (VLD1q16 addrmode6:$addr)>, Requires<[IsLE]>;
def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q16 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>;
def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
          (VLD1q8 addrmode6:$addr)>, Requires<[IsLE]>;
def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q8 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>;

//===----------------------------------------------------------------------===//
// NEON pattern fragments
//===----------------------------------------------------------------------===//

// Extract D sub-registers of Q registers.
// Map an element index in a QQ/QQQQ sequence to the D subreg holding it:
// 8 i8 lanes per D register, 4 i16 lanes, 2 i32 lanes, 1 f64 element.
def DSubReg_i8_reg  : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/8, MVT::i32);
}]>;
def DSubReg_i16_reg : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/4, MVT::i32);
}]>;
def DSubReg_i32_reg : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/2, MVT::i32);
}]>;
def DSubReg_f64_reg : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue(), MVT::i32);
}]>;

// Extract S sub-registers of Q/D registers.
def SSubReg_f32_reg : SDNodeXForm<imm, [{
  assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue(), MVT::i32);
}]>;

// Translate lane numbers from Q registers to D subregs.
// Masks keep the lane index within one D register (8/4/2 lanes).
def SubReg_i8_lane  : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 7, MVT::i32);
}]>;
def SubReg_i16_lane : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 3, MVT::i32);
}]>;
def SubReg_i32_lane : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 1, MVT::i32);
}]>;

//===----------------------------------------------------------------------===//
// Instruction Classes
//===----------------------------------------------------------------------===//

// Basic 2-register operations: double- and quad-register.
2353 class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2354 bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, 2355 string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> 2356 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd), 2357 (ins DPR:$Vm), IIC_VUNAD, OpcodeStr, Dt,"$Vd, $Vm", "", 2358 [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm))))]>; 2359 class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2360 bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, 2361 string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> 2362 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd), 2363 (ins QPR:$Vm), IIC_VUNAQ, OpcodeStr, Dt,"$Vd, $Vm", "", 2364 [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm))))]>; 2365 2366 // Basic 2-register intrinsics, both double- and quad-register. 2367 class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2368 bits<2> op17_16, bits<5> op11_7, bit op4, 2369 InstrItinClass itin, string OpcodeStr, string Dt, 2370 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 2371 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd), 2372 (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "", 2373 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>; 2374 class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2375 bits<2> op17_16, bits<5> op11_7, bit op4, 2376 InstrItinClass itin, string OpcodeStr, string Dt, 2377 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 2378 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd), 2379 (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "", 2380 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>; 2381 2382 // Narrow 2-register operations. 
2383 class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2384 bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, 2385 InstrItinClass itin, string OpcodeStr, string Dt, 2386 ValueType TyD, ValueType TyQ, SDNode OpNode> 2387 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd), 2388 (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "", 2389 [(set DPR:$Vd, (TyD (OpNode (TyQ QPR:$Vm))))]>; 2390 2391 // Narrow 2-register intrinsics. 2392 class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2393 bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, 2394 InstrItinClass itin, string OpcodeStr, string Dt, 2395 ValueType TyD, ValueType TyQ, SDPatternOperator IntOp> 2396 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd), 2397 (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "", 2398 [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vm))))]>; 2399 2400 // Long 2-register operations (currently only used for VMOVL). 2401 class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2402 bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, 2403 InstrItinClass itin, string OpcodeStr, string Dt, 2404 ValueType TyQ, ValueType TyD, SDNode OpNode> 2405 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd), 2406 (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "", 2407 [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vm))))]>; 2408 2409 // Long 2-register intrinsics. 2410 class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2411 bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, 2412 InstrItinClass itin, string OpcodeStr, string Dt, 2413 ValueType TyQ, ValueType TyD, SDPatternOperator IntOp> 2414 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd), 2415 (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "", 2416 [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vm))))]>; 2417 2418 // 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register. 
// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
// Both registers are read and written, so each output is tied to the
// corresponding input via the constraint string.
class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0,
        (outs DPR:$Vd, DPR:$Vm), (ins DPR:$src1, DPR:$src2), IIC_VPERMD,
        OpcodeStr, Dt, "$Vd, $Vm",
        "$src1 = $Vd, $src2 = $Vm", []>;
class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
                  InstrItinClass itin, string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0,
        (outs QPR:$Vd, QPR:$Vm), (ins QPR:$src1, QPR:$src2), itin,
        OpcodeStr, Dt, "$Vd, $Vm",
        "$src1 = $Vd, $src2 = $Vm", []>;

// Basic 3-register operations: double- and quad-register.
class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}
// Same as N3VD but no data type.
class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr,
            ValueType ResTy, ValueType OpTy,
            SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 0, op4,
         (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
         OpcodeStr, "$Vd, $Vn, $Vm", "",
         [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// Scalar (by-lane) forms: the $Vm operand is a lane of a double register.
// 32-bit lanes may address the full DPR_VFP2 range; 16-bit lanes are
// restricted to DPR_8 so the lane index fits in the encoding.
class N3VDSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType Ty, SDNode ShOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd),
              (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$Vn),
                              (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
                                                imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}
class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
               string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd),
              (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,
              "$Vd, $Vn, $Vm$lane", "",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$Vn),
                              (Ty (NEONvduplane (Ty DPR_8:$Vm),
                                                imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}

// Quad-register counterparts of N3VD/N3VDX/N3VDSL/N3VDSL16.
class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}
class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr,
            ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 1, op4,
         (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
         OpcodeStr, "$Vd, $Vn, $Vm", "",
         [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}
class N3VQSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$Vn),
                                 (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                      imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}
class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,
              "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$Vn),
                                 (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                      imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}

// Basic 3-register intrinsics, both double- and quad-register.
// Basic 3-register intrinsics, both double- and quad-register.
class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp,
              bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}
class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt, ValueType Ty,
                SDPatternOperator IntOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd),
              (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (Ty DPR:$Vd),
                    (Ty (IntOp (Ty DPR:$Vn),
                               (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
                                                 imm:$lane)))))]> {
  let isCommutable = 0;
}
class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt, ValueType Ty,
                  SDPatternOperator IntOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd),
              (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (Ty DPR:$Vd),
                    (Ty (IntOp (Ty DPR:$Vn),
                               (Ty (NEONvduplane (Ty DPR_8:$Vm),
                                                 imm:$lane)))))]> {
  let isCommutable = 0;
}
// "Sh" (shift) variant: the operands are listed and printed in $Vm, $Vn
// order, and the two-operand alias ties $Vm (not $Vn) to $Vd.
class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                Format f, InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vm, DPR:$Vn), f, itin,
        OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (OpTy DPR:$Vn))))]> {
  let TwoOperandAliasConstraint = "$Vm = $Vd";
  let isCommutable = 0;
}

class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp,
              bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}
class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$Vn),
                                  (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                       imm:$lane)))))]> {
  let isCommutable = 0;
}
class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$Vn),
                                  (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                       imm:$lane)))))]> {
  let isCommutable = 0;
}
class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                Format f, InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vm, QPR:$Vn), f, itin,
        OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (OpTy QPR:$Vn))))]> {
  let TwoOperandAliasConstraint = "$Vm = $Vd";
  let isCommutable = 0;
}

// Multiply-Add/Sub operations: double- and quad-register.
// $src1 accumulates into $Vd, so it is tied to the destination.
class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDPatternOperator MulOp,
                SDPatternOperator OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
                            (Ty (MulOp DPR:$Vn, DPR:$Vm)))))]>;

class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType Ty, SDPatternOperator MulOp,
                  SDPatternOperator ShOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd),
              (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$src1),
                              (Ty (MulOp DPR:$Vn,
                                   (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
                                                     imm:$lane)))))))]>;
class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType Ty, SDNode MulOp, SDNode ShOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd),
              (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$src1),
                              (Ty (MulOp DPR:$Vn,
                                   (Ty (NEONvduplane (Ty DPR_8:$Vm),
                                                     imm:$lane)))))))]>;

class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty,
                SDPatternOperator MulOp, SDPatternOperator OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
                            (Ty (MulOp QPR:$Vn, QPR:$Vm)))))]>;
class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt, ValueType ResTy,
                  ValueType OpTy, SDPatternOperator MulOp,
                  SDPatternOperator ShOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$src1),
                                 (ResTy (MulOp QPR:$Vn,
                                         (ResTy (NEONvduplane
                                                  (OpTy DPR_VFP2:$Vm),
                                                  imm:$lane)))))))]>;
class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType ResTy, ValueType OpTy,
                    SDNode MulOp, SDNode ShOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$src1),
                                 (ResTy (MulOp QPR:$Vn,
                                         (ResTy (NEONvduplane
                                                  (OpTy DPR_8:$Vm),
                                                  imm:$lane)))))))]>;

// Neon Intrinsic-Op instructions (VABA): double- and quad-register.
// Neon Intrinsic-Op instructions (VABA): double- and quad-register.
// The intrinsic result is combined with the accumulator $src1 by OpNode;
// $src1 is tied to the destination.
class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
                            (Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>;
class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
                            (Ty (IntOp (Ty QPR:$Vn), (Ty QPR:$Vm))))))]>;

// Neon 3-argument intrinsics, both double- and quad-register.
// The destination register is also used as the first source operand register.
// Neon 3-argument intrinsics, both double- and quad-register.
// The destination register is also used as the first source operand register.
class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$src1),
                                     (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src1),
                                     (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;

// Long Multiply-Add/Sub operations: D-register sources multiplied into a
// Q-register accumulator, which is tied to the destination.
class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
                               (TyQ (MulOp (TyD DPR:$Vn),
                                           (TyD DPR:$Vm)))))]>;
class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set QPR:$Vd,
                    (OpNode (TyQ QPR:$src1),
                            (TyQ (MulOp (TyD DPR:$Vn),
                                 (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),
                                                    imm:$lane))))))]>;
class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set QPR:$Vd,
                    (OpNode (TyQ QPR:$src1),
                            (TyQ (MulOp (TyD DPR:$Vn),
                                 (TyD (NEONvduplane (TyD DPR_8:$Vm),
                                                    imm:$lane))))))]>;

// Long Intrinsic-Op vector operations with explicit extend (VABAL).
// The D-sized intrinsic result is widened by ExtOp before accumulating.
class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8,
                   bit op4, InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType TyQ, ValueType TyD, SDPatternOperator IntOp,
                   SDNode ExtOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
                               (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                                       (TyD DPR:$Vm)))))))]>;

// Neon Long 3-argument intrinsic.  The destination register is
// a quad-register and is also used as the first source operand register.
// Neon Long 3-argument intrinsic.  The destination register is
// a quad-register and is also used as the first source operand register.
class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
              (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$Vn), (TyD DPR:$Vm))))]>;
class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8,
                 InstrItinClass itin, string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$src1),
                                  (OpTy DPR:$Vn),
                                  (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                      imm:$lane)))))]>;
class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$src1),
                                  (OpTy DPR:$Vn),
                                  (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                      imm:$lane)))))]>;

// Narrowing 3-register intrinsics.
// Narrowing 3-register intrinsics: quad-register sources, double result.
class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
              SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINi4D,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vn), (TyQ QPR:$Vm))))]> {
  let isCommutable = Commutable;
}

// Long 3-register operations: double-register sources, quad result.
class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  let isCommutable = Commutable;
}
class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set QPR:$Vd,
                    (TyQ (OpNode (TyD DPR:$Vn),
                                 (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),
                                                    imm:$lane)))))]>;
class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set QPR:$Vd,
                    (TyQ (OpNode (TyD DPR:$Vn),
                                 (TyD (NEONvduplane (TyD DPR_8:$Vm),
                                                    imm:$lane)))))]>;

// Long 3-register operations with explicitly extended operands.
// Long 3-register operations with explicitly extended operands:
// each D-register source is widened by ExtOp before OpNode combines them.
class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDNode OpNode, SDNode ExtOp,
              bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (OpNode (TyQ (ExtOp (TyD DPR:$Vn))),
                               (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  let isCommutable = Commutable;
}

// Long 3-register intrinsics with explicit extend (VABDL):
// the D-sized intrinsic result is widened by ExtOp into the Q destination.
class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                 InstrItinClass itin, string OpcodeStr, string Dt,
                 ValueType TyQ, ValueType TyD, SDPatternOperator IntOp,
                 SDNode ExtOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                               (TyD DPR:$Vm))))))]> {
  let isCommutable = Commutable;
}

// Long 3-register intrinsics.
// Long 3-register intrinsics.
class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDPatternOperator IntOp,
              bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  let isCommutable = Commutable;
}
class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (OpTy DPR:$Vn),
                                  (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                      imm:$lane)))))]>;
class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (OpTy DPR:$Vn),
                                  (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                      imm:$lane)))))]>;

// Wide 3-register operations: quad first source, double second source
// widened by ExtOp before OpNode combines them.
class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
           SDNode OpNode, SDNode ExtOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VSUBiD,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$Vn),
                               (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// Pairwise long 2-register intrinsics, both double- and quad-register.
class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vm), IIC_VSHLiD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vm), IIC_VSHLiD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Pairwise long 2-register accumulate intrinsics,
// both double- and quad-register.
// The destination register is also used as the first source operand register.
// Pairwise long 2-register accumulate intrinsics,
// both double- and quad-register.
// The destination register is also used as the first source operand register.
class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vm), IIC_VPALiD,
        OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$Vm))))]>;
class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vm), IIC_VPALiQ,
        OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$Vm))))]>;

// Shift by immediate,
// both double- and quad-register.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin, Operand ImmTy,
             string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), f, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd, (Ty (OpNode (Ty DPR:$Vm), (i32 imm:$SIMM))))]>;
class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin, Operand ImmTy,
             string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), f, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd, (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>;
}

// Long shift by immediate.
// Long shift by immediate: double-register source, quad-register result.
class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, Operand ImmTy, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs QPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), N2RegVShLFrm,
           IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm),
                                         (i32 imm:$SIMM))))]>;

// Narrow shift by immediate: quad-register source, double-register result.
class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, Operand ImmTy, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs DPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm),
                                         (i32 imm:$SIMM))))]>;

// Shift right by immediate and accumulate,
// both double- and quad-register.
// Shift right by immediate and accumulate,
// both double- and quad-register.  $src1 is the accumulator, tied to $Vd.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
           (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set DPR:$Vd, (Ty (add DPR:$src1,
                               (Ty (ShOp DPR:$Vm, (i32 imm:$SIMM))))))]>;
class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
           (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set QPR:$Vd, (Ty (add QPR:$src1,
                               (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>;
}

// Shift by immediate and insert,
// both double- and quad-register.  ShOp takes the insert destination
// ($src1, tied to $Vd) as an extra operand.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, Format f, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
           (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set DPR:$Vd, (Ty (ShOp DPR:$src1, DPR:$Vm, (i32 imm:$SIMM))))]>;
class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, Format f, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
           (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiQ,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set QPR:$Vd, (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>;
}

// Convert, with fractional bits immediate,
// both double- and quad-register.
3044 class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, 3045 string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, 3046 SDPatternOperator IntOp> 3047 : N2VImm<op24, op23, op11_8, op7, 0, op4, 3048 (outs DPR:$Vd), (ins DPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm, 3049 IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "", 3050 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>; 3051 class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, 3052 string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, 3053 SDPatternOperator IntOp> 3054 : N2VImm<op24, op23, op11_8, op7, 1, op4, 3055 (outs QPR:$Vd), (ins QPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm, 3056 IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "", 3057 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>; 3058 3059 //===----------------------------------------------------------------------===// 3060 // Multiclasses 3061 //===----------------------------------------------------------------------===// 3062 3063 // Abbreviations used in multiclass suffixes: 3064 // Q = quarter int (8 bit) elements 3065 // H = half int (16 bit) elements 3066 // S = single int (32 bit) elements 3067 // D = double int (64 bit) elements 3068 3069 // Neon 2-register vector operations and intrinsics. 3070 3071 // Neon 2-register comparisons. 3072 // source operand element sizes of 8, 16 and 32 bits: 3073 multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, 3074 bits<5> op11_7, bit op4, string opc, string Dt, 3075 string asm, SDNode OpNode> { 3076 // 64-bit vector types. 
3077 def v8i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4, 3078 (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary, 3079 opc, !strconcat(Dt, "8"), asm, "", 3080 [(set DPR:$Vd, (v8i8 (OpNode (v8i8 DPR:$Vm))))]>; 3081 def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4, 3082 (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary, 3083 opc, !strconcat(Dt, "16"), asm, "", 3084 [(set DPR:$Vd, (v4i16 (OpNode (v4i16 DPR:$Vm))))]>; 3085 def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4, 3086 (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary, 3087 opc, !strconcat(Dt, "32"), asm, "", 3088 [(set DPR:$Vd, (v2i32 (OpNode (v2i32 DPR:$Vm))))]>; 3089 def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4, 3090 (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary, 3091 opc, "f32", asm, "", 3092 [(set DPR:$Vd, (v2i32 (OpNode (v2f32 DPR:$Vm))))]> { 3093 let Inst{10} = 1; // overwrite F = 1 3094 } 3095 3096 // 128-bit vector types. 3097 def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4, 3098 (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary, 3099 opc, !strconcat(Dt, "8"), asm, "", 3100 [(set QPR:$Vd, (v16i8 (OpNode (v16i8 QPR:$Vm))))]>; 3101 def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4, 3102 (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary, 3103 opc, !strconcat(Dt, "16"), asm, "", 3104 [(set QPR:$Vd, (v8i16 (OpNode (v8i16 QPR:$Vm))))]>; 3105 def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4, 3106 (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary, 3107 opc, !strconcat(Dt, "32"), asm, "", 3108 [(set QPR:$Vd, (v4i32 (OpNode (v4i32 QPR:$Vm))))]>; 3109 def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4, 3110 (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary, 3111 opc, "f32", asm, "", 3112 [(set QPR:$Vd, (v4i32 (OpNode (v4f32 QPR:$Vm))))]> { 3113 let Inst{10} = 1; // overwrite F = 1 3114 } 3115 } 3116 3117 3118 // Neon 2-register vector intrinsics, 3119 // element sizes of 8, 16 and 32 bits: 3120 multiclass N2VInt_QHS<bits<2> op24_23, bits<2> 
                      op21_20, bits<2> op17_16,
                      bits<5> op11_7, bit op4,
                      InstrItinClass itinD, InstrItinClass itinQ,
                      string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8 : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                     itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
  def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16,
                      IntOp>;
  def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32,
                      IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8,
                      IntOp>;
  def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16,
                      IntOp>;
  def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32,
                      IntOp>;
}


// Neon Narrowing 2-register vector operations,
//   source operand element sizes of 16, 32 and 64 bits:
multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                    bits<5> op11_7, bit op6, bit op4,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    SDNode OpNode> {
  // Narrowing halves the element width (e.g. v8i16 -> v8i8); the Dt suffix
  // names the *source* element size.
  def v8i8 : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
                  itin, OpcodeStr, !strconcat(Dt, "16"),
                  v8i8, v8i16, OpNode>;
  def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, !strconcat(Dt, "32"),
                   v4i16, v4i32, OpNode>;
  def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, !strconcat(Dt, "64"),
                   v2i32, v2i64, OpNode>;
}

// Neon Narrowing 2-register vector intrinsics,
//   source operand element sizes of 16, 32 and 64 bits:
multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op6, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       SDPatternOperator IntOp> {
  // Intrinsic counterpart of N2VN_HSD: narrows to half-width elements.
  def v8i8 : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
                     itin, OpcodeStr, !strconcat(Dt, "16"),
                     v8i8, v8i16, IntOp>;
  def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "32"),
                      v4i16, v4i32, IntOp>;
  def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "64"),
                      v2i32, v2i64, IntOp>;
}


// Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL).
//   source operand element sizes of 16, 32 and 64 bits:
multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
                    string OpcodeStr, string Dt, SDNode OpNode> {
  // NOTE(review): the per-variant bit-pair arguments (0b00,0b10 / 0b01,0b00 /
  // 0b10,0b00) encode the element size per N2VL's parameter order — confirm
  // against the N2VL class definition before changing.
  def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>;
  def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
  def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
}


// Neon 3-register vector operations.

// First with only element sizes of 8, 16 and 32 bits:
multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                   InstrItinClass itinD16, InstrItinClass itinD32,
                   InstrItinClass itinQ16, InstrItinClass itinQ32,
                   string OpcodeStr, string Dt,
                   SDNode OpNode, bit Commutable = 0> {
  // 64-bit vector types.
  def v8i8  : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i8, v8i8, OpNode, Commutable>;
  def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i16, v4i16, OpNode, Commutable>;
  def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i32, v2i32, OpNode, Commutable>;

  // 128-bit vector types.
  def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v16i8, v16i8, OpNode, Commutable>;
  def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v8i16, v8i16, OpNode, Commutable>;
  def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v4i32, v4i32, OpNode, Commutable>;
}

// "SL" (scalar/lane) forms for 16- and 32-bit elements; ShOp receives the
// lane operand.
multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> {
  def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, "i16", v4i16, ShOp>;
  def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, "i32", v2i32, ShOp>;
  def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, "i16", v8i16, v4i16, ShOp>;
  def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, "i32",
                     v4i32, v2i32, ShOp>;
}

// ....then also with element size 64 bits:
multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itinD, InstrItinClass itinQ,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0>
  : N3V_QHS<op24, op23, op11_8, op4, itinD, itinD, itinQ, itinQ,
            OpcodeStr, Dt, OpNode, Commutable> {
  def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD,
                   OpcodeStr, !strconcat(Dt, "64"),
                   v1i64, v1i64, OpNode, Commutable>;
  def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ,
                   OpcodeStr, !strconcat(Dt, "64"),
                   v2i64, v2i64, OpNode, Commutable>;
}


// Neon 3-register vector intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                     InstrItinClass itinD16, InstrItinClass itinD32,
                     InstrItinClass itinQ16, InstrItinClass itinQ32,
                     string OpcodeStr, string Dt,
                     SDPatternOperator IntOp, bit Commutable = 0> {
  // 64-bit vector types.
  def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i16, v4i16, IntOp, Commutable>;
  def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, f, itinD32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i32, v2i32, IntOp, Commutable>;

  // 128-bit vector types.
  def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, f, itinQ16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v8i16, v8i16, IntOp, Commutable>;
  def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, f, itinQ32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v4i32, v4i32, IntOp, Commutable>;
}
// Same shape as N3VInt_HS, but via the *Sh instruction classes and without a
// Commutable flag.  NOTE(review): presumably the shift-by-register forms —
// confirm against N3VDIntSh/N3VQIntSh.
multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v4i16 : N3VDIntSh<op24, op23, 0b01, op11_8, op4, f, itinD16,
                        OpcodeStr, !strconcat(Dt, "16"),
                        v4i16, v4i16, IntOp>;
  def v2i32 : N3VDIntSh<op24, op23, 0b10, op11_8, op4, f, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v8i16 : N3VQIntSh<op24, op23, 0b01, op11_8, op4, f, itinQ16,
                        OpcodeStr, !strconcat(Dt, "16"),
                        v8i16, v8i16, IntOp>;
  def v4i32 : N3VQIntSh<op24, op23, 0b10, op11_8, op4, f, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v4i32, v4i32, IntOp>;
}

// Scalar/lane ("SL") intrinsic forms for 16- and 32-bit elements.
multiclass N3VIntSL_HS<bits<4> op11_8,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
                          OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>;
  def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>;
  def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
                          OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>;
  def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                      InstrItinClass itinD16, InstrItinClass itinD32,
                      InstrItinClass itinQ16, InstrItinClass itinQ32,
                      string OpcodeStr, string Dt,
                      SDPatternOperator IntOp, bit Commutable = 0>
  : N3VInt_HS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
              OpcodeStr, Dt, IntOp, Commutable> {
  def v8i8 : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16,
                     OpcodeStr, !strconcat(Dt, "8"),
                     v8i8, v8i8, IntOp, Commutable>;
  def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, f, itinQ16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v16i8, v16i8, IntOp, Commutable>;
}
// 8-bit extension of N3VInt_HSSh (the *Sh variant family).
multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt,
                        SDPatternOperator IntOp>
  : N3VInt_HSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16,
                itinQ32,
                OpcodeStr, Dt, IntOp> {
  def v8i8 : N3VDIntSh<op24, op23, 0b00, op11_8, op4, f, itinD16,
                       OpcodeStr, !strconcat(Dt, "8"),
                       v8i8, v8i8, IntOp>;
  def v16i8 : N3VQIntSh<op24, op23, 0b00, op11_8, op4, f, itinQ16,
                        OpcodeStr, !strconcat(Dt, "8"),
                        v16i8, v16i8, IntOp>;
}


// ....then also with element size of 64 bits:
multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0>
  : N3VInt_QHS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
               OpcodeStr, Dt, IntOp, Commutable> {
  def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v1i64, v1i64, IntOp, Commutable>;
  def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, f, itinQ32,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v2i64, v2i64, IntOp, Commutable>;
}
// 64-bit extension of N3VInt_QHSSh (the *Sh variant family).
multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt,
                         SDPatternOperator IntOp>
  : N3VInt_QHSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16,
                 itinQ32,
                 OpcodeStr, Dt, IntOp> {
  def v1i64 : N3VDIntSh<op24, op23, 0b11, op11_8, op4, f, itinD32,
                        OpcodeStr, !strconcat(Dt, "64"),
                        v1i64, v1i64, IntOp>;
  def v2i64 : N3VQIntSh<op24, op23, 0b11, op11_8, op4, f, itinQ32,
                        OpcodeStr, !strconcat(Dt, "64"),
                        v2i64, v2i64, IntOp>;
}

// Neon Narrowing 3-register vector intrinsics,
//   source operand element sizes of 16, 32 and 64 bits:
multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0> {
  def
      v8i8 : N3VNInt<op24, op23, 0b00, op11_8, op4,
                     OpcodeStr, !strconcat(Dt, "16"),
                     v8i8, v8i16, IntOp, Commutable>;
  def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v4i16, v4i32, IntOp, Commutable>;
  def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v2i32, v2i64, IntOp, Commutable>;
}


// Neon Long 3-register vector operations.

multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itin16, InstrItinClass itin32,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0> {
  // Long ops widen: result elements are twice the source width
  // (e.g. v8i8 -> v8i16); the Dt suffix names the source element size.
  def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i16, v8i8, OpNode, Commutable>;
  def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i32, v4i16, OpNode, Commutable>;
  def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i64, v2i32, OpNode, Commutable>;
}

// Scalar/lane long forms for 16- and 32-bit source elements.
multiclass N3VLSL_HS<bit op24, bits<4> op11_8,
                     InstrItinClass itin, string OpcodeStr, string Dt,
                     SDNode OpNode> {
  def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin, OpcodeStr,
                       !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
  def v2i32 : N3VLSL<op24, 0b10, op11_8, itin, OpcodeStr,
                     !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
}

// Long ops with an explicit extend node (ExtOp) applied to the operands.
multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
  def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i16, v8i8, OpNode, ExtOp, Commutable>;
  def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i32, v4i16, OpNode, ExtOp, Commutable>;
  def v2i64 : N3VLExt<op24, op23, 0b10,
                      op11_8, op4, itin32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i64, v2i32, OpNode, ExtOp, Commutable>;
}

// Neon Long 3-register vector intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itin16, InstrItinClass itin32,
                      string OpcodeStr, string Dt,
                      SDPatternOperator IntOp, bit Commutable = 0> {
  def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i32, v4i16, IntOp, Commutable>;
  def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i64, v2i32, IntOp, Commutable>;
}

// Scalar/lane long intrinsic forms.
multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
                        InstrItinClass itin, string OpcodeStr, string Dt,
                        SDPatternOperator IntOp> {
  def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
                          OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0>
  : N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt,
               IntOp, Commutable> {
  def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i16, v8i8, IntOp, Commutable>;
}

// ....with explicit extend (VABDL).
multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                          InstrItinClass itin, string OpcodeStr, string Dt,
                          SDPatternOperator IntOp, SDNode ExtOp,
                          bit Commutable = 0> {
  def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "8"),
                         v8i16, v8i8, IntOp, ExtOp, Commutable>;
  def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "16"),
                         v4i32, v4i16, IntOp, ExtOp, Commutable>;
  def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "32"),
                         v2i64, v2i32, IntOp, ExtOp, Commutable>;
}


// Neon Wide 3-register vector intrinsics,
//   source operand element sizes of 8, 16 and 32 bits:
multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
  // Wide ops: wide (double-width) first operand, narrow second operand
  // extended via ExtOp.
  def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i16, v8i8, OpNode, ExtOp, Commutable>;
  def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i32, v4i16, OpNode, ExtOp, Commutable>;
  def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i64, v2i32, OpNode, ExtOp, Commutable>;
}


// Neon Multiply-Op vector operations,
//   element sizes of 8, 16 and 32 bits:
multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt, SDNode OpNode> {
  // 64-bit vector types.
  // Each variant combines a multiply (mul) with OpNode (e.g. accumulate).
  def v8i8  : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, mul, OpNode>;
  def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, OpNode>;
  def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, mul, OpNode>;
  def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, mul, OpNode>;
  def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, mul, OpNode>;
}

// Scalar/lane multiply-op forms for 16- and 32-bit elements.
multiclass N3VMulOpSL_HS<bits<4> op11_8,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt, SDNode ShOp> {
  def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
                            OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>;
  def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
                          OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>;
  def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
                            OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16,
                            mul, ShOp>;
  def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
                          OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32,
                          mul, ShOp>;
}

// Neon Intrinsic-Op vector operations,
//   element sizes of 8, 16 and 32 bits:
multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD, InstrItinClass itinQ,
                        string OpcodeStr, string Dt, SDPatternOperator IntOp,
                        SDNode OpNode> {
  // 64-bit vector types.
  // Each variant combines the intrinsic IntOp with OpNode.
  def v8i8  : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, IntOp, OpNode>;
  def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp, OpNode>;
  def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, IntOp, OpNode>;
  def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, IntOp, OpNode>;
  def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, IntOp, OpNode>;
}

// Neon 3-argument intrinsics,
//   element sizes of 8, 16 and 32 bits:
multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itinD, InstrItinClass itinQ,
                       string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD,
                       OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
  def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD,
                       OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
  def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD,
                       OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ,
                       OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
  def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ,
                       OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
  def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ,
                       OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
}


// Neon Long Multiply-Op vector operations,
//   element sizes of 8, 16 and 32 bits:
multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                         InstrItinClass itin16, InstrItinClass itin32,
                         string OpcodeStr, string Dt, SDNode MulOp,
                         SDNode OpNode> {
  // Widening multiply (MulOp) combined with OpNode; result elements are
  // twice the source width.
  def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
                        !strconcat(Dt, "8"), v8i16, v8i8, MulOp, OpNode>;
  def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
                        !strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>;
  def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
                        !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
}

// Scalar/lane long multiply-op forms (VMLAL/VMLSL-style itineraries).
multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr,
                          string Dt, SDNode MulOp, SDNode OpNode> {
  def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D, OpcodeStr,
                            !strconcat(Dt,"16"), v4i32, v4i16, MulOp, OpNode>;
  def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D, OpcodeStr,
                          !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
}


// Neon Long 3-argument intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16,
                       OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32,
                       OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// Scalar/lane forms of the long 3-argument intrinsics.
multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
                         string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D,
                           OpcodeStr, !strconcat(Dt,"16"), v4i32, v4i16, IntOp>;
  def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D,
                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itin16, InstrItinClass itin32,
                        string OpcodeStr, string Dt, SDPatternOperator IntOp>
  : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt,
                IntOp> {
  def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16,
                       OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
}

// ....with explicit extend (VABAL).
multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                            InstrItinClass itin, string OpcodeStr, string Dt,
                            SDPatternOperator IntOp, SDNode ExtOp,
                            SDNode OpNode> {
  def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8,
                           IntOp, ExtOp, OpNode>;
  def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16,
                           IntOp, ExtOp, OpNode>;
  def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32,
                           IntOp, ExtOp, OpNode>;
}


// Neon Pairwise long 2-register intrinsics,
//   element sizes of 8, 16 and 32 bits:
multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                        bits<5> op11_7, bit op4,
                        string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // Pairwise-long halves the lane count and doubles the element width
  // (e.g. v8i8 -> v4i16).
  // 64-bit vector types.
  def v8i8  : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}


// Neon Pairwise long 2-register accumulate intrinsics,
//   element sizes of 8, 16 and 32 bits:
multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                         bits<5> op11_7, bit op4,
                         string OpcodeStr, string Dt,
                         SDPatternOperator IntOp> {
  // Same pairwise-long shape as N2VPLInt_QHS, via the accumulate (*Int2)
  // instruction classes.
  // 64-bit vector types.
  def v8i8  : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}


// Neon 2-register vector shift by immediate,
//   with f of either N2RegVShLFrm or N2RegVShRFrm
//   element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       SDNode OpNode> {
  // 64-bit vector types.
  // The fixed upper bits of imm6 (set via the `let Inst{...}` overrides,
  // per the 6-digit patterns in the comments) distinguish the element size.
  // NOTE(review): the 64-bit variants pass 1 (vs 0) for the bit argument
  // preceding op4 — presumably the L bit; confirm against N2VDSh/N2VQSh.
  def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                    OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
                             // imm6 = xxxxxx
}
multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       string baseOpc, SDNode OpNode> {
  // 64-bit vector types.
  // Right-shift forms: identical structure to N2VShL_QHSD but using the
  // per-size shr_imm8/16/32/64 immediate operands instead of i32imm.
  def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
                    OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
                             // imm6 = xxxxxx
}

// Neon Shift-Accumulate vector operations,
//   element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                         string OpcodeStr, string Dt, SDNode ShOp> {
  // 64-bit vector types.
  // Shift-accumulate forms (shr_immN operands); imm6 upper-bit overrides
  // select the element size, as in the plain shift multiclasses above.
  def v8i8 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
                       OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
                        OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
                        OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>;
                             // imm6 = xxxxxx
}

// Neon Shift-Insert vector operations,
//   with f of either N2RegVShLFrm or N2RegVShRFrm
//   element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShInsL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                          string OpcodeStr> {
  // 64-bit vector types.
  // Left shift-insert: N2RegVShLFrm with the NEONvsli node and i32imm
  // immediates.
  def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                       N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsli> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsli> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsli> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsli>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsli> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsli> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsli> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsli>;
                             // imm6 = xxxxxx
}
// Right shift-insert counterpart (NEONvsri, per-size shr_immN operands).
multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                          string OpcodeStr> {
  // 64-bit vector types.
3851 def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm8, 3852 N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsri> { 3853 let Inst{21-19} = 0b001; // imm6 = 001xxx 3854 } 3855 def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm16, 3856 N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsri> { 3857 let Inst{21-20} = 0b01; // imm6 = 01xxxx 3858 } 3859 def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm32, 3860 N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsri> { 3861 let Inst{21} = 0b1; // imm6 = 1xxxxx 3862 } 3863 def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, shr_imm64, 3864 N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsri>; 3865 // imm6 = xxxxxx 3866 3867 // 128-bit vector types. 3868 def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm8, 3869 N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsri> { 3870 let Inst{21-19} = 0b001; // imm6 = 001xxx 3871 } 3872 def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm16, 3873 N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsri> { 3874 let Inst{21-20} = 0b01; // imm6 = 01xxxx 3875 } 3876 def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm32, 3877 N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsri> { 3878 let Inst{21} = 0b1; // imm6 = 1xxxxx 3879 } 3880 def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, shr_imm64, 3881 N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsri>; 3882 // imm6 = xxxxxx 3883 } 3884 3885 // Neon Shift Long operations, 3886 // element sizes of 8, 16, 32 bits: 3887 multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, 3888 bit op4, string OpcodeStr, string Dt, SDNode OpNode> { 3889 def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4, 3890 OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, imm1_7, OpNode> { 3891 let Inst{21-19} = 0b001; // imm6 = 001xxx 3892 } 3893 def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4, 3894 OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, imm1_15, OpNode> { 3895 let Inst{21-20} = 0b01; // imm6 = 01xxxx 3896 } 3897 def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, 
op4, 3898 OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, imm1_31, OpNode> { 3899 let Inst{21} = 0b1; // imm6 = 1xxxxx 3900 } 3901 } 3902 3903 // Neon Shift Narrow operations, 3904 // element sizes of 16, 32, 64 bits: 3905 multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, 3906 bit op4, InstrItinClass itin, string OpcodeStr, string Dt, 3907 SDNode OpNode> { 3908 def v8i8 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin, 3909 OpcodeStr, !strconcat(Dt, "16"), 3910 v8i8, v8i16, shr_imm8, OpNode> { 3911 let Inst{21-19} = 0b001; // imm6 = 001xxx 3912 } 3913 def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin, 3914 OpcodeStr, !strconcat(Dt, "32"), 3915 v4i16, v4i32, shr_imm16, OpNode> { 3916 let Inst{21-20} = 0b01; // imm6 = 01xxxx 3917 } 3918 def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin, 3919 OpcodeStr, !strconcat(Dt, "64"), 3920 v2i32, v2i64, shr_imm32, OpNode> { 3921 let Inst{21} = 0b1; // imm6 = 1xxxxx 3922 } 3923 } 3924 3925 //===----------------------------------------------------------------------===// 3926 // Instruction Definitions. 3927 //===----------------------------------------------------------------------===// 3928 3929 // Vector Add Operations. 

// VADD : Vector Add (integer and floating-point)
defm VADD     : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i",
                         add, 1>;
def  VADDfd   : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
                     v2f32, v2f32, fadd, 1>;
def  VADDfq   : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
                     v4f32, v4f32, fadd, 1>;
// VADDL : Vector Add Long (Q = D + D)
defm VADDLs   : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
                            "vaddl", "s", add, sext, 1>;
defm VADDLu   : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
                            "vaddl", "u", add, zext, 1>;
// VADDW : Vector Add Wide (Q = Q + D)
defm VADDWs   : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>;
defm VADDWu   : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zext, 0>;
// VHADD : Vector Halving Add
defm VHADDs   : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vhadd", "s", int_arm_neon_vhadds, 1>;
defm VHADDu   : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vhadd", "u", int_arm_neon_vhaddu, 1>;
// VRHADD : Vector Rounding Halving Add
defm VRHADDs  : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vrhadd", "s", int_arm_neon_vrhadds, 1>;
defm VRHADDu  : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vrhadd", "u", int_arm_neon_vrhaddu, 1>;
// VQADD : Vector Saturating Add
defm VQADDs   : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm,
                            IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                            "vqadd", "s", int_arm_neon_vqadds, 1>;
defm VQADDu   : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm,
                            IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                            "vqadd", "u", int_arm_neon_vqaddu, 1>;
// VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q)
defm VADDHN   : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i",
                            int_arm_neon_vaddhn, 1>;
// VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
defm VRADDHN  : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
                            int_arm_neon_vraddhn, 1>;

// Vector Multiply Operations.

// VMUL : Vector Multiply (integer, polynomial and floating-point)
defm VMUL     : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D,
                        IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>;
def  VMULpd   : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul",
                        "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>;
def  VMULpq   : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul",
                        "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>;
def  VMULfd   : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
                     v2f32, v2f32, fmul, 1>;
def  VMULfq   : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
                     v4f32, v4f32, fmul, 1>;
defm VMULsl   : N3VSL_HS<0b1000, "vmul", mul>;
def  VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
def  VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
                       v2f32, fmul>;

// Lane-indexed multiply where the duplicated lane lives in a Q register:
// extract the D subregister that holds the lane and use the in-D lane index.
def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
                      (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
          (v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
                              (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (mul (v4i32 QPR:$src1),
                      (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
          (v4i32 (VMULslv4i32 (v4i32 QPR:$src1),
                              (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
                       (v4f32 (NEONvduplane (v4f32 QPR:$src2), imm:$lane)))),
          (v4f32 (VMULslfq (v4f32 QPR:$src1),
                           (v2f32 (EXTRACT_SUBREG QPR:$src2,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>;

// VQDMULH : Vector Saturating Doubling Multiply Returning High Half
defm VQDMULH  : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
                          IIC_VMULi16Q, IIC_VMULi32Q,
                          "vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
                            IIC_VMULi16Q, IIC_VMULi32Q,
                            "vqdmulh", "s", int_arm_neon_vqdmulh>;
// Lane-indexed forms with the lane source in a Q register (see note above).
def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
                                       (v8i16 (NEONvduplane (v8i16 QPR:$src2),
                                                            imm:$lane)))),
          (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
                                 (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                         (DSubReg_i16_reg imm:$lane))),
                                 (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
                                       (v4i32 (NEONvduplane (v4i32 QPR:$src2),
                                                            imm:$lane)))),
          (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
                                 (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                         (DSubReg_i32_reg imm:$lane))),
                                 (SubReg_i32_lane imm:$lane)))>;

// VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
defm VQRDMULH   : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm,
                            IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q,
                            "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>;
defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D,
                              IIC_VMULi16Q, IIC_VMULi32Q,
                              "vqrdmulh", "s", int_arm_neon_vqrdmulh>;
def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
                                        (v8i16 (NEONvduplane (v8i16 QPR:$src2),
                                                             imm:$lane)))),
          (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
                                  (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i16_reg imm:$lane))),
                                  (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
                                        (v4i32 (NEONvduplane (v4i32 QPR:$src2),
                                                             imm:$lane)))),
          (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
                                  (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i32_reg imm:$lane))),
                                  (SubReg_i32_lane imm:$lane)))>;

// VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D)
defm VMULLs   : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
                         "vmull", "s", NEONvmulls, 1>;
defm VMULLu   : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
                         "vmull", "u", NEONvmullu, 1>;
def  VMULLp   : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
                        v8i16, v8i8, int_arm_neon_vmullp, 1>;
defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>;
defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>;

// VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D)
defm VQDMULL  : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D,
                           "vqdmull", "s", int_arm_neon_vqdmull, 1>;
defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D,
                             "vqdmull", "s", int_arm_neon_vqdmull>;

// Vector Multiply-Accumulate and Multiply-Subtract Operations.

// VMLA : Vector Multiply Accumulate (integer and floating-point)
defm VMLA     : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
// The separate-multiply-plus-add f32 forms are used only when fused MAC is
// not in use (DontUseFusedMAC) and vmla-style codegen is enabled (UseFPVMLx).
def  VMLAfd   : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
                          v2f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
def  VMLAfq   : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
                          v4f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
defm VMLAsl   : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
                              IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def  VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
                            v2f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
def  VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
                            v4f32, v2f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;

// Lane-indexed multiply-accumulate with the lane source in a Q register.
def : Pat<(v8i16 (add (v8i16 QPR:$src1),
                  (mul (v8i16 QPR:$src2),
                       (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
          (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4i32 (add (v4i32 QPR:$src1),
                  (mul (v4i32 QPR:$src2),
                       (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
          (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;

def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1),
                  (fmul_su (v4f32 QPR:$src2),
                       (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
          (v4f32 (VMLAslfq (v4f32 QPR:$src1),
                           (v4f32 QPR:$src2),
                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>,
          Requires<[HasNEON, UseFPVMLx]>;

// VMLAL : Vector Multiply Accumulate Long (Q += D * D)
defm VMLALs   : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
                              "vmlal", "s", NEONvmulls, add>;
defm VMLALu   : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
                              "vmlal", "u", NEONvmullu, add>;

defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>;
defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>;

// VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
defm VQDMLAL  : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                            "vqdmlal", "s", int_arm_neon_vqdmlal>;
defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", int_arm_neon_vqdmlal>;

// VMLS : Vector Multiply Subtract (integer and floating-point)
defm VMLS     : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def  VMLSfd   : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
                          v2f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
def  VMLSfq   : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
                          v4f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
defm VMLSsl   : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
                              IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def  VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
                            v2f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
def  VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32",
                            v4f32, v2f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;

// Lane-indexed multiply-subtract with the lane source in a Q register.
def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
                  (mul (v8i16 QPR:$src2),
                       (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
          (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
                  (mul (v4i32 QPR:$src2),
                       (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
          (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;

def : Pat<(v4f32 (fsub_mlx (v4f32 QPR:$src1),
                  (fmul_su (v4f32 QPR:$src2),
                       (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
          (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2),
                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>,
          Requires<[HasNEON, UseFPVMLx]>;

// VMLSL : Vector Multiply Subtract Long (Q -= D * D)
defm VMLSLs   : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
                              "vmlsl", "s", NEONvmulls, sub>;
defm VMLSLu   : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
                              "vmlsl", "u", NEONvmullu, sub>;

defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", NEONvmulls, sub>;
defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>;

// VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
defm VQDMLSL  : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
                            "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
// NOTE(review): opcode below is written as 3-bit 0b111 while the sibling
// VQDMLALsl uses 4-bit 0b0011; presumably TableGen zero-extends this into the
// parameter's declared width -- confirm the intended value is 0b0111.
defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>;

// Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations.
def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
                       v2f32, fmul_su, fadd_mlx>,
             Requires<[HasVFP4,UseFusedMAC]>;

def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
                       v4f32, fmul_su, fadd_mlx>,
             Requires<[HasVFP4,UseFusedMAC]>;

// Fused Vector Multiply Subtract (floating-point)
def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
                       v2f32, fmul_su, fsub_mlx>,
             Requires<[HasVFP4,UseFusedMAC]>;
def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
                       v4f32, fmul_su, fsub_mlx>,
             Requires<[HasVFP4,UseFusedMAC]>;

// Match @llvm.fma.* intrinsics
def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
          (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
      Requires<[HasVFP4]>;
def : Pat<(v4f32 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
          (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
      Requires<[HasVFP4]>;
// fma with a negated multiplicand maps onto the fused multiply-subtract.
def : Pat<(v2f32 (fma (fneg DPR:$Vn), DPR:$Vm, DPR:$src1)),
          (VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
      Requires<[HasVFP4]>;
def : Pat<(v4f32 (fma (fneg QPR:$Vn), QPR:$Vm, QPR:$src1)),
          (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
      Requires<[HasVFP4]>;

// Vector Subtract Operations.

// VSUB : Vector Subtract (integer and floating-point)
defm VSUB     : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ,
                         "vsub", "i", sub, 0>;
def  VSUBfd   : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
                     v2f32, v2f32, fsub, 0>;
def  VSUBfq   : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
                     v4f32, v4f32, fsub, 0>;
// VSUBL : Vector Subtract Long (Q = D - D)
defm VSUBLs   : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
                            "vsubl", "s", sub, sext, 0>;
defm VSUBLu   : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
                            "vsubl", "u", sub, zext, 0>;
// VSUBW : Vector Subtract Wide (Q = Q - D)
defm VSUBWs   : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>;
defm VSUBWu   : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>;
// VHSUB : Vector Halving Subtract
defm VHSUBs   : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vhsub", "s", int_arm_neon_vhsubs, 0>;
defm VHSUBu   : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vhsub", "u", int_arm_neon_vhsubu, 0>;
// VQSUB : Vector Saturating Subtract
defm VQSUBs   : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm,
                            IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                            "vqsub", "s", int_arm_neon_vqsubs, 0>;
defm VQSUBu   : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
                            IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                            "vqsub", "u", int_arm_neon_vqsubu, 0>;
// VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q)
defm VSUBHN   : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i",
                            int_arm_neon_vsubhn, 0>;
// VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D = Q - Q)
defm VRSUBHN  : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
                            int_arm_neon_vrsubhn, 0>;

// Vector Comparisons.

// VCEQ : Vector Compare Equal
defm VCEQ     : N3V_QHS<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                        IIC_VSUBi4Q, "vceq", "i", NEONvceq, 1>;
def  VCEQfd   : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
                     NEONvceq, 1>;
def  VCEQfq   : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
                     NEONvceq, 1>;

// Compare-against-zero forms accept the two-operand assembly alias.
let TwoOperandAliasConstraint = "$Vm = $Vd" in
defm VCEQz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
                            "$Vd, $Vm, #0", NEONvceqz>;

// VCGE : Vector Compare Greater Than or Equal
defm VCGEs    : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                        IIC_VSUBi4Q, "vcge", "s", NEONvcge, 0>;
defm VCGEu    : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                        IIC_VSUBi4Q, "vcge", "u", NEONvcgeu, 0>;
def  VCGEfd   : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
                     NEONvcge, 0>;
def  VCGEfq   : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
                     NEONvcge, 0>;

let TwoOperandAliasConstraint = "$Vm = $Vd" in {
defm VCGEz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
                            "$Vd, $Vm, #0", NEONvcgez>;
defm VCLEz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
                            "$Vd, $Vm, #0", NEONvclez>;
}

// VCGT : Vector Compare Greater Than
defm VCGTs    : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                        IIC_VSUBi4Q, "vcgt", "s", NEONvcgt, 0>;
defm VCGTu    : N3V_QHS<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                        IIC_VSUBi4Q, "vcgt", "u", NEONvcgtu, 0>;
def  VCGTfd   : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
                     NEONvcgt, 0>;
def  VCGTfq   : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
                     NEONvcgt, 0>;

let TwoOperandAliasConstraint = "$Vm = $Vd" in {
defm VCGTz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
                            "$Vd, $Vm, #0", NEONvcgtz>;
defm VCLTz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
                            "$Vd, $Vm, #0", NEONvcltz>;
}

// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
def  VACGEd   : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
                        "f32", v2i32, v2f32, int_arm_neon_vacged, 0>;
def  VACGEq   : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
                        "f32", v4i32, v4f32, int_arm_neon_vacgeq, 0>;
// VACGT : Vector Absolute Compare Greater Than (aka VCAGT)
def  VACGTd   : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
                        "f32", v2i32, v2f32, int_arm_neon_vacgtd, 0>;
def  VACGTq   : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
                        "f32", v4i32, v4f32, int_arm_neon_vacgtq, 0>;
// VTST : Vector Test Bits
defm VTST     : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                        IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;

// Vector Bitwise Operations.

// Bitwise NOT of a vector, expressed as XOR with an all-ones vector so it can
// be folded into the VBIC/VORN/VBSL patterns below.
def vnotd : PatFrag<(ops node:$in),
                    (xor node:$in, (bitconvert (v8i8 NEONimmAllOnesV)))>;
def vnotq : PatFrag<(ops node:$in),
                    (xor node:$in, (bitconvert (v16i8 NEONimmAllOnesV)))>;


// VAND : Vector Bitwise AND
def  VANDd    : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand",
                      v2i32, v2i32, and, 1>;
def  VANDq    : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand",
                      v4i32, v4i32, and, 1>;

// VEOR : Vector Bitwise Exclusive OR
def  VEORd    : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor",
                      v2i32, v2i32, xor, 1>;
def  VEORq    : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor",
                      v4i32, v4i32, xor, 1>;

// VORR : Vector Bitwise OR
def  VORRd    : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr",
                      v2i32, v2i32, or, 1>;
def  VORRq    : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
                      v4i32, v4i32, or, 1>;

// VORR (immediate) : OR a splatted modified-immediate into each element.
def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
                          (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                                (v4i16 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1,
                          (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                                (v2i32 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
                          (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                                (v8i16 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1,
                          (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                                (v4i32 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}


// VBIC : Vector Bitwise Bit Clear (AND NOT)
let TwoOperandAliasConstraint = "$Vn = $Vd" in {
def  VBICd    : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
                     (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
                     "vbic", "$Vd, $Vn, $Vm", "",
                     [(set DPR:$Vd, (v2i32 (and DPR:$Vn,
                                                (vnotd DPR:$Vm))))]>;
def  VBICq    : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
                     (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
                     "vbic", "$Vd, $Vn, $Vm", "",
                     [(set QPR:$Vd, (v4i32 (and QPR:$Vn,
                                                (vnotq QPR:$Vm))))]>;
}

// VBIC (immediate) : clear the splatted modified-immediate bits per element.
def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
                          (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                                (v4i16 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1,
                          (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                                (v2i32 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1,
                          (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                                (v8i16 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1,
                          (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                                (v4i32 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

// VORN : Vector Bitwise OR NOT
def  VORNd    : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$Vd),
                     (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
                     "vorn", "$Vd, $Vn, $Vm", "",
                     [(set DPR:$Vd, (v2i32 (or DPR:$Vn,
                                               (vnotd DPR:$Vm))))]>;
def  VORNq    : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$Vd),
                     (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
                     "vorn", "$Vd, $Vn, $Vm", "",
                     [(set QPR:$Vd, (v4i32 (or QPR:$Vn,
                                               (vnotq QPR:$Vm))))]>;

// VMVN : Vector Bitwise NOT (Immediate)

let isReMaterializable = 1 in {

def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmvn", "i16", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v4i16 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmvn", "i16", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v8i16 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmvn", "i32", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v2i32 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmvn", "i32", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v4i32 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}
}

// VMVN : Vector Bitwise NOT
def  VMVNd    : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
                     (outs DPR:$Vd), (ins DPR:$Vm), IIC_VSUBiD,
                     "vmvn", "$Vd, $Vm", "",
                     [(set DPR:$Vd, (v2i32 (vnotd DPR:$Vm)))]>;
def  VMVNq    : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
                     (outs QPR:$Vd), (ins QPR:$Vm), IIC_VSUBiD,
                     "vmvn", "$Vd, $Vm", "",
                     [(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>;
def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>;
def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>;

// VBSL : Vector Bitwise Select
def  VBSLd    : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
                     (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                     N3RegFrm, IIC_VCNTiD,
                     "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     [(set DPR:$Vd,
                           (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
// Map the vbsl intrinsic at every 64-bit element type onto the single VBSLd.
def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1),
                                   (v8i8 DPR:$Vn), (v8i8 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
      Requires<[HasNEON]>;
def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1),
                                    (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
      Requires<[HasNEON]>;
def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1),
                                    (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
      Requires<[HasNEON]>;
def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1),
                                    (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
      Requires<[HasNEON]>;
def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1),
                                    (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
      Requires<[HasNEON]>;

// Recognize the open-coded select (Vn & mask) | (Vm & ~mask) as VBSL.
def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
                     (and DPR:$Vm, (vnotd DPR:$Vd)))),
          (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>,
      Requires<[HasNEON]>;

def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd),
                     (and DPR:$Vm, (vnotd DPR:$Vd)))),
          (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>,
      Requires<[HasNEON]>;

def  VBSLq    : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
                     (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                     N3RegFrm, IIC_VCNTiQ,
                     "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     [(set QPR:$Vd,
                           (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;

def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1),
                                    (v16i8 QPR:$Vn), (v16i8 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
      Requires<[HasNEON]>;
def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1),
                                    (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
      Requires<[HasNEON]>;
def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1),
                                    (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
      Requires<[HasNEON]>;
def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1),
                                    (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
      Requires<[HasNEON]>;
def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1),
                                    (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
      Requires<[HasNEON]>;

def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
                     (and QPR:$Vm, (vnotq QPR:$Vd)))),
          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>,
      Requires<[HasNEON]>;
def : Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd),
                     (and QPR:$Vm, (vnotq QPR:$Vd)))),
          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>,
      Requires<[HasNEON]>;

// VBIF : Vector Bitwise Insert if False
// like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
// FIXME: This instruction's encoding MAY NOT BE correct.
def  VBIFd    : N3VX<1, 0, 0b11, 0b0001, 0, 1,
                     (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                     N3RegFrm, IIC_VBINiD,
                     "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     []>;
def  VBIFq    : N3VX<1, 0, 0b11, 0b0001, 1, 1,
                     (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                     N3RegFrm, IIC_VBINiQ,
                     "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     []>;

// VBIT : Vector Bitwise Insert if True
// like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
// FIXME: This instruction's encoding MAY NOT BE correct.
def  VBITd    : N3VX<1, 0, 0b10, 0b0001, 0, 1,
                     (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                     N3RegFrm, IIC_VBINiD,
                     "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     []>;
def  VBITq    : N3VX<1, 0, 0b10, 0b0001, 1, 1,
                     (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                     N3RegFrm, IIC_VBINiQ,
                     "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     []>;

// VBIT/VBIF are not yet implemented.  The TwoAddress pass will not go looking
// for equivalent operations with different register constraints; it just
// inserts copies.

// Vector Absolute Differences.

// VABD : Vector Absolute Difference
defm VABDs    : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vabd", "s", int_arm_neon_vabds, 1>;
defm VABDu    : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vabd", "u", int_arm_neon_vabdu, 1>;
// The f32 forms reuse the int_arm_neon_vabds intrinsic with float vector types.
def  VABDfd   : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
                        "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
def  VABDfq   : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
                        "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;

// VABDL : Vector Absolute Difference Long (Q = | D - D |)
// NOTE(review): built from the D-sized vabd intrinsic plus zext — presumably
// the multiclass widens the intrinsic's result; confirm in its definition.
defm VABDLs   : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
                               "vabdl", "s", int_arm_neon_vabds, zext, 1>;
defm VABDLu   : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
                               "vabdl", "u", int_arm_neon_vabdu, zext, 1>;

// VABA : Vector Absolute Difference and Accumulate
defm VABAs    : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
                             "vaba", "s", int_arm_neon_vabds, add>;
defm VABAu    : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
                             "vaba", "u", int_arm_neon_vabdu, add>;

// VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
defm VABALs   : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD,
                                 "vabal", "s", int_arm_neon_vabds, zext, add>;
defm VABALu   : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,
                                 "vabal", "u", int_arm_neon_vabdu, zext, add>;

// Vector Maximum and Minimum.
// VMAX : Vector Maximum
defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vmax", "s", int_arm_neon_vmaxs, 1>;
defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vmax", "u", int_arm_neon_vmaxu, 1>;
// Floating-point VMAX reuses the vmaxs intrinsic for f32.
def  VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
                      "vmax", "f32",
                      v2f32, v2f32, int_arm_neon_vmaxs, 1>;
def  VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                      "vmax", "f32",
                      v4f32, v4f32, int_arm_neon_vmaxs, 1>;

// VMIN : Vector Minimum
defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vmin", "s", int_arm_neon_vmins, 1>;
defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vmin", "u", int_arm_neon_vminu, 1>;
// Floating-point VMIN reuses the vmins intrinsic for f32.
def  VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
                      "vmin", "f32",
                      v2f32, v2f32, int_arm_neon_vmins, 1>;
def  VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                      "vmin", "f32",
                      v4f32, v4f32, int_arm_neon_vmins, 1>;

// Vector Pairwise Operations.
// VPADD : Vector Pairwise Add
// Pairwise operations are D-register only; they are not commutative
// (last template argument is 0).
def  VPADDi8  : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i8",
                        v8i8, v8i8, int_arm_neon_vpadd, 0>;
def  VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i16",
                        v4i16, v4i16, int_arm_neon_vpadd, 0>;
def  VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i32",
                        v2i32, v2i32, int_arm_neon_vpadd, 0>;
def  VPADDf   : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm,
                        IIC_VPBIND, "vpadd", "f32",
                        v2f32, v2f32, int_arm_neon_vpadd, 0>;

// VPADDL : Vector Pairwise Add Long
defm VPADDLs  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
                             int_arm_neon_vpaddls>;
defm VPADDLu  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u",
                             int_arm_neon_vpaddlu>;

// VPADAL : Vector Pairwise Add and Accumulate Long
defm VPADALs  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s",
                              int_arm_neon_vpadals>;
defm VPADALu  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u",
                              int_arm_neon_vpadalu>;

// VPMAX : Vector Pairwise Maximum
def  VPMAXs8  : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
def  VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
def  VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
def  VPMAXu8  : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
def  VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
def  VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
def  VPMAXf   : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
                        "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;

// VPMIN : Vector Pairwise Minimum
def  VPMINs8  : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>;
def  VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>;
def  VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>;
def  VPMINu8  : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>;
def  VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
def  VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
def  VPMINf   : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
                        "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;

// Vector Reciprocal and Reciprocal Square Root Estimate and Step.
// VRECPE : Vector Reciprocal Estimate
def  VRECPEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
                        IIC_VUNAD, "vrecpe", "u32",
                        v2i32, v2i32, int_arm_neon_vrecpe>;
def  VRECPEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
                        IIC_VUNAQ, "vrecpe", "u32",
                        v4i32, v4i32, int_arm_neon_vrecpe>;
def  VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
                        IIC_VUNAD, "vrecpe", "f32",
                        v2f32, v2f32, int_arm_neon_vrecpe>;
def  VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
                        IIC_VUNAQ, "vrecpe", "f32",
                        v4f32, v4f32, int_arm_neon_vrecpe>;

// VRECPS : Vector Reciprocal Step
def  VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
                        IIC_VRECSD, "vrecps", "f32",
                        v2f32, v2f32, int_arm_neon_vrecps, 1>;
def  VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
                        IIC_VRECSQ, "vrecps", "f32",
                        v4f32, v4f32, int_arm_neon_vrecps, 1>;

// VRSQRTE : Vector Reciprocal Square Root Estimate
def  VRSQRTEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
                         IIC_VUNAD, "vrsqrte", "u32",
                         v2i32, v2i32, int_arm_neon_vrsqrte>;
def  VRSQRTEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
                         IIC_VUNAQ, "vrsqrte", "u32",
                         v4i32, v4i32, int_arm_neon_vrsqrte>;
def  VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
                         IIC_VUNAD, "vrsqrte", "f32",
                         v2f32, v2f32, int_arm_neon_vrsqrte>;
def  VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
                         IIC_VUNAQ, "vrsqrte", "f32",
                         v4f32, v4f32, int_arm_neon_vrsqrte>;

// VRSQRTS : Vector Reciprocal Square Root Step
def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
                        IIC_VRECSD, "vrsqrts", "f32",
                        v2f32, v2f32, int_arm_neon_vrsqrts, 1>;
def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
                        IIC_VRECSQ, "vrsqrts", "f32",
                        v4f32, v4f32, int_arm_neon_vrsqrts, 1>;

// Vector Shifts.
// VSHL : Vector Shift
defm VSHLs    : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm,
                            IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                            "vshl", "s", int_arm_neon_vshifts>;
defm VSHLu    : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
                            IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                            "vshl", "u", int_arm_neon_vshiftu>;

// VSHL : Vector Shift Left (Immediate)
defm VSHLi    : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>;

// VSHR : Vector Shift Right (Immediate)
defm VSHRs    : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", "VSHRs",
                            NEONvshrs>;
defm VSHRu    : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu",
                            NEONvshru>;

// VSHLL : Vector Shift Left Long
defm VSHLLs   : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", NEONvshlls>;
defm VSHLLu   : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u", NEONvshllu>;

// VSHLL : Vector Shift Left Long (with maximum shift count)
// The maximum-shift form is a distinct encoding: Inst{21-16} is a fixed
// opcode field (op21_16) rather than an immediate shift amount.
class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
                bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy,
                ValueType OpTy, Operand ImmTy, SDNode OpNode>
  : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
           ResTy, OpTy, ImmTy, OpNode> {
  let Inst{21-16} = op21_16;
  let DecoderMethod = "DecodeVSHLMaxInstruction";
}
def  VSHLLi8  : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
                          v8i16, v8i8, imm8, NEONvshlli>;
def  VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
                          v4i32, v4i16, imm16, NEONvshlli>;
def  VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
                          v2i64, v2i32, imm32, NEONvshlli>;

// VSHRN : Vector Shift Right and Narrow
defm VSHRN    : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
                           NEONvshrn>;

// VRSHL : Vector Rounding Shift
defm VRSHLs   : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vrshl", "s", int_arm_neon_vrshifts>;
defm VRSHLu   : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vrshl", "u", int_arm_neon_vrshiftu>;
// VRSHR : Vector Rounding Shift Right
defm VRSHRs   : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", "VRSHRs",
                            NEONvrshrs>;
defm VRSHRu   : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", "VRSHRu",
                            NEONvrshru>;

// VRSHRN : Vector Rounding Shift Right and Narrow
defm VRSHRN   : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
                           NEONvrshrn>;

// VQSHL : Vector Saturating Shift
defm VQSHLs   : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqshl", "s", int_arm_neon_vqshifts>;
defm VQSHLu   : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqshl", "u", int_arm_neon_vqshiftu>;
// VQSHL : Vector Saturating Shift Left (Immediate)
defm VQSHLsi  : N2VShL_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshls>;
defm VQSHLui  : N2VShL_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshlu>;

// VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned)
defm VQSHLsu  : N2VShL_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsu>;

// VQSHRN : Vector Saturating Shift Right and Narrow
defm VQSHRNs  : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
                           NEONvqshrns>;
defm VQSHRNu  : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u",
                           NEONvqshrnu>;

// VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned)
defm VQSHRUN  : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
                           NEONvqshrnsu>;

// VQRSHL : Vector Saturating Rounding Shift
defm VQRSHLs  : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqrshl", "s", int_arm_neon_vqrshifts>;
defm VQRSHLu  : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqrshl", "u", int_arm_neon_vqrshiftu>;

// VQRSHRN : Vector Saturating Rounding Shift Right and Narrow
defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
                           NEONvqrshrns>;
defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u",
                           NEONvqrshrnu>;

// VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
                           NEONvqrshrnsu>;

// VSRA : Vector Shift Right and Accumulate
defm VSRAs    : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrs>;
defm VSRAu    : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", NEONvshru>;
// VRSRA : Vector Rounding Shift Right and Accumulate
defm VRSRAs   : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>;
defm VRSRAu   : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>;

// VSLI : Vector Shift Left and Insert
defm VSLI     : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">;

// VSRI : Vector Shift Right and Insert
defm VSRI     : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">;

// Vector Absolute and Saturating Absolute.
// VABS : Vector Absolute Value
defm VABS     : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
                           IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s",
                           int_arm_neon_vabs>;
def  VABSfd   : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
                     "vabs", "f32",
                     v2f32, v2f32, fabs>;
def  VABSfq   : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
                     "vabs", "f32",
                     v4f32, v4f32, fabs>;

// Map the f32 vabs intrinsic onto the fabs-pattern instructions above.
def : Pat<(v2f32 (int_arm_neon_vabs (v2f32 DPR:$src))), (VABSfd DPR:$src)>;
def : Pat<(v4f32 (int_arm_neon_vabs (v4f32 QPR:$src))), (VABSfq QPR:$src)>;

// VQABS : Vector Saturating Absolute Value
defm VQABS    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
                           IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s",
                           int_arm_neon_vqabs>;

// Vector Negate.

// Integer negation is matched as a subtraction from all-zeros.
def vnegd  : PatFrag<(ops node:$in),
                     (sub (bitconvert (v2i32 NEONimmAllZerosV)), node:$in)>;
def vnegq  : PatFrag<(ops node:$in),
                     (sub (bitconvert (v4i32 NEONimmAllZerosV)), node:$in)>;

class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$Vd), (ins DPR:$Vm),
        IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (vnegd DPR:$Vm)))]>;
class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$Vd), (ins QPR:$Vm),
        IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (vnegq QPR:$Vm)))]>;

// VNEG : Vector Negate (integer)
def VNEGs8d  : VNEGD<0b00, "vneg", "s8", v8i8>;
def VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
def VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
def VNEGs8q  : VNEGQ<0b00, "vneg", "s8", v16i8>;
def VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
def VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;

// VNEG : Vector Negate (floating-point)
def VNEGfd   : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
                   (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
                   "vneg", "f32", "$Vd, $Vm", "",
                   [(set DPR:$Vd, (v2f32 (fneg DPR:$Vm)))]>;
def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
                   (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
                   "vneg", "f32", "$Vd, $Vm", "",
                   [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>;

def : Pat<(v8i8  (vnegd DPR:$src)), (VNEGs8d DPR:$src)>;
def : Pat<(v4i16 (vnegd DPR:$src)), (VNEGs16d DPR:$src)>;
def : Pat<(v2i32 (vnegd DPR:$src)), (VNEGs32d DPR:$src)>;
def : Pat<(v16i8 (vnegq QPR:$src)), (VNEGs8q QPR:$src)>;
def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>;
def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>;

// VQNEG : Vector Saturating Negate
defm VQNEG    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
                           IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s",
                           int_arm_neon_vqneg>;

// Vector Bit Counting Operations.

// VCLS : Vector Count Leading Sign Bits
defm VCLS     : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
                           IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s",
                           int_arm_neon_vcls>;
// VCLZ : Vector Count Leading Zeros
defm VCLZ     : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
                           IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
                           ctlz>;
// VCNT : Vector Count One Bits
def  VCNTd    : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                        IIC_VCNTiD, "vcnt", "8",
                        v8i8, v8i8, ctpop>;
def  VCNTq    : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                        IIC_VCNTiQ, "vcnt", "8",
                        v16i8, v16i8, ctpop>;

// Vector Swap
// Both operands are outputs, each tied to the corresponding input.
def  VSWPd    : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
                     (outs DPR:$Vd, DPR:$Vm), (ins DPR:$in1, DPR:$in2),
                     NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
                     []>;
def  VSWPq    : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
                     (outs QPR:$Vd, QPR:$Vm), (ins QPR:$in1, QPR:$in2),
                     NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
                     []>;

// Vector Move Operations.
// VMOV : Vector Move (Register)
// Register-to-register vmov is just an alias for VORR with both source
// operands the same register.
def : InstAlias<"vmov${p} $Vd, $Vm",
                (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
def : InstAlias<"vmov${p} $Vd, $Vm",
                (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;

// VMOV : Vector Move (Immediate)

// Immediate moves are rematerializable: cheaper to re-emit than to spill.
let isReMaterializable = 1 in {
def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
                         "vmov", "i8", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v8i8 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
                         "vmov", "i8", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v16i8 (NEONvmovImm timm:$SIMM)))]>;

def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmov", "i16", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v4i16 (NEONvmovImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmov", "i16", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v8i16 (NEONvmovImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmov", "i32", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v2i32 (NEONvmovImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmov", "i32", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v4i32 (NEONvmovImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd),
                         (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
                         "vmov", "i64", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v1i64 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd),
                         (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
                         "vmov", "i64", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v2i64 (NEONvmovImm timm:$SIMM)))]>;

def VMOVv2f32 : N1ModImm<1, 0b000, 0b1111, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
                         "vmov", "f32", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v2f32 (NEONvmovFPImm timm:$SIMM)))]>;
def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
                         "vmov", "f32", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v4f32 (NEONvmovFPImm timm:$SIMM)))]>;
} // isReMaterializable

// VMOV : Vector Get Lane (move scalar to ARM core register)

def VGETLNs8 : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
                         (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
                         IIC_VMOVSI, "vmov", "s8", "$R, $V$lane",
                         [(set GPR:$R, (NEONvgetlanes (v8i8 DPR:$V),
                                                      imm:$lane))]> {
  // The lane index is split across Inst{21} and Inst{6-5}.
  let Inst{21}  = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
                          (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
                          IIC_VMOVSI, "vmov", "s16", "$R, $V$lane",
                          [(set GPR:$R, (NEONvgetlanes (v4i16 DPR:$V),
                                                       imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6}  = lane{0};
}
def VGETLNu8 : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
                         (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
                         IIC_VMOVSI, "vmov", "u8", "$R, $V$lane",
                         [(set GPR:$R, (NEONvgetlaneu (v8i8 DPR:$V),
                                                      imm:$lane))]> {
  let Inst{21}  = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
                          (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
                          IIC_VMOVSI, "vmov", "u16", "$R, $V$lane",
                          [(set GPR:$R, (NEONvgetlaneu (v4i16 DPR:$V),
                                                       imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6}  = lane{0};
}
def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
                          (outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane),
                          IIC_VMOVSI, "vmov", "32", "$R, $V$lane",
                          [(set GPR:$R, (extractelt (v2i32 DPR:$V),
                                                    imm:$lane))]>,
                Requires<[HasNEON, HasFastVGETLNi32]> {
  let Inst{21} = lane{0};
}
// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
// Q-register extracts: select the right D subregister, then extract from it.
def : Pat<(NEONvgetlanes (v16i8 QPR:$src), imm:$lane),
          (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src,
                           (DSubReg_i8_reg imm:$lane))),
                    (SubReg_i8_lane imm:$lane))>;
def : Pat<(NEONvgetlanes (v8i16 QPR:$src), imm:$lane),
          (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                             (DSubReg_i16_reg imm:$lane))),
                     (SubReg_i16_lane imm:$lane))>;
def : Pat<(NEONvgetlaneu (v16i8 QPR:$src), imm:$lane),
          (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src,
                           (DSubReg_i8_reg imm:$lane))),
                    (SubReg_i8_lane imm:$lane))>;
def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane),
          (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                             (DSubReg_i16_reg imm:$lane))),
                     (SubReg_i16_lane imm:$lane))>;
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
          (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
                             (DSubReg_i32_reg imm:$lane))),
                     (SubReg_i32_lane imm:$lane))>,
      Requires<[HasNEON, HasFastVGETLNi32]>;
// On uarchs where VMOV-from-lane is slow, go through an S subregister copy.
def : Pat<(extractelt (v2i32 DPR:$src), imm:$lane),
          (COPY_TO_REGCLASS
            (i32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
      Requires<[HasNEON, HasSlowVGETLNi32]>;
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
          (COPY_TO_REGCLASS
            (i32 (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
      Requires<[HasNEON, HasSlowVGETLNi32]>;
def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
          (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)),
                          (SSubReg_f32_reg imm:$src2))>;
def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1),QPR_VFP2)),
                          (SSubReg_f32_reg imm:$src2))>;
//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
//          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;


// VMOV : Vector Set Lane (move ARM core register to scalar)

let Constraints = "$src1 = $V" in {
def VSETLNi8 : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V),
                         (ins DPR:$src1, GPR:$R, VectorIndex8:$lane),
                         IIC_VMOVISL, "vmov", "8", "$V$lane, $R",
                         [(set DPR:$V, (vector_insert (v8i8 DPR:$src1),
                                                      GPR:$R, imm:$lane))]> {
  let Inst{21}  = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V),
                          (ins DPR:$src1, GPR:$R, VectorIndex16:$lane),
                          IIC_VMOVISL, "vmov", "16", "$V$lane, $R",
                          [(set DPR:$V, (vector_insert (v4i16 DPR:$src1),
                                                       GPR:$R, imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6}  = lane{0};
}
def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
                          (ins DPR:$src1, GPR:$R, VectorIndex32:$lane),
                          IIC_VMOVISL, "vmov", "32", "$V$lane, $R",
                          [(set DPR:$V, (insertelt (v2i32 DPR:$src1),
                                                   GPR:$R, imm:$lane))]> {
  let Inst{21} = lane{0};
}
}
// Q-register inserts: set the lane in the right D subregister, then put it
// back into the Q register.
def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
          (v16i8 (INSERT_SUBREG QPR:$src1,
                  (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
                                         (DSubReg_i8_reg imm:$lane))),
                                  GPR:$src2, (SubReg_i8_lane imm:$lane))),
                  (DSubReg_i8_reg imm:$lane)))>;
def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
          (v8i16 (INSERT_SUBREG QPR:$src1,
                  (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
                                            (DSubReg_i16_reg imm:$lane))),
                                    GPR:$src2, (SubReg_i16_lane imm:$lane))),
                  (DSubReg_i16_reg imm:$lane)))>;
def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
          (v4i32 (INSERT_SUBREG QPR:$src1,
                  (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
                                            (DSubReg_i32_reg imm:$lane))),
                                    GPR:$src2, (SubReg_i32_lane imm:$lane))),
                  (DSubReg_i32_reg imm:$lane)))>;

def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
                         SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)),
                         SPR:$src2, (SSubReg_f32_reg imm:$src3))>;

//def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
//          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;

def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;

def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
          (VSETLNi8  (v8i8  (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v4i16 (scalar_to_vector GPR:$src)),
          (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
          (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;

def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                         (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;
def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
                         (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;
def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                         (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;

// VDUP : Vector Duplicate (from ARM core register to all elements)

class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$V), (ins GPR:$R),
          IIC_VMOVIS, "vdup", Dt, "$V, $R",
          [(set DPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;
class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$V), (ins GPR:$R),
          IIC_VMOVIS, "vdup", Dt, "$V, $R",
          [(set QPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;

def  VDUP8d   : VDUPD<0b11101100, 0b00, "8", v8i8>;
def  VDUP16d  : VDUPD<0b11101000, 0b01, "16", v4i16>;
// VDUP32d is only selected on uarchs with fast VDUP.32; the slow-uarch
// patterns below use VMOVDRR instead.
def  VDUP32d  : VDUPD<0b11101000, 0b00, "32", v2i32>,
                Requires<[HasNEON, HasFastVDUP32]>;
def  VDUP8q   : VDUPQ<0b11101110, 0b00, "8", v16i8>;
def  VDUP16q  : VDUPQ<0b11101010, 0b01, "16", v8i16>;
def  VDUP32q  : VDUPQ<0b11101010, 0b00, "32", v4i32>;

// NEONvdup patterns for uarchs with fast VDUP.32.
def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>,
      Requires<[HasNEON,HasFastVDUP32]>;
def : Pat<(v4f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>;

// NEONvdup patterns for uarchs with slow VDUP.32 - use VMOVDRR instead.
def : Pat<(v2i32 (NEONvdup (i32 GPR:$R))), (VMOVDRR GPR:$R, GPR:$R)>,
      Requires<[HasNEON,HasSlowVDUP32]>;
def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))),
          (VMOVDRR GPR:$R, GPR:$R)>,
      Requires<[HasNEON,HasSlowVDUP32]>;

// VDUP : Vector Duplicate Lane (from scalar to all elements)

class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
              ValueType Ty, Operand IdxTy>
  : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
              IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm$lane",
              [(set DPR:$Vd, (Ty (NEONvduplane (Ty DPR:$Vm), imm:$lane)))]>;

class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Operand IdxTy>
  : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
              IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm$lane",
              [(set QPR:$Vd, (ResTy (NEONvduplane (OpTy DPR:$Vm),
                                      VectorIndex32:$lane)))]>;

// Inst{19-16} is partially specified depending on the element size.

def VDUPLN8d  : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8, VectorIndex8> {
  bits<3> lane;
  let Inst{19-17} = lane{2-0};
}
def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16, VectorIndex16> {
  bits<2> lane;
  let Inst{19-18} = lane{1-0};
}
def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32, VectorIndex32> {
  bits<1> lane;
  let Inst{19} = lane{0};
}
def VDUPLN8q  : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8, VectorIndex8> {
  bits<3> lane;
  let Inst{19-17} = lane{2-0};
}
def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16, VectorIndex16> {
  bits<2> lane;
  let Inst{19-18} = lane{1-0};
}
def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32, VectorIndex32> {
  bits<1> lane;
  let Inst{19} = lane{0};
}

def : Pat<(v2f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
          (VDUPLN32d DPR:$Vm, imm:$lane)>;

def : Pat<(v4f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
          (VDUPLN32q DPR:$Vm, imm:$lane)>;

// Duplicating a lane of a Q register: extract the containing D subregister
// first, then duplicate from the adjusted lane index.
def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)),
          (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
                                  (DSubReg_i8_reg imm:$lane))),
                           (SubReg_i8_lane imm:$lane)))>;
def : Pat<(v8i16 (NEONvduplane (v8i16 QPR:$src), imm:$lane)),
          (v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src,
                                    (DSubReg_i16_reg imm:$lane))),
                            (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)),
          (v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src,
                                    (DSubReg_i32_reg imm:$lane))),
                            (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)),
          (v4f32 (VDUPLN32q (v2f32 (EXTRACT_SUBREG QPR:$src,
                                    (DSubReg_i32_reg imm:$lane))),
                            (SubReg_i32_lane imm:$lane)))>;

// Pseudos for duplicating an f32 held in an S register; expanded later.
def  VDUPfdf : PseudoNeonI<(outs DPR:$dst), (ins SPR:$src), IIC_VMOVD, "",
                           [(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]>;
def  VDUPfqf : PseudoNeonI<(outs QPR:$dst), (ins SPR:$src), IIC_VMOVD, "",
                           [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>;

// VMOVN : Vector Narrowing Move
defm VMOVN    : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN,
                         "vmovn", "i", trunc>;
// VQMOVN : Vector Saturating Narrowing Move
defm VQMOVNs  : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
                            "vqmovn", "s", int_arm_neon_vqmovns>;
defm VQMOVNu  : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD,
                            "vqmovn", "u", int_arm_neon_vqmovnu>;
defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
                            "vqmovun", "s", int_arm_neon_vqmovnsu>;
// VMOVL : Vector Lengthening Move
defm VMOVLs   : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
defm VMOVLu   : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;
// anyext is lowered as a zero-extending VMOVL.
def : Pat<(v8i16 (anyext (v8i8 DPR:$Vm))), (VMOVLuv8i16 DPR:$Vm)>;
def : Pat<(v4i32 (anyext (v4i16 DPR:$Vm))), (VMOVLuv4i32 DPR:$Vm)>;
def : Pat<(v2i64 (anyext (v2i32 DPR:$Vm))), (VMOVLuv2i64 DPR:$Vm)>;

// Vector Conversions.
// VCVT : Vector Convert Between Floating-Point and Integers
def  VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
                     v2i32, v2f32, fp_to_sint>;
def  VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
                     v2i32, v2f32, fp_to_uint>;
def  VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
                     v2f32, v2i32, sint_to_fp>;
def  VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
                     v2f32, v2i32, uint_to_fp>;

def  VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
                     v4i32, v4f32, fp_to_sint>;
def  VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
                     v4i32, v4f32, fp_to_uint>;
def  VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
                     v4f32, v4i32, sint_to_fp>;
def  VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
                     v4f32, v4i32, uint_to_fp>;

// VCVT : Vector Convert Between Floating-Point and Fixed-Point.
// Fixed-point conversions take a fractional-bits operand; they are selected
// from the corresponding ARM NEON intrinsics (int_arm_neon_vcvtfp2fxs etc.)
// rather than from generic ISD conversion nodes.
let DecoderMethod = "DecodeVCVTD" in {
def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
                        v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
                        v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
                        v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
                        v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
}

let DecoderMethod = "DecodeVCVTQ" in {
def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
                        v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
                        v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
                        v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
                        v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
}

// VCVT : Vector Convert Between Half-Precision and Single-Precision.
// Narrowing (f32 -> f16) and lengthening (f16 -> f32) forms; both require
// the half-precision extension (HasFP16) in addition to NEON.
def  VCVTf2h  : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
                        IIC_VUNAQ, "vcvt", "f16.f32",
                        v4i16, v4f32, int_arm_neon_vcvtfp2hf>,
                Requires<[HasNEON, HasFP16]>;
def  VCVTh2f  : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0,
                        IIC_VUNAQ, "vcvt", "f32.f16",
                        v4f32, v4i16, int_arm_neon_vcvthf2fp>,
                Requires<[HasNEON, HasFP16]>;

// Vector Reverse.
// VREV64 : Vector Reverse elements within 64-bit doublewords
//
// The three VREV flavors differ only in op11_7 (0b00000/0b00001/0b00010) and
// in which element sizes make sense for the reversal span; op19_18 encodes
// the element size.

class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VMOVD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (NEONvrev64 (Ty DPR:$Vm))))]>;
class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VMOVQ,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (NEONvrev64 (Ty QPR:$Vm))))]>;

def VREV64d8  : VREV64D<0b00, "vrev64", "8", v8i8>;
def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
// v2f32 reuses the 32-bit integer reversal; bit pattern is unchanged.
def : Pat<(v2f32 (NEONvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>;

def VREV64q8  : VREV64Q<0b00, "vrev64", "8", v16i8>;
def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
def : Pat<(v4f32 (NEONvrev64 (v4f32 QPR:$Vm))), (VREV64q32 QPR:$Vm)>;

// VREV32 : Vector Reverse elements within 32-bit words

class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VMOVD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (NEONvrev32 (Ty DPR:$Vm))))]>;
class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VMOVQ,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (NEONvrev32 (Ty QPR:$Vm))))]>;

// Only 8- and 16-bit elements can be reversed within a 32-bit word.
def VREV32d8  : VREV32D<0b00, "vrev32", "8", v8i8>;
def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;

def VREV32q8  : VREV32Q<0b00, "vrev32", "8", v16i8>;
def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;

// VREV16 : Vector Reverse elements within 16-bit halfwords

class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VMOVD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (NEONvrev16 (Ty DPR:$Vm))))]>;
class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VMOVQ,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (NEONvrev16 (Ty QPR:$Vm))))]>;

// Only 8-bit elements can be reversed within a 16-bit halfword.
def VREV16d8 : VREV16D<0b00, "vrev16", "8", v8i8>;
def VREV16q8 : VREV16Q<0b00, "vrev16", "8", v16i8>;

// Other Vector Shuffles.

// Aligned extractions: really just dropping registers
//
// Extracting an aligned subvector of a Q register is a no-op at the register
// level: the LaneCVT SDNodeXForm maps the start lane to the right D (or S)
// subregister index for EXTRACT_SUBREG.

class AlignedVEXTq<ValueType DestTy, ValueType SrcTy, SDNodeXForm LaneCVT>
      : Pat<(DestTy (vector_extract_subvec (SrcTy QPR:$src), (i32 imm:$start))),
            (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>;

def : AlignedVEXTq<v8i8, v16i8, DSubReg_i8_reg>;

def : AlignedVEXTq<v4i16, v8i16, DSubReg_i16_reg>;

def : AlignedVEXTq<v2i32, v4i32, DSubReg_i32_reg>;

def : AlignedVEXTq<v1i64, v2i64, DSubReg_f64_reg>;

def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;


// VEXT : Vector Extract


// All of these have a two-operand InstAlias.
// VEXT extracts a contiguous run of elements spanning two source registers,
// starting at the element given by $index. The immediate operand class
// (immTy) is supplied per element size so the assembler rejects out-of-range
// indices; the instantiations below then place the (byte-scaled) index into
// the right slice of the imm4 field, Inst{11-8}.
let TwoOperandAliasConstraint = "$Vn = $Vd" in {
class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
  : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm,
        IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
        [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
                                     (Ty DPR:$Vm), imm:$index)))]> {
  bits<4> index;
  let Inst{11-8} = index{3-0};
}

class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
  : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd),
        // FIX: use the immTy template parameter instead of a hard-coded
        // imm0_15, so VEXTq16/VEXTq32/VEXTq64 actually restrict the index
        // range (imm0_7/imm0_3/imm0_1) at parse time.
        (ins QPR:$Vn, QPR:$Vm, immTy:$index), NVExtFrm,
        IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
        [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
                                     (Ty QPR:$Vm), imm:$index)))]> {
  bits<4> index;
  let Inst{11-8} = index{3-0};
}
}

def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> {
  let Inst{11-8} = index{3-0};
}
def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> {
  // Halfword index is scaled by 2 into the byte-granular imm4 field.
  let Inst{11-9} = index{2-0};
  let Inst{8}    = 0b0;
}
def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
  // Word index is scaled by 4.
  let Inst{11-10} = index{1-0};
  let Inst{9-8}   = 0b00;
}
def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn),
                           (v2f32 DPR:$Vm),
                           (i32 imm:$index))),
          (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;

def VEXTq8 : VEXTq<"vext", "8", v16i8, imm0_15> {
  let Inst{11-8} = index{3-0};
}
def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> {
  let Inst{11-9} = index{2-0};
  let Inst{8}    = 0b0;
}
def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> {
  let Inst{11-10} = index{1-0};
  let Inst{9-8}   = 0b00;
}
def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> {
  // Doubleword index is scaled by 8.
  let Inst{11}   = index{0};
  let Inst{10-8} = 0b000;
}
def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn),
                           (v4f32 QPR:$Vm),
                           (i32 imm:$index))),
          (VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>;

// VTRN : Vector Transpose

def  VTRNd8   : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
def  VTRNd16  : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
def  VTRNd32  : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;

def  VTRNq8   : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
def  VTRNq16  : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
def  VTRNq32  : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;

// VUZP : Vector Unzip (Deinterleave)

def  VUZPd8   : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
def  VUZPd16  : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
// vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
def : NEONInstAlias<"vuzp${p}.32 $Dd, $Dm",
                    (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;

def  VUZPq8   : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
def  VUZPq16  : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
def  VUZPq32  : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;

// VZIP : Vector Zip (Interleave)

def  VZIPd8   : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
def  VZIPd16  : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
// vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
def : NEONInstAlias<"vzip${p}.32 $Dd, $Dm",
                    (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;

def  VZIPq8   : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
def  VZIPq16  : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
def  VZIPq32  : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;

// Vector Table Lookup and Table Extension.
// VTBL : Vector Table Lookup

let DecoderMethod = "DecodeTBLInstruction" in {
def  VTBL1
  : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd),
        (ins VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
        "vtbl", "8", "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbl1 VecListOneD:$Vn, DPR:$Vm)))]>;

// Multi-register table forms: the table registers must be consecutive, hence
// hasExtraSrcRegAllocReq. No ISel patterns here; selection goes through the
// *Pseudo forms below.
let hasExtraSrcRegAllocReq = 1 in {
def  VTBL2
  : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
        (ins VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def  VTBL3
  : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd),
        (ins VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB3,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def  VTBL4
  : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd),
        (ins VecListFourD:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTB4,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
} // hasExtraSrcRegAllocReq = 1

def  VTBL3Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>;
def  VTBL4Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB4, "", []>;

// VTBX : Vector Table Extension
// Like VTBL but out-of-range indices leave the corresponding destination
// byte unchanged, hence the tied $orig operand.
def  VTBX1
  : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1,
        "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd",
        [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1
                               DPR:$orig, VecListOneD:$Vn, DPR:$Vm)))]>;
let hasExtraSrcRegAllocReq = 1 in {
def  VTBX2
  : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
        "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>;
def  VTBX3
  : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListThreeD:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTBX3,
        "vtbx", "8", "$Vd, $Vn, $Vm",
        "$orig = $Vd", []>;
def  VTBX4
  : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
        "vtbx", "8", "$Vd, $Vn, $Vm",
        "$orig = $Vd", []>;
} // hasExtraSrcRegAllocReq = 1

def  VTBX3Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
                IIC_VTBX3, "$orig = $dst", []>;
def  VTBX4Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
                IIC_VTBX4, "$orig = $dst", []>;
} // DecoderMethod = "DecodeTBLInstruction"

//===----------------------------------------------------------------------===//
// NEON instructions for single-precision FP math
//===----------------------------------------------------------------------===//

// These pattern classes implement scalar f32 operations on the NEON unit by
// moving the scalar into lane 0 of a D register (via INSERT_SUBREG into an
// IMPLICIT_DEF), running the vector instruction, and extracting lane 0 back.

class N2VSPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$a)),
              (EXTRACT_SUBREG
               (v2f32 (COPY_TO_REGCLASS (Inst
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$a, ssub_0)), DPR_VFP2)), ssub_0)>;

class N3VSPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
              (EXTRACT_SUBREG
               (v2f32 (COPY_TO_REGCLASS (Inst
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$a, ssub_0),
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;

class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
              (EXTRACT_SUBREG
               (v2f32 (COPY_TO_REGCLASS (Inst
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$acc, ssub_0),
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$a, ssub_0),
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;

def : N3VSPat<fadd, VADDfd>;
def : N3VSPat<fsub, VSUBfd>;
def : N3VSPat<fmul, VMULfd>;
// Multiply-accumulate: separate-instruction VMLA/VMLS when fused MAC is not
// in use, VFMA/VFMS (VFP4) when it is.
def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
      Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>;
def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
      Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>;
def : N3VSMulOpPat<fmul, fadd, VFMAfd>,
      Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
def : N3VSMulOpPat<fmul, fsub, VFMSfd>,
      Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
def : N2VSPat<fabs, VABSfd>;
def : N2VSPat<fneg, VNEGfd>;
def : N3VSPat<NEONfmax, VMAXfd>;
def : N3VSPat<NEONfmin, VMINfd>;
def : N2VSPat<arm_ftosi, VCVTf2sd>;
def : N2VSPat<arm_ftoui, VCVTf2ud>;
def : N2VSPat<arm_sitof, VCVTs2fd>;
def : N2VSPat<arm_uitof, VCVTu2fd>;

// Prefer VMOVDRR for i32 -> f32 bitcasts, it can write all DPR registers.
def : Pat<(f32 (bitconvert GPR:$a)),
          (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>,
        Requires<[HasNEON, DontUseVMOVSR]>;

//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//

// bit_convert
// All same-size vector bitcasts are no-ops at the register level; the full
// cross-product of 64-bit types, then of 128-bit types, follows.
def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v8i8  DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (f64   DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v8i8  DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (f64   DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v8i8  DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (f64   DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v1i64 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v2i32 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v4i16 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (f64   DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v2f32 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(f64   (bitconvert (v1i64 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v2i32 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v4i16 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v8i8  DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v2f32 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(v2f32 (bitconvert (f64   DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v8i8  DPR:$src))), (v2f32 DPR:$src)>;

def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;

// Fold extracting an element out of a v2i32 into a vfp register.
def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))),
          (f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;

// Vector lengthening move with load, matching extending loads.

// extload, zextload and sextload for a standard lengthening load. Example:
// Lengthen_Single<"8", "i16", "8"> =
//     Pat<(v8i16 (extloadvi8 addrmode6:$addr))
//         (VMOVLuv8i16 (VLD1d8 addrmode6:$addr))>;
multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> {
  let AddedComplexity = 10 in {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("extloadvi" # SrcTy) addrmode6:$addr)),
                 (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
                    (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;

  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("zextloadvi" # SrcTy) addrmode6:$addr)),
               (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
                  (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;

  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("sextloadvi" # SrcTy) addrmode6:$addr)),
               (!cast<Instruction>("VMOVLsv" # DestLanes # DestTy)
                  (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;
  }
}

// extload, zextload and sextload for a lengthening load which only uses
// half the lanes available. Example:
// Lengthen_HalfSingle<"4", "i16", "8", "i16", "i8"> =
//     Pat<(v4i16 (extloadvi8 addrmode6oneL32:$addr)),
//         (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
//                                                 (f64 (IMPLICIT_DEF)), (i32 0))),
//                         dsub_0)>;
multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy,
                               string InsnLanes, string InsnTy> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
       (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
         (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
         dsub_0)>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
       (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
         (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
         dsub_0)>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
       (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
         (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
         dsub_0)>;
}

// extload, zextload and sextload for a lengthening load followed by another
// lengthening load, to quadruple the initial length.
//
// Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32"> =
//     Pat<(v4i32 (extloadvi8 addrmode6oneL32:$addr))
//         (EXTRACT_SUBREG (VMOVLuv4i32
//           (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
//                                                   (f64 (IMPLICIT_DEF)),
//                                                   (i32 0))),
//                           dsub_0)),
//           dsub_0)>;
multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
                           string Insn2Ty> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
         (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
         (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0))>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
         (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
         (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0))>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
         (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
         (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0))>;
}

// extload, zextload and sextload for a lengthening load followed by another
// lengthening load, to quadruple the initial length, but which ends up only
// requiring half the available lanes (a 64-bit outcome instead of a 128-bit).
//
// Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32"> =
//     Pat<(v2i32 (extloadvi8 addrmode6:$addr))
//         (EXTRACT_SUBREG (VMOVLuv4i32
//           (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd16 addrmode6:$addr,
//                                                   (f64 (IMPLICIT_DEF)), (i32 0))),
//                           dsub_0)),
//           dsub_0)>;
multiclass Lengthen_HalfDouble<string DestLanes, string DestTy, string SrcTy,
                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
                           string Insn2Ty> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
         (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0)),
           dsub_0)>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
         (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0)),
           dsub_0)>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
         (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0)),
           dsub_0)>;
}

defm : Lengthen_Single<"8", "i16", "8">; // v8i8 -> v8i16
defm : Lengthen_Single<"4", "i32", "16">; // v4i16 -> v4i32
defm : Lengthen_Single<"2", "i64", "32">; // v2i32 -> v2i64

defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16
defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32

// Double lengthening - v4i8 -> v4i16 -> v4i32
defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">;
// v2i8 -> v2i16 -> v2i32
defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">;
// v2i16 -> v2i32 -> v2i64
defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">;

// Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64
def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
      (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
         (VLD1LNd16 addrmode6:$addr,
                    (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
      (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
         (VLD1LNd16 addrmode6:$addr,
                    (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
      (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
         (VLD1LNd16 addrmode6:$addr,
                    (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;

//===----------------------------------------------------------------------===//
// Assembler aliases
//

// Legacy VFP2 mnemonics for moving a GPR into the high/low half of a D reg.
def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn",
                    (VSETLNi32 DPR:$Dd, GPR:$Rn, 1, pred:$p)>;
def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn",
                    (VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>;

// VAND/VBIC/VEOR/VORR accept but do not require a type suffix.
// Bitwise ops are element-size agnostic, so each alias is expanded for every
// data-type suffix by NEONDTAnyInstAlias. Three-operand forms first.
defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
                         (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
                         (VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
                         (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
                         (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
                         (VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
                         (VEORq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
                         (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
                         (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
// ... two-operand aliases
defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
                         (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
                         (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
                         (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
                         (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
                         (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
                         (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;

// VLD1 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// Plain (no-writeback) forms, one per element size; the VecListOneD*Indexed
// operand carries both the register and the lane.
def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr",
                  (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr",
                  (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr",
                  (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;

// Post-increment writeback ("$addr!") forms.
def VLD1LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr!",
                  (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD1LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr!",
                  (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD1LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr!",
                  (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Register-offset writeback ("$addr, $Rm") forms.
def VLD1LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListOneDByteIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD1LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListOneDHWordIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD1LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListOneDWordIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;


// VST1 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// Mirrors the VLD1 single-lane pseudos above, for stores.
def VST1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr",
                  (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr",
                  (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr",
                  (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;

// Post-increment writeback forms.
def VST1LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr!",
                  (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST1LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr!",
                  (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST1LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr!",
                  (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Register-offset writeback forms.
def VST1LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListOneDByteIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST1LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListOneDHWordIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST1LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListOneDWordIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;

// VLD2 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// NOTE(review): "d" variants use D-register-spaced (Two*D*) lists, "q" variants use Q-spaced (Two*Q*) lists; q forms have no ".8" version.
6008 def VLD2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr", 6009 (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>; 6010 def VLD2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr", 6011 (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6012 def VLD2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr", 6013 (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6014 def VLD2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr", 6015 (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6016 def VLD2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr", 6017 (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6018 6019 def VLD2LNdWB_fixed_Asm_8 : 6020 NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr!", 6021 (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>; 6022 def VLD2LNdWB_fixed_Asm_16 : 6023 NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!", 6024 (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6025 def VLD2LNdWB_fixed_Asm_32 : 6026 NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!", 6027 (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6028 def VLD2LNqWB_fixed_Asm_16 : 6029 NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!", 6030 (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6031 def VLD2LNqWB_fixed_Asm_32 : 6032 NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!", 6033 (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6034 def VLD2LNdWB_register_Asm_8 : 6035 NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr, $Rm", 6036 (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, 6037 rGPR:$Rm, pred:$p)>; 6038 def VLD2LNdWB_register_Asm_16 : 6039 NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm", 6040 (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, 6041 rGPR:$Rm, 
pred:$p)>; 6042 def VLD2LNdWB_register_Asm_32 : 6043 NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm", 6044 (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, 6045 rGPR:$Rm, pred:$p)>; 6046 def VLD2LNqWB_register_Asm_16 : 6047 NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm", 6048 (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, 6049 rGPR:$Rm, pred:$p)>; 6050 def VLD2LNqWB_register_Asm_32 : 6051 NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm", 6052 (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, 6053 rGPR:$Rm, pred:$p)>; 6054 6055 6056 // VST2 single-lane pseudo-instructions. These need special handling for 6057 // the lane index that an InstAlias can't handle, so we use these instead. 6058 def VST2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr", 6059 (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>; 6060 def VST2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr", 6061 (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6062 def VST2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr", 6063 (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6064 def VST2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr", 6065 (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6066 def VST2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr", 6067 (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6068 6069 def VST2LNdWB_fixed_Asm_8 : 6070 NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr!", 6071 (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>; 6072 def VST2LNdWB_fixed_Asm_16 : 6073 NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!", 6074 (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6075 def VST2LNdWB_fixed_Asm_32 : 6076 NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!", 6077 (ins 
VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6078 def VST2LNqWB_fixed_Asm_16 : 6079 NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!", 6080 (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6081 def VST2LNqWB_fixed_Asm_32 : 6082 NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!", 6083 (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6084 def VST2LNdWB_register_Asm_8 : 6085 NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr, $Rm", 6086 (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, 6087 rGPR:$Rm, pred:$p)>; 6088 def VST2LNdWB_register_Asm_16 : 6089 NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm", 6090 (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, 6091 rGPR:$Rm, pred:$p)>; 6092 def VST2LNdWB_register_Asm_32 : 6093 NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm", 6094 (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, 6095 rGPR:$Rm, pred:$p)>; 6096 def VST2LNqWB_register_Asm_16 : 6097 NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm", 6098 (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, 6099 rGPR:$Rm, pred:$p)>; 6100 def VST2LNqWB_register_Asm_32 : 6101 NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm", 6102 (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, 6103 rGPR:$Rm, pred:$p)>; 6104 6105 // VLD3 all-lanes pseudo-instructions. These need special handling for 6106 // the lane index that an InstAlias can't handle, so we use these instead. 
// NOTE(review): all-lanes (DUP) forms take VecListThree{D,Q}AllLanes lists and have no per-element-size list classes, unlike the lane-indexed forms.
6107 def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", 6108 (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; 6109 def VLD3DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", 6110 (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; 6111 def VLD3DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", 6112 (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; 6113 def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", 6114 (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; 6115 def VLD3DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", 6116 (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; 6117 def VLD3DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", 6118 (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; 6119 6120 def VLD3DUPdWB_fixed_Asm_8 : 6121 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", 6122 (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; 6123 def VLD3DUPdWB_fixed_Asm_16 : 6124 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", 6125 (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; 6126 def VLD3DUPdWB_fixed_Asm_32 : 6127 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", 6128 (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; 6129 def VLD3DUPqWB_fixed_Asm_8 : 6130 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", 6131 (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; 6132 def VLD3DUPqWB_fixed_Asm_16 : 6133 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", 6134 (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; 6135 def VLD3DUPqWB_fixed_Asm_32 : 6136 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", 6137 (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; 6138 def VLD3DUPdWB_register_Asm_8 : 6139 
NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", 6140 (ins VecListThreeDAllLanes:$list, addrmode6:$addr, 6141 rGPR:$Rm, pred:$p)>; 6142 def VLD3DUPdWB_register_Asm_16 : 6143 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", 6144 (ins VecListThreeDAllLanes:$list, addrmode6:$addr, 6145 rGPR:$Rm, pred:$p)>; 6146 def VLD3DUPdWB_register_Asm_32 : 6147 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", 6148 (ins VecListThreeDAllLanes:$list, addrmode6:$addr, 6149 rGPR:$Rm, pred:$p)>; 6150 def VLD3DUPqWB_register_Asm_8 : 6151 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", 6152 (ins VecListThreeQAllLanes:$list, addrmode6:$addr, 6153 rGPR:$Rm, pred:$p)>; 6154 def VLD3DUPqWB_register_Asm_16 : 6155 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", 6156 (ins VecListThreeQAllLanes:$list, addrmode6:$addr, 6157 rGPR:$Rm, pred:$p)>; 6158 def VLD3DUPqWB_register_Asm_32 : 6159 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", 6160 (ins VecListThreeQAllLanes:$list, addrmode6:$addr, 6161 rGPR:$Rm, pred:$p)>; 6162 6163 6164 // VLD3 single-lane pseudo-instructions. These need special handling for 6165 // the lane index that an InstAlias can't handle, so we use these instead. 
// NOTE(review): lane-indexed forms encode the element size in the list class (ByteIndexed/HWordIndexed/WordIndexed); multiple-structure forms use plain VecListThree{D,Q}.
6166 def VLD3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", 6167 (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>; 6168 def VLD3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", 6169 (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6170 def VLD3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", 6171 (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6172 def VLD3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", 6173 (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6174 def VLD3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", 6175 (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6176 6177 def VLD3LNdWB_fixed_Asm_8 : 6178 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", 6179 (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>; 6180 def VLD3LNdWB_fixed_Asm_16 : 6181 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", 6182 (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6183 def VLD3LNdWB_fixed_Asm_32 : 6184 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", 6185 (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6186 def VLD3LNqWB_fixed_Asm_16 : 6187 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", 6188 (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6189 def VLD3LNqWB_fixed_Asm_32 : 6190 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", 6191 (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6192 def VLD3LNdWB_register_Asm_8 : 6193 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", 6194 (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, 6195 rGPR:$Rm, pred:$p)>; 6196 def VLD3LNdWB_register_Asm_16 : 6197 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", 6198 (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, 
6199 rGPR:$Rm, pred:$p)>; 6200 def VLD3LNdWB_register_Asm_32 : 6201 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", 6202 (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, 6203 rGPR:$Rm, pred:$p)>; 6204 def VLD3LNqWB_register_Asm_16 : 6205 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", 6206 (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, 6207 rGPR:$Rm, pred:$p)>; 6208 def VLD3LNqWB_register_Asm_32 : 6209 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", 6210 (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, 6211 rGPR:$Rm, pred:$p)>; 6212 6213 // VLD3 multiple structure pseudo-instructions. These need special handling for 6214 // the vector operands that the normal instructions don't yet model. 6215 // FIXME: Remove these when the register classes and instructions are updated. 6216 def VLD3dAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", 6217 (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; 6218 def VLD3dAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", 6219 (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; 6220 def VLD3dAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", 6221 (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; 6222 def VLD3qAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", 6223 (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; 6224 def VLD3qAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", 6225 (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; 6226 def VLD3qAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", 6227 (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; 6228 6229 def VLD3dWB_fixed_Asm_8 : 6230 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", 6231 (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; 6232 def VLD3dWB_fixed_Asm_16 : 6233 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", 6234 (ins VecListThreeD:$list, 
addrmode6:$addr, pred:$p)>; 6235 def VLD3dWB_fixed_Asm_32 : 6236 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", 6237 (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; 6238 def VLD3qWB_fixed_Asm_8 : 6239 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", 6240 (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; 6241 def VLD3qWB_fixed_Asm_16 : 6242 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", 6243 (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; 6244 def VLD3qWB_fixed_Asm_32 : 6245 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", 6246 (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; 6247 def VLD3dWB_register_Asm_8 : 6248 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", 6249 (ins VecListThreeD:$list, addrmode6:$addr, 6250 rGPR:$Rm, pred:$p)>; 6251 def VLD3dWB_register_Asm_16 : 6252 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", 6253 (ins VecListThreeD:$list, addrmode6:$addr, 6254 rGPR:$Rm, pred:$p)>; 6255 def VLD3dWB_register_Asm_32 : 6256 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", 6257 (ins VecListThreeD:$list, addrmode6:$addr, 6258 rGPR:$Rm, pred:$p)>; 6259 def VLD3qWB_register_Asm_8 : 6260 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", 6261 (ins VecListThreeQ:$list, addrmode6:$addr, 6262 rGPR:$Rm, pred:$p)>; 6263 def VLD3qWB_register_Asm_16 : 6264 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", 6265 (ins VecListThreeQ:$list, addrmode6:$addr, 6266 rGPR:$Rm, pred:$p)>; 6267 def VLD3qWB_register_Asm_32 : 6268 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", 6269 (ins VecListThreeQ:$list, addrmode6:$addr, 6270 rGPR:$Rm, pred:$p)>; 6271 6272 // VST3 single-lane pseudo-instructions. These need special handling for 6273 // the lane index that an InstAlias can't handle, so we use these instead. 
// NOTE(review): this VST3 section mirrors the VLD3 section above operand-for-operand; keep the two in sync when editing either.
6274 def VST3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr", 6275 (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>; 6276 def VST3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", 6277 (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6278 def VST3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", 6279 (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6280 def VST3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", 6281 (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6282 def VST3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", 6283 (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6284 6285 def VST3LNdWB_fixed_Asm_8 : 6286 NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!", 6287 (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>; 6288 def VST3LNdWB_fixed_Asm_16 : 6289 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", 6290 (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6291 def VST3LNdWB_fixed_Asm_32 : 6292 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", 6293 (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6294 def VST3LNqWB_fixed_Asm_16 : 6295 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", 6296 (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6297 def VST3LNqWB_fixed_Asm_32 : 6298 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", 6299 (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6300 def VST3LNdWB_register_Asm_8 : 6301 NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm", 6302 (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, 6303 rGPR:$Rm, pred:$p)>; 6304 def VST3LNdWB_register_Asm_16 : 6305 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", 6306 (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, 
6307 rGPR:$Rm, pred:$p)>; 6308 def VST3LNdWB_register_Asm_32 : 6309 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", 6310 (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, 6311 rGPR:$Rm, pred:$p)>; 6312 def VST3LNqWB_register_Asm_16 : 6313 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", 6314 (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, 6315 rGPR:$Rm, pred:$p)>; 6316 def VST3LNqWB_register_Asm_32 : 6317 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", 6318 (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, 6319 rGPR:$Rm, pred:$p)>; 6320 6321 6322 // VST3 multiple structure pseudo-instructions. These need special handling for 6323 // the vector operands that the normal instructions don't yet model. 6324 // FIXME: Remove these when the register classes and instructions are updated. 6325 def VST3dAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr", 6326 (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; 6327 def VST3dAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", 6328 (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; 6329 def VST3dAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", 6330 (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; 6331 def VST3qAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr", 6332 (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; 6333 def VST3qAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", 6334 (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; 6335 def VST3qAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", 6336 (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; 6337 6338 def VST3dWB_fixed_Asm_8 : 6339 NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!", 6340 (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; 6341 def VST3dWB_fixed_Asm_16 : 6342 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", 6343 (ins 
VecListThreeD:$list, addrmode6:$addr, pred:$p)>; 6344 def VST3dWB_fixed_Asm_32 : 6345 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", 6346 (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; 6347 def VST3qWB_fixed_Asm_8 : 6348 NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!", 6349 (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; 6350 def VST3qWB_fixed_Asm_16 : 6351 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", 6352 (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; 6353 def VST3qWB_fixed_Asm_32 : 6354 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", 6355 (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; 6356 def VST3dWB_register_Asm_8 : 6357 NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm", 6358 (ins VecListThreeD:$list, addrmode6:$addr, 6359 rGPR:$Rm, pred:$p)>; 6360 def VST3dWB_register_Asm_16 : 6361 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", 6362 (ins VecListThreeD:$list, addrmode6:$addr, 6363 rGPR:$Rm, pred:$p)>; 6364 def VST3dWB_register_Asm_32 : 6365 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", 6366 (ins VecListThreeD:$list, addrmode6:$addr, 6367 rGPR:$Rm, pred:$p)>; 6368 def VST3qWB_register_Asm_8 : 6369 NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm", 6370 (ins VecListThreeQ:$list, addrmode6:$addr, 6371 rGPR:$Rm, pred:$p)>; 6372 def VST3qWB_register_Asm_16 : 6373 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", 6374 (ins VecListThreeQ:$list, addrmode6:$addr, 6375 rGPR:$Rm, pred:$p)>; 6376 def VST3qWB_register_Asm_32 : 6377 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", 6378 (ins VecListThreeQ:$list, addrmode6:$addr, 6379 rGPR:$Rm, pred:$p)>; 6380 6381 // VLD4 all-lanes pseudo-instructions. These need special handling for 6382 // the lane index that an InstAlias can't handle, so we use these instead. 
// NOTE(review): same structure as the VLD3DUP section above, with Four-register list classes.
6383 def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", 6384 (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; 6385 def VLD4DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", 6386 (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; 6387 def VLD4DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", 6388 (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; 6389 def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", 6390 (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; 6391 def VLD4DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", 6392 (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; 6393 def VLD4DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", 6394 (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; 6395 6396 def VLD4DUPdWB_fixed_Asm_8 : 6397 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", 6398 (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; 6399 def VLD4DUPdWB_fixed_Asm_16 : 6400 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", 6401 (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; 6402 def VLD4DUPdWB_fixed_Asm_32 : 6403 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", 6404 (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; 6405 def VLD4DUPqWB_fixed_Asm_8 : 6406 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", 6407 (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; 6408 def VLD4DUPqWB_fixed_Asm_16 : 6409 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", 6410 (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; 6411 def VLD4DUPqWB_fixed_Asm_32 : 6412 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", 6413 (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; 6414 def VLD4DUPdWB_register_Asm_8 : 6415 NEONDataTypeAsmPseudoInst<"vld4${p}", 
".8", "$list, $addr, $Rm", 6416 (ins VecListFourDAllLanes:$list, addrmode6:$addr, 6417 rGPR:$Rm, pred:$p)>; 6418 def VLD4DUPdWB_register_Asm_16 : 6419 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", 6420 (ins VecListFourDAllLanes:$list, addrmode6:$addr, 6421 rGPR:$Rm, pred:$p)>; 6422 def VLD4DUPdWB_register_Asm_32 : 6423 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", 6424 (ins VecListFourDAllLanes:$list, addrmode6:$addr, 6425 rGPR:$Rm, pred:$p)>; 6426 def VLD4DUPqWB_register_Asm_8 : 6427 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", 6428 (ins VecListFourQAllLanes:$list, addrmode6:$addr, 6429 rGPR:$Rm, pred:$p)>; 6430 def VLD4DUPqWB_register_Asm_16 : 6431 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", 6432 (ins VecListFourQAllLanes:$list, addrmode6:$addr, 6433 rGPR:$Rm, pred:$p)>; 6434 def VLD4DUPqWB_register_Asm_32 : 6435 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", 6436 (ins VecListFourQAllLanes:$list, addrmode6:$addr, 6437 rGPR:$Rm, pred:$p)>; 6438 6439 6440 // VLD4 single-lane pseudo-instructions. These need special handling for 6441 // the lane index that an InstAlias can't handle, so we use these instead. 
// NOTE(review): same structure as the VLD3 lane-indexed/multiple-structure sections above, with Four-register list classes.
6442 def VLD4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", 6443 (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>; 6444 def VLD4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", 6445 (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6446 def VLD4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", 6447 (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6448 def VLD4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", 6449 (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6450 def VLD4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", 6451 (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6452 6453 def VLD4LNdWB_fixed_Asm_8 : 6454 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", 6455 (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>; 6456 def VLD4LNdWB_fixed_Asm_16 : 6457 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", 6458 (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6459 def VLD4LNdWB_fixed_Asm_32 : 6460 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", 6461 (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6462 def VLD4LNqWB_fixed_Asm_16 : 6463 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", 6464 (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6465 def VLD4LNqWB_fixed_Asm_32 : 6466 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", 6467 (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6468 def VLD4LNdWB_register_Asm_8 : 6469 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", 6470 (ins VecListFourDByteIndexed:$list, addrmode6:$addr, 6471 rGPR:$Rm, pred:$p)>; 6472 def VLD4LNdWB_register_Asm_16 : 6473 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", 6474 (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, 6475 
rGPR:$Rm, pred:$p)>; 6476 def VLD4LNdWB_register_Asm_32 : 6477 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", 6478 (ins VecListFourDWordIndexed:$list, addrmode6:$addr, 6479 rGPR:$Rm, pred:$p)>; 6480 def VLD4LNqWB_register_Asm_16 : 6481 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", 6482 (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, 6483 rGPR:$Rm, pred:$p)>; 6484 def VLD4LNqWB_register_Asm_32 : 6485 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", 6486 (ins VecListFourQWordIndexed:$list, addrmode6:$addr, 6487 rGPR:$Rm, pred:$p)>; 6488 6489 6490 6491 // VLD4 multiple structure pseudo-instructions. These need special handling for 6492 // the vector operands that the normal instructions don't yet model. 6493 // FIXME: Remove these when the register classes and instructions are updated. 6494 def VLD4dAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", 6495 (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; 6496 def VLD4dAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", 6497 (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; 6498 def VLD4dAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", 6499 (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; 6500 def VLD4qAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", 6501 (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; 6502 def VLD4qAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", 6503 (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; 6504 def VLD4qAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", 6505 (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; 6506 6507 def VLD4dWB_fixed_Asm_8 : 6508 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", 6509 (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; 6510 def VLD4dWB_fixed_Asm_16 : 6511 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", 6512 (ins VecListFourD:$list, 
addrmode6:$addr, pred:$p)>; 6513 def VLD4dWB_fixed_Asm_32 : 6514 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", 6515 (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; 6516 def VLD4qWB_fixed_Asm_8 : 6517 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", 6518 (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; 6519 def VLD4qWB_fixed_Asm_16 : 6520 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", 6521 (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; 6522 def VLD4qWB_fixed_Asm_32 : 6523 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", 6524 (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; 6525 def VLD4dWB_register_Asm_8 : 6526 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", 6527 (ins VecListFourD:$list, addrmode6:$addr, 6528 rGPR:$Rm, pred:$p)>; 6529 def VLD4dWB_register_Asm_16 : 6530 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", 6531 (ins VecListFourD:$list, addrmode6:$addr, 6532 rGPR:$Rm, pred:$p)>; 6533 def VLD4dWB_register_Asm_32 : 6534 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", 6535 (ins VecListFourD:$list, addrmode6:$addr, 6536 rGPR:$Rm, pred:$p)>; 6537 def VLD4qWB_register_Asm_8 : 6538 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", 6539 (ins VecListFourQ:$list, addrmode6:$addr, 6540 rGPR:$Rm, pred:$p)>; 6541 def VLD4qWB_register_Asm_16 : 6542 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", 6543 (ins VecListFourQ:$list, addrmode6:$addr, 6544 rGPR:$Rm, pred:$p)>; 6545 def VLD4qWB_register_Asm_32 : 6546 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", 6547 (ins VecListFourQ:$list, addrmode6:$addr, 6548 rGPR:$Rm, pred:$p)>; 6549 6550 // VST4 single-lane pseudo-instructions. These need special handling for 6551 // the lane index that an InstAlias can't handle, so we use these instead. 
// NOTE(review): VST4 section mirrors VLD4 above; the trailing VMOV aliases expand a register-register vmov to VORR with the source repeated in both operand slots.
6552 def VST4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr", 6553 (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>; 6554 def VST4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr", 6555 (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6556 def VST4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr", 6557 (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6558 def VST4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr", 6559 (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6560 def VST4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr", 6561 (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6562 6563 def VST4LNdWB_fixed_Asm_8 : 6564 NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!", 6565 (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>; 6566 def VST4LNdWB_fixed_Asm_16 : 6567 NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!", 6568 (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6569 def VST4LNdWB_fixed_Asm_32 : 6570 NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!", 6571 (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6572 def VST4LNqWB_fixed_Asm_16 : 6573 NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!", 6574 (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6575 def VST4LNqWB_fixed_Asm_32 : 6576 NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!", 6577 (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6578 def VST4LNdWB_register_Asm_8 : 6579 NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm", 6580 (ins VecListFourDByteIndexed:$list, addrmode6:$addr, 6581 rGPR:$Rm, pred:$p)>; 6582 def VST4LNdWB_register_Asm_16 : 6583 NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm", 6584 (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, 6585 
rGPR:$Rm, pred:$p)>; 6586 def VST4LNdWB_register_Asm_32 : 6587 NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm", 6588 (ins VecListFourDWordIndexed:$list, addrmode6:$addr, 6589 rGPR:$Rm, pred:$p)>; 6590 def VST4LNqWB_register_Asm_16 : 6591 NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm", 6592 (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, 6593 rGPR:$Rm, pred:$p)>; 6594 def VST4LNqWB_register_Asm_32 : 6595 NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm", 6596 (ins VecListFourQWordIndexed:$list, addrmode6:$addr, 6597 rGPR:$Rm, pred:$p)>; 6598 6599 6600 // VST4 multiple structure pseudo-instructions. These need special handling for 6601 // the vector operands that the normal instructions don't yet model. 6602 // FIXME: Remove these when the register classes and instructions are updated. 6603 def VST4dAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr", 6604 (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; 6605 def VST4dAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr", 6606 (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; 6607 def VST4dAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr", 6608 (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; 6609 def VST4qAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr", 6610 (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; 6611 def VST4qAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr", 6612 (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; 6613 def VST4qAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr", 6614 (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; 6615 6616 def VST4dWB_fixed_Asm_8 : 6617 NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!", 6618 (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; 6619 def VST4dWB_fixed_Asm_16 : 6620 NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!", 6621 (ins VecListFourD:$list, 
addrmode6:$addr, pred:$p)>; 6622 def VST4dWB_fixed_Asm_32 : 6623 NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!", 6624 (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; 6625 def VST4qWB_fixed_Asm_8 : 6626 NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!", 6627 (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; 6628 def VST4qWB_fixed_Asm_16 : 6629 NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!", 6630 (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; 6631 def VST4qWB_fixed_Asm_32 : 6632 NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!", 6633 (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; 6634 def VST4dWB_register_Asm_8 : 6635 NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm", 6636 (ins VecListFourD:$list, addrmode6:$addr, 6637 rGPR:$Rm, pred:$p)>; 6638 def VST4dWB_register_Asm_16 : 6639 NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm", 6640 (ins VecListFourD:$list, addrmode6:$addr, 6641 rGPR:$Rm, pred:$p)>; 6642 def VST4dWB_register_Asm_32 : 6643 NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm", 6644 (ins VecListFourD:$list, addrmode6:$addr, 6645 rGPR:$Rm, pred:$p)>; 6646 def VST4qWB_register_Asm_8 : 6647 NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm", 6648 (ins VecListFourQ:$list, addrmode6:$addr, 6649 rGPR:$Rm, pred:$p)>; 6650 def VST4qWB_register_Asm_16 : 6651 NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm", 6652 (ins VecListFourQ:$list, addrmode6:$addr, 6653 rGPR:$Rm, pred:$p)>; 6654 def VST4qWB_register_Asm_32 : 6655 NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm", 6656 (ins VecListFourQ:$list, addrmode6:$addr, 6657 rGPR:$Rm, pred:$p)>; 6658 6659 // VMOV takes an optional datatype suffix 6660 defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm", 6661 (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>; 6662 defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm", 6663 (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>; 6664 6665 //
VCLT (register) is an assembler alias for VCGT w/ the operands reversed. 6666 // D-register versions. 6667 def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm", 6668 (VCGEsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 6669 def : NEONInstAlias<"vcle${p}.s16 $Dd, $Dn, $Dm", 6670 (VCGEsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 6671 def : NEONInstAlias<"vcle${p}.s32 $Dd, $Dn, $Dm", 6672 (VCGEsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 6673 def : NEONInstAlias<"vcle${p}.u8 $Dd, $Dn, $Dm", 6674 (VCGEuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 6675 def : NEONInstAlias<"vcle${p}.u16 $Dd, $Dn, $Dm", 6676 (VCGEuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 6677 def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm", 6678 (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 6679 def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm", 6680 (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 6681 // Q-register versions. 6682 def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm", 6683 (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 6684 def : NEONInstAlias<"vcle${p}.s16 $Qd, $Qn, $Qm", 6685 (VCGEsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 6686 def : NEONInstAlias<"vcle${p}.s32 $Qd, $Qn, $Qm", 6687 (VCGEsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 6688 def : NEONInstAlias<"vcle${p}.u8 $Qd, $Qn, $Qm", 6689 (VCGEuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 6690 def : NEONInstAlias<"vcle${p}.u16 $Qd, $Qn, $Qm", 6691 (VCGEuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 6692 def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm", 6693 (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 6694 def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm", 6695 (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 6696 6697 // VCLT (register) is an assembler alias for VCGT w/ the operands reversed. 6698 // D-register versions. 
// As with VCLE above, the result dag keeps $Dd first but swaps $Dn/$Dm:
// "vclt d0, d1, d2" assembles as "vcgt d0, d2, d1".
def : NEONInstAlias<"vclt${p}.s8 $Dd, $Dn, $Dm",
                    (VCGTsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s16 $Dd, $Dn, $Dm",
                    (VCGTsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s32 $Dd, $Dn, $Dm",
                    (VCGTsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u8 $Dd, $Dn, $Dm",
                    (VCGTuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u16 $Dd, $Dn, $Dm",
                    (VCGTuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm",
                    (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm",
                    (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// Q-register versions.
def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm",
                    (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s16 $Qd, $Qn, $Qm",
                    (VCGTsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s32 $Qd, $Qn, $Qm",
                    (VCGTsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u8 $Qd, $Qn, $Qm",
                    (VCGTuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u16 $Qd, $Qn, $Qm",
                    (VCGTuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm",
                    (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm",
                    (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;

// VSWP allows, but does not require, a type suffix.
defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
                         (VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
                         (VSWPq QPR:$Vd, QPR:$Vm, pred:$p)>;

// VBIF, VBIT, and VBSL allow, but do not require, a type suffix.
// D-register versions.
defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
                         (VBIFd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
                         (VBITd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
                         (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
// Q-register versions.
defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
                         (VBIFq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
                         (VBITq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
                         (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;

// "vmov Rd, #-imm" can be handled via "vmvn".
// The nImmVMOVI32Neg operand matches immediates whose bitwise complement is
// encodable, so each mnemonic maps to the opposite instruction.
def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
                    (VMVNv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
                    (VMVNv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
                    (VMOVv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
                    (VMOVv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;

// 'gas' compatibility aliases for quad-word instructions. Strictly speaking,
// these should restrict to just the Q register variants, but the register
// classes are enough to match correctly regardless, so we keep it simple
// and just use MnemonicAlias.
def : NEONMnemonicAlias<"vbicq", "vbic">;
def : NEONMnemonicAlias<"vandq", "vand">;
def : NEONMnemonicAlias<"veorq", "veor">;
def : NEONMnemonicAlias<"vorrq", "vorr">;

def : NEONMnemonicAlias<"vmovq", "vmov">;
def : NEONMnemonicAlias<"vmvnq", "vmvn">;
// Explicit versions for floating point so that the FPImm variants get
// handled early. The parser gets confused otherwise.
def : NEONMnemonicAlias<"vmovq.f32", "vmov.f32">;
def : NEONMnemonicAlias<"vmovq.f64", "vmov.f64">;

def : NEONMnemonicAlias<"vaddq", "vadd">;
def : NEONMnemonicAlias<"vsubq", "vsub">;

def : NEONMnemonicAlias<"vminq", "vmin">;
def : NEONMnemonicAlias<"vmaxq", "vmax">;

def : NEONMnemonicAlias<"vmulq", "vmul">;

def : NEONMnemonicAlias<"vabsq", "vabs">;

def : NEONMnemonicAlias<"vshlq", "vshl">;
def : NEONMnemonicAlias<"vshrq", "vshr">;

def : NEONMnemonicAlias<"vcvtq", "vcvt">;

def : NEONMnemonicAlias<"vcleq", "vcle">;
def : NEONMnemonicAlias<"vceqq", "vceq">;

def : NEONMnemonicAlias<"vzipq", "vzip">;
def : NEONMnemonicAlias<"vswpq", "vswp">;

def : NEONMnemonicAlias<"vrecpeq.f32", "vrecpe.f32">;
def : NEONMnemonicAlias<"vrecpeq.u32", "vrecpe.u32">;


// Alias for loading floating point immediates that aren't representable
// using the vmov.f32 encoding but the bitpattern is representable using
// the .i32 encoding. The nImmVMOVI32 operand supplies the equivalent
// 32-bit modified-immediate for the VMOV (immediate) instruction.
def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
                    (VMOVv4i32 QPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
                    (VMOVv2i32 DPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;