1 //===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the SystemZTargetLowering class. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "SystemZISelLowering.h" 15 #include "SystemZCallingConv.h" 16 #include "SystemZConstantPoolValue.h" 17 #include "SystemZMachineFunctionInfo.h" 18 #include "SystemZTargetMachine.h" 19 #include "llvm/CodeGen/CallingConvLower.h" 20 #include "llvm/CodeGen/MachineInstrBuilder.h" 21 #include "llvm/CodeGen/MachineRegisterInfo.h" 22 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 23 #include "llvm/IR/Intrinsics.h" 24 #include <cctype> 25 26 using namespace llvm; 27 28 #define DEBUG_TYPE "systemz-lower" 29 30 namespace { 31 // Represents a sequence for extracting a 0/1 value from an IPM result: 32 // (((X ^ XORValue) + AddValue) >> Bit) 33 struct IPMConversion { 34 IPMConversion(unsigned xorValue, int64_t addValue, unsigned bit) 35 : XORValue(xorValue), AddValue(addValue), Bit(bit) {} 36 37 int64_t XORValue; 38 int64_t AddValue; 39 unsigned Bit; 40 }; 41 42 // Represents information about a comparison. 43 struct Comparison { 44 Comparison(SDValue Op0In, SDValue Op1In) 45 : Op0(Op0In), Op1(Op1In), Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {} 46 47 // The operands to the comparison. 48 SDValue Op0, Op1; 49 50 // The opcode that should be used to compare Op0 and Op1. 51 unsigned Opcode; 52 53 // A SystemZICMP value. Only used for integer comparisons. 54 unsigned ICmpType; 55 56 // The mask of CC values that Opcode can produce. 57 unsigned CCValid; 58 59 // The mask of CC values for which the original condition is true. 
60 unsigned CCMask; 61 }; 62 } // end anonymous namespace 63 64 // Classify VT as either 32 or 64 bit. 65 static bool is32Bit(EVT VT) { 66 switch (VT.getSimpleVT().SimpleTy) { 67 case MVT::i32: 68 return true; 69 case MVT::i64: 70 return false; 71 default: 72 llvm_unreachable("Unsupported type"); 73 } 74 } 75 76 // Return a version of MachineOperand that can be safely used before the 77 // final use. 78 static MachineOperand earlyUseOperand(MachineOperand Op) { 79 if (Op.isReg()) 80 Op.setIsKill(false); 81 return Op; 82 } 83 84 SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, 85 const SystemZSubtarget &STI) 86 : TargetLowering(TM), Subtarget(STI) { 87 MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize()); 88 89 // Set up the register classes. 90 if (Subtarget.hasHighWord()) 91 addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass); 92 else 93 addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass); 94 addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass); 95 if (Subtarget.hasVector()) { 96 addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass); 97 addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass); 98 } else { 99 addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass); 100 addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass); 101 } 102 addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass); 103 104 if (Subtarget.hasVector()) { 105 addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass); 106 addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass); 107 addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass); 108 addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass); 109 addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass); 110 addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass); 111 } 112 113 // Compute derived properties from the register classes 114 computeRegisterProperties(Subtarget.getRegisterInfo()); 115 116 // Set up special registers. 
117 setStackPointerRegisterToSaveRestore(SystemZ::R15D); 118 119 // TODO: It may be better to default to latency-oriented scheduling, however 120 // LLVM's current latency-oriented scheduler can't handle physreg definitions 121 // such as SystemZ has with CC, so set this to the register-pressure 122 // scheduler, because it can. 123 setSchedulingPreference(Sched::RegPressure); 124 125 setBooleanContents(ZeroOrOneBooleanContent); 126 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); 127 128 // Instructions are strings of 2-byte aligned 2-byte values. 129 setMinFunctionAlignment(2); 130 131 // Handle operations that are handled in a similar way for all types. 132 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE; 133 I <= MVT::LAST_FP_VALUETYPE; 134 ++I) { 135 MVT VT = MVT::SimpleValueType(I); 136 if (isTypeLegal(VT)) { 137 // Lower SET_CC into an IPM-based sequence. 138 setOperationAction(ISD::SETCC, VT, Custom); 139 140 // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE). 141 setOperationAction(ISD::SELECT, VT, Expand); 142 143 // Lower SELECT_CC and BR_CC into separate comparisons and branches. 144 setOperationAction(ISD::SELECT_CC, VT, Custom); 145 setOperationAction(ISD::BR_CC, VT, Custom); 146 } 147 } 148 149 // Expand jump table branches as address arithmetic followed by an 150 // indirect jump. 151 setOperationAction(ISD::BR_JT, MVT::Other, Expand); 152 153 // Expand BRCOND into a BR_CC (see above). 154 setOperationAction(ISD::BRCOND, MVT::Other, Expand); 155 156 // Handle integer types. 157 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE; 158 I <= MVT::LAST_INTEGER_VALUETYPE; 159 ++I) { 160 MVT VT = MVT::SimpleValueType(I); 161 if (isTypeLegal(VT)) { 162 // Expand individual DIV and REMs into DIVREMs. 
163 setOperationAction(ISD::SDIV, VT, Expand); 164 setOperationAction(ISD::UDIV, VT, Expand); 165 setOperationAction(ISD::SREM, VT, Expand); 166 setOperationAction(ISD::UREM, VT, Expand); 167 setOperationAction(ISD::SDIVREM, VT, Custom); 168 setOperationAction(ISD::UDIVREM, VT, Custom); 169 170 // Lower ATOMIC_LOAD and ATOMIC_STORE into normal volatile loads and 171 // stores, putting a serialization instruction after the stores. 172 setOperationAction(ISD::ATOMIC_LOAD, VT, Custom); 173 setOperationAction(ISD::ATOMIC_STORE, VT, Custom); 174 175 // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are 176 // available, or if the operand is constant. 177 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom); 178 179 // Use POPCNT on z196 and above. 180 if (Subtarget.hasPopulationCount()) 181 setOperationAction(ISD::CTPOP, VT, Custom); 182 else 183 setOperationAction(ISD::CTPOP, VT, Expand); 184 185 // No special instructions for these. 186 setOperationAction(ISD::CTTZ, VT, Expand); 187 setOperationAction(ISD::ROTR, VT, Expand); 188 189 // Use *MUL_LOHI where possible instead of MULH*. 190 setOperationAction(ISD::MULHS, VT, Expand); 191 setOperationAction(ISD::MULHU, VT, Expand); 192 setOperationAction(ISD::SMUL_LOHI, VT, Custom); 193 setOperationAction(ISD::UMUL_LOHI, VT, Custom); 194 195 // Only z196 and above have native support for conversions to unsigned. 196 if (!Subtarget.hasFPExtension()) 197 setOperationAction(ISD::FP_TO_UINT, VT, Expand); 198 } 199 } 200 201 // Type legalization will convert 8- and 16-bit atomic operations into 202 // forms that operate on i32s (but still keeping the original memory VT). 203 // Lower them into full i32 operations. 
204 setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Custom); 205 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Custom); 206 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom); 207 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom); 208 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Custom); 209 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Custom); 210 setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Custom); 211 setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Custom); 212 setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Custom); 213 setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom); 214 setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom); 215 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom); 216 217 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); 218 219 // Traps are legal, as we will convert them to "j .+2". 220 setOperationAction(ISD::TRAP, MVT::Other, Legal); 221 222 // z10 has instructions for signed but not unsigned FP conversion. 223 // Handle unsigned 32-bit types as signed 64-bit types. 224 if (!Subtarget.hasFPExtension()) { 225 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); 226 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); 227 } 228 229 // We have native support for a 64-bit CTLZ, via FLOGR. 230 setOperationAction(ISD::CTLZ, MVT::i32, Promote); 231 setOperationAction(ISD::CTLZ, MVT::i64, Legal); 232 233 // Give LowerOperation the chance to replace 64-bit ORs with subregs. 234 setOperationAction(ISD::OR, MVT::i64, Custom); 235 236 // FIXME: Can we support these natively? 237 setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand); 238 setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand); 239 setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand); 240 241 // We have native instructions for i8, i16 and i32 extensions, but not i1. 
242 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); 243 for (MVT VT : MVT::integer_valuetypes()) { 244 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); 245 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); 246 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote); 247 } 248 249 // Handle the various types of symbolic address. 250 setOperationAction(ISD::ConstantPool, PtrVT, Custom); 251 setOperationAction(ISD::GlobalAddress, PtrVT, Custom); 252 setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom); 253 setOperationAction(ISD::BlockAddress, PtrVT, Custom); 254 setOperationAction(ISD::JumpTable, PtrVT, Custom); 255 256 // We need to handle dynamic allocations specially because of the 257 // 160-byte area at the bottom of the stack. 258 setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom); 259 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, PtrVT, Custom); 260 261 // Use custom expanders so that we can force the function to use 262 // a frame pointer. 263 setOperationAction(ISD::STACKSAVE, MVT::Other, Custom); 264 setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom); 265 266 // Handle prefetches with PFD or PFDRL. 267 setOperationAction(ISD::PREFETCH, MVT::Other, Custom); 268 269 for (MVT VT : MVT::vector_valuetypes()) { 270 // Assume by default that all vector operations need to be expanded. 271 for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode) 272 if (getOperationAction(Opcode, VT) == Legal) 273 setOperationAction(Opcode, VT, Expand); 274 275 // Likewise all truncating stores and extending loads. 
276 for (MVT InnerVT : MVT::vector_valuetypes()) { 277 setTruncStoreAction(VT, InnerVT, Expand); 278 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); 279 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); 280 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); 281 } 282 283 if (isTypeLegal(VT)) { 284 // These operations are legal for anything that can be stored in a 285 // vector register, even if there is no native support for the format 286 // as such. In particular, we can do these for v4f32 even though there 287 // are no specific instructions for that format. 288 setOperationAction(ISD::LOAD, VT, Legal); 289 setOperationAction(ISD::STORE, VT, Legal); 290 setOperationAction(ISD::VSELECT, VT, Legal); 291 setOperationAction(ISD::BITCAST, VT, Legal); 292 setOperationAction(ISD::UNDEF, VT, Legal); 293 294 // Likewise, except that we need to replace the nodes with something 295 // more specific. 296 setOperationAction(ISD::BUILD_VECTOR, VT, Custom); 297 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); 298 } 299 } 300 301 // Handle integer vector types. 302 for (MVT VT : MVT::integer_vector_valuetypes()) { 303 if (isTypeLegal(VT)) { 304 // These operations have direct equivalents. 305 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); 306 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal); 307 setOperationAction(ISD::ADD, VT, Legal); 308 setOperationAction(ISD::SUB, VT, Legal); 309 if (VT != MVT::v2i64) 310 setOperationAction(ISD::MUL, VT, Legal); 311 setOperationAction(ISD::AND, VT, Legal); 312 setOperationAction(ISD::OR, VT, Legal); 313 setOperationAction(ISD::XOR, VT, Legal); 314 setOperationAction(ISD::CTPOP, VT, Custom); 315 setOperationAction(ISD::CTTZ, VT, Legal); 316 setOperationAction(ISD::CTLZ, VT, Legal); 317 318 // Convert a GPR scalar to a vector by inserting it into element 0. 319 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); 320 321 // Use a series of unpacks for extensions. 
322 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom); 323 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom); 324 325 // Detect shifts by a scalar amount and convert them into 326 // V*_BY_SCALAR. 327 setOperationAction(ISD::SHL, VT, Custom); 328 setOperationAction(ISD::SRA, VT, Custom); 329 setOperationAction(ISD::SRL, VT, Custom); 330 331 // At present ROTL isn't matched by DAGCombiner. ROTR should be 332 // converted into ROTL. 333 setOperationAction(ISD::ROTL, VT, Expand); 334 setOperationAction(ISD::ROTR, VT, Expand); 335 336 // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands 337 // and inverting the result as necessary. 338 setOperationAction(ISD::SETCC, VT, Custom); 339 } 340 } 341 342 if (Subtarget.hasVector()) { 343 // There should be no need to check for float types other than v2f64 344 // since <2 x f32> isn't a legal type. 345 setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal); 346 setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal); 347 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal); 348 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal); 349 } 350 351 // Handle floating-point types. 352 for (unsigned I = MVT::FIRST_FP_VALUETYPE; 353 I <= MVT::LAST_FP_VALUETYPE; 354 ++I) { 355 MVT VT = MVT::SimpleValueType(I); 356 if (isTypeLegal(VT)) { 357 // We can use FI for FRINT. 358 setOperationAction(ISD::FRINT, VT, Legal); 359 360 // We can use the extended form of FI for other rounding operations. 361 if (Subtarget.hasFPExtension()) { 362 setOperationAction(ISD::FNEARBYINT, VT, Legal); 363 setOperationAction(ISD::FFLOOR, VT, Legal); 364 setOperationAction(ISD::FCEIL, VT, Legal); 365 setOperationAction(ISD::FTRUNC, VT, Legal); 366 setOperationAction(ISD::FROUND, VT, Legal); 367 } 368 369 // No special instructions for these. 
370 setOperationAction(ISD::FSIN, VT, Expand); 371 setOperationAction(ISD::FCOS, VT, Expand); 372 setOperationAction(ISD::FSINCOS, VT, Expand); 373 setOperationAction(ISD::FREM, VT, Expand); 374 setOperationAction(ISD::FPOW, VT, Expand); 375 } 376 } 377 378 // Handle floating-point vector types. 379 if (Subtarget.hasVector()) { 380 // Scalar-to-vector conversion is just a subreg. 381 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal); 382 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal); 383 384 // Some insertions and extractions can be done directly but others 385 // need to go via integers. 386 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); 387 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom); 388 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); 389 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom); 390 391 // These operations have direct equivalents. 392 setOperationAction(ISD::FADD, MVT::v2f64, Legal); 393 setOperationAction(ISD::FNEG, MVT::v2f64, Legal); 394 setOperationAction(ISD::FSUB, MVT::v2f64, Legal); 395 setOperationAction(ISD::FMUL, MVT::v2f64, Legal); 396 setOperationAction(ISD::FMA, MVT::v2f64, Legal); 397 setOperationAction(ISD::FDIV, MVT::v2f64, Legal); 398 setOperationAction(ISD::FABS, MVT::v2f64, Legal); 399 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal); 400 setOperationAction(ISD::FRINT, MVT::v2f64, Legal); 401 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal); 402 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal); 403 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal); 404 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal); 405 setOperationAction(ISD::FROUND, MVT::v2f64, Legal); 406 } 407 408 // We have fused multiply-addition for f32 and f64 but not f128. 
409 setOperationAction(ISD::FMA, MVT::f32, Legal); 410 setOperationAction(ISD::FMA, MVT::f64, Legal); 411 setOperationAction(ISD::FMA, MVT::f128, Expand); 412 413 // Needed so that we don't try to implement f128 constant loads using 414 // a load-and-extend of a f80 constant (in cases where the constant 415 // would fit in an f80). 416 for (MVT VT : MVT::fp_valuetypes()) 417 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand); 418 419 // Floating-point truncation and stores need to be done separately. 420 setTruncStoreAction(MVT::f64, MVT::f32, Expand); 421 setTruncStoreAction(MVT::f128, MVT::f32, Expand); 422 setTruncStoreAction(MVT::f128, MVT::f64, Expand); 423 424 // We have 64-bit FPR<->GPR moves, but need special handling for 425 // 32-bit forms. 426 if (!Subtarget.hasVector()) { 427 setOperationAction(ISD::BITCAST, MVT::i32, Custom); 428 setOperationAction(ISD::BITCAST, MVT::f32, Custom); 429 } 430 431 // VASTART and VACOPY need to deal with the SystemZ-specific varargs 432 // structure, but VAEND is a no-op. 433 setOperationAction(ISD::VASTART, MVT::Other, Custom); 434 setOperationAction(ISD::VACOPY, MVT::Other, Custom); 435 setOperationAction(ISD::VAEND, MVT::Other, Expand); 436 437 // Codes for which we want to perform some z-specific combinations. 438 setTargetDAGCombine(ISD::SIGN_EXTEND); 439 setTargetDAGCombine(ISD::STORE); 440 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); 441 setTargetDAGCombine(ISD::FP_ROUND); 442 setTargetDAGCombine(ISD::BSWAP); 443 setTargetDAGCombine(ISD::SHL); 444 setTargetDAGCombine(ISD::SRA); 445 setTargetDAGCombine(ISD::SRL); 446 setTargetDAGCombine(ISD::ROTL); 447 448 // Handle intrinsics. 449 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); 450 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 451 452 // We want to use MVC in preference to even a single load/store pair. 
453 MaxStoresPerMemcpy = 0; 454 MaxStoresPerMemcpyOptSize = 0; 455 456 // The main memset sequence is a byte store followed by an MVC. 457 // Two STC or MV..I stores win over that, but the kind of fused stores 458 // generated by target-independent code don't when the byte value is 459 // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better 460 // than "STC;MVC". Handle the choice in target-specific code instead. 461 MaxStoresPerMemset = 0; 462 MaxStoresPerMemsetOptSize = 0; 463 } 464 465 EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL, 466 LLVMContext &, EVT VT) const { 467 if (!VT.isVector()) 468 return MVT::i32; 469 return VT.changeVectorElementTypeToInteger(); 470 } 471 472 bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { 473 VT = VT.getScalarType(); 474 475 if (!VT.isSimple()) 476 return false; 477 478 switch (VT.getSimpleVT().SimpleTy) { 479 case MVT::f32: 480 case MVT::f64: 481 return true; 482 case MVT::f128: 483 return false; 484 default: 485 break; 486 } 487 488 return false; 489 } 490 491 bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { 492 // We can load zero using LZ?R and negative zero using LZ?R;LC?BR. 493 return Imm.isZero() || Imm.isNegZero(); 494 } 495 496 bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const { 497 // We can use CGFI or CLGFI. 498 return isInt<32>(Imm) || isUInt<32>(Imm); 499 } 500 501 bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const { 502 // We can use ALGFI or SLGFI. 503 return isUInt<32>(Imm) || isUInt<32>(-Imm); 504 } 505 506 bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, 507 unsigned, 508 unsigned, 509 bool *Fast) const { 510 // Unaligned accesses should never be slower than the expanded version. 511 // We check specifically for aligned accesses in the few cases where 512 // they are required. 
513 if (Fast) 514 *Fast = true; 515 return true; 516 } 517 518 bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL, 519 const AddrMode &AM, Type *Ty, 520 unsigned AS) const { 521 // Punt on globals for now, although they can be used in limited 522 // RELATIVE LONG cases. 523 if (AM.BaseGV) 524 return false; 525 526 // Require a 20-bit signed offset. 527 if (!isInt<20>(AM.BaseOffs)) 528 return false; 529 530 // Indexing is OK but no scale factor can be applied. 531 return AM.Scale == 0 || AM.Scale == 1; 532 } 533 534 bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const { 535 if (!FromType->isIntegerTy() || !ToType->isIntegerTy()) 536 return false; 537 unsigned FromBits = FromType->getPrimitiveSizeInBits(); 538 unsigned ToBits = ToType->getPrimitiveSizeInBits(); 539 return FromBits > ToBits; 540 } 541 542 bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const { 543 if (!FromVT.isInteger() || !ToVT.isInteger()) 544 return false; 545 unsigned FromBits = FromVT.getSizeInBits(); 546 unsigned ToBits = ToVT.getSizeInBits(); 547 return FromBits > ToBits; 548 } 549 550 //===----------------------------------------------------------------------===// 551 // Inline asm support 552 //===----------------------------------------------------------------------===// 553 554 TargetLowering::ConstraintType 555 SystemZTargetLowering::getConstraintType(StringRef Constraint) const { 556 if (Constraint.size() == 1) { 557 switch (Constraint[0]) { 558 case 'a': // Address register 559 case 'd': // Data register (equivalent to 'r') 560 case 'f': // Floating-point register 561 case 'h': // High-part register 562 case 'r': // General-purpose register 563 return C_RegisterClass; 564 565 case 'Q': // Memory with base and unsigned 12-bit displacement 566 case 'R': // Likewise, plus an index 567 case 'S': // Memory with base and signed 20-bit displacement 568 case 'T': // Likewise, plus an index 569 case 'm': // Equivalent to 'T'. 
570 return C_Memory; 571 572 case 'I': // Unsigned 8-bit constant 573 case 'J': // Unsigned 12-bit constant 574 case 'K': // Signed 16-bit constant 575 case 'L': // Signed 20-bit displacement (on all targets we support) 576 case 'M': // 0x7fffffff 577 return C_Other; 578 579 default: 580 break; 581 } 582 } 583 return TargetLowering::getConstraintType(Constraint); 584 } 585 586 TargetLowering::ConstraintWeight SystemZTargetLowering:: 587 getSingleConstraintMatchWeight(AsmOperandInfo &info, 588 const char *constraint) const { 589 ConstraintWeight weight = CW_Invalid; 590 Value *CallOperandVal = info.CallOperandVal; 591 // If we don't have a value, we can't do a match, 592 // but allow it at the lowest weight. 593 if (!CallOperandVal) 594 return CW_Default; 595 Type *type = CallOperandVal->getType(); 596 // Look at the constraint type. 597 switch (*constraint) { 598 default: 599 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); 600 break; 601 602 case 'a': // Address register 603 case 'd': // Data register (equivalent to 'r') 604 case 'h': // High-part register 605 case 'r': // General-purpose register 606 if (CallOperandVal->getType()->isIntegerTy()) 607 weight = CW_Register; 608 break; 609 610 case 'f': // Floating-point register 611 if (type->isFloatingPointTy()) 612 weight = CW_Register; 613 break; 614 615 case 'I': // Unsigned 8-bit constant 616 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) 617 if (isUInt<8>(C->getZExtValue())) 618 weight = CW_Constant; 619 break; 620 621 case 'J': // Unsigned 12-bit constant 622 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) 623 if (isUInt<12>(C->getZExtValue())) 624 weight = CW_Constant; 625 break; 626 627 case 'K': // Signed 16-bit constant 628 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) 629 if (isInt<16>(C->getSExtValue())) 630 weight = CW_Constant; 631 break; 632 633 case 'L': // Signed 20-bit displacement (on all targets we support) 634 if (auto *C = 
dyn_cast<ConstantInt>(CallOperandVal)) 635 if (isInt<20>(C->getSExtValue())) 636 weight = CW_Constant; 637 break; 638 639 case 'M': // 0x7fffffff 640 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) 641 if (C->getZExtValue() == 0x7fffffff) 642 weight = CW_Constant; 643 break; 644 } 645 return weight; 646 } 647 648 // Parse a "{tNNN}" register constraint for which the register type "t" 649 // has already been verified. MC is the class associated with "t" and 650 // Map maps 0-based register numbers to LLVM register numbers. 651 static std::pair<unsigned, const TargetRegisterClass *> 652 parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC, 653 const unsigned *Map) { 654 assert(*(Constraint.end()-1) == '}' && "Missing '}'"); 655 if (isdigit(Constraint[2])) { 656 unsigned Index; 657 bool Failed = 658 Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index); 659 if (!Failed && Index < 16 && Map[Index]) 660 return std::make_pair(Map[Index], RC); 661 } 662 return std::make_pair(0U, nullptr); 663 } 664 665 std::pair<unsigned, const TargetRegisterClass *> 666 SystemZTargetLowering::getRegForInlineAsmConstraint( 667 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { 668 if (Constraint.size() == 1) { 669 // GCC Constraint Letters 670 switch (Constraint[0]) { 671 default: break; 672 case 'd': // Data register (equivalent to 'r') 673 case 'r': // General-purpose register 674 if (VT == MVT::i64) 675 return std::make_pair(0U, &SystemZ::GR64BitRegClass); 676 else if (VT == MVT::i128) 677 return std::make_pair(0U, &SystemZ::GR128BitRegClass); 678 return std::make_pair(0U, &SystemZ::GR32BitRegClass); 679 680 case 'a': // Address register 681 if (VT == MVT::i64) 682 return std::make_pair(0U, &SystemZ::ADDR64BitRegClass); 683 else if (VT == MVT::i128) 684 return std::make_pair(0U, &SystemZ::ADDR128BitRegClass); 685 return std::make_pair(0U, &SystemZ::ADDR32BitRegClass); 686 687 case 'h': // High-part register (an LLVM extension) 688 
return std::make_pair(0U, &SystemZ::GRH32BitRegClass); 689 690 case 'f': // Floating-point register 691 if (VT == MVT::f64) 692 return std::make_pair(0U, &SystemZ::FP64BitRegClass); 693 else if (VT == MVT::f128) 694 return std::make_pair(0U, &SystemZ::FP128BitRegClass); 695 return std::make_pair(0U, &SystemZ::FP32BitRegClass); 696 } 697 } 698 if (Constraint.size() > 0 && Constraint[0] == '{') { 699 // We need to override the default register parsing for GPRs and FPRs 700 // because the interpretation depends on VT. The internal names of 701 // the registers are also different from the external names 702 // (F0D and F0S instead of F0, etc.). 703 if (Constraint[1] == 'r') { 704 if (VT == MVT::i32) 705 return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass, 706 SystemZMC::GR32Regs); 707 if (VT == MVT::i128) 708 return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass, 709 SystemZMC::GR128Regs); 710 return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass, 711 SystemZMC::GR64Regs); 712 } 713 if (Constraint[1] == 'f') { 714 if (VT == MVT::f32) 715 return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass, 716 SystemZMC::FP32Regs); 717 if (VT == MVT::f128) 718 return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass, 719 SystemZMC::FP128Regs); 720 return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass, 721 SystemZMC::FP64Regs); 722 } 723 } 724 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); 725 } 726 727 void SystemZTargetLowering:: 728 LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, 729 std::vector<SDValue> &Ops, 730 SelectionDAG &DAG) const { 731 // Only support length 1 constraints for now. 
732 if (Constraint.length() == 1) { 733 switch (Constraint[0]) { 734 case 'I': // Unsigned 8-bit constant 735 if (auto *C = dyn_cast<ConstantSDNode>(Op)) 736 if (isUInt<8>(C->getZExtValue())) 737 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), 738 Op.getValueType())); 739 return; 740 741 case 'J': // Unsigned 12-bit constant 742 if (auto *C = dyn_cast<ConstantSDNode>(Op)) 743 if (isUInt<12>(C->getZExtValue())) 744 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), 745 Op.getValueType())); 746 return; 747 748 case 'K': // Signed 16-bit constant 749 if (auto *C = dyn_cast<ConstantSDNode>(Op)) 750 if (isInt<16>(C->getSExtValue())) 751 Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op), 752 Op.getValueType())); 753 return; 754 755 case 'L': // Signed 20-bit displacement (on all targets we support) 756 if (auto *C = dyn_cast<ConstantSDNode>(Op)) 757 if (isInt<20>(C->getSExtValue())) 758 Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op), 759 Op.getValueType())); 760 return; 761 762 case 'M': // 0x7fffffff 763 if (auto *C = dyn_cast<ConstantSDNode>(Op)) 764 if (C->getZExtValue() == 0x7fffffff) 765 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), 766 Op.getValueType())); 767 return; 768 } 769 } 770 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 771 } 772 773 //===----------------------------------------------------------------------===// 774 // Calling conventions 775 //===----------------------------------------------------------------------===// 776 777 #include "SystemZGenCallingConv.inc" 778 779 bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType, 780 Type *ToType) const { 781 return isTruncateFree(FromType, ToType); 782 } 783 784 bool SystemZTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { 785 return CI->isTailCall(); 786 } 787 788 // We do not yet support 128-bit single-element vector types. 
If the user 789 // attempts to use such types as function argument or return type, prefer 790 // to error out instead of emitting code violating the ABI. 791 static void VerifyVectorType(MVT VT, EVT ArgVT) { 792 if (ArgVT.isVector() && !VT.isVector()) 793 report_fatal_error("Unsupported vector argument or return type"); 794 } 795 796 static void VerifyVectorTypes(const SmallVectorImpl<ISD::InputArg> &Ins) { 797 for (unsigned i = 0; i < Ins.size(); ++i) 798 VerifyVectorType(Ins[i].VT, Ins[i].ArgVT); 799 } 800 801 static void VerifyVectorTypes(const SmallVectorImpl<ISD::OutputArg> &Outs) { 802 for (unsigned i = 0; i < Outs.size(); ++i) 803 VerifyVectorType(Outs[i].VT, Outs[i].ArgVT); 804 } 805 806 // Value is a value that has been passed to us in the location described by VA 807 // (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining 808 // any loads onto Chain. 809 static SDValue convertLocVTToValVT(SelectionDAG &DAG, const SDLoc &DL, 810 CCValAssign &VA, SDValue Chain, 811 SDValue Value) { 812 // If the argument has been promoted from a smaller type, insert an 813 // assertion to capture this. 814 if (VA.getLocInfo() == CCValAssign::SExt) 815 Value = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Value, 816 DAG.getValueType(VA.getValVT())); 817 else if (VA.getLocInfo() == CCValAssign::ZExt) 818 Value = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Value, 819 DAG.getValueType(VA.getValVT())); 820 821 if (VA.isExtInLoc()) 822 Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value); 823 else if (VA.getLocInfo() == CCValAssign::BCvt) { 824 // If this is a short vector argument loaded from the stack, 825 // extend from i64 to full vector size and then bitcast. 
826 assert(VA.getLocVT() == MVT::i64); 827 assert(VA.getValVT().isVector()); 828 Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)}); 829 Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value); 830 } else 831 assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo"); 832 return Value; 833 } 834 835 // Value is a value of type VA.getValVT() that we need to copy into 836 // the location described by VA. Return a copy of Value converted to 837 // VA.getValVT(). The caller is responsible for handling indirect values. 838 static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL, 839 CCValAssign &VA, SDValue Value) { 840 switch (VA.getLocInfo()) { 841 case CCValAssign::SExt: 842 return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value); 843 case CCValAssign::ZExt: 844 return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value); 845 case CCValAssign::AExt: 846 return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value); 847 case CCValAssign::BCvt: 848 // If this is a short vector argument to be stored to the stack, 849 // bitcast to v2i64 and then extract first element. 
850 assert(VA.getLocVT() == MVT::i64); 851 assert(VA.getValVT().isVector()); 852 Value = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Value); 853 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value, 854 DAG.getConstant(0, DL, MVT::i32)); 855 case CCValAssign::Full: 856 return Value; 857 default: 858 llvm_unreachable("Unhandled getLocInfo()"); 859 } 860 } 861 862 SDValue SystemZTargetLowering::LowerFormalArguments( 863 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, 864 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL, 865 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { 866 MachineFunction &MF = DAG.getMachineFunction(); 867 MachineFrameInfo *MFI = MF.getFrameInfo(); 868 MachineRegisterInfo &MRI = MF.getRegInfo(); 869 SystemZMachineFunctionInfo *FuncInfo = 870 MF.getInfo<SystemZMachineFunctionInfo>(); 871 auto *TFL = 872 static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering()); 873 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 874 875 // Detect unsupported vector argument types. 876 if (Subtarget.hasVector()) 877 VerifyVectorTypes(Ins); 878 879 // Assign locations to all of the incoming arguments. 880 SmallVector<CCValAssign, 16> ArgLocs; 881 SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 882 CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ); 883 884 unsigned NumFixedGPRs = 0; 885 unsigned NumFixedFPRs = 0; 886 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { 887 SDValue ArgValue; 888 CCValAssign &VA = ArgLocs[I]; 889 EVT LocVT = VA.getLocVT(); 890 if (VA.isRegLoc()) { 891 // Arguments passed in registers 892 const TargetRegisterClass *RC; 893 switch (LocVT.getSimpleVT().SimpleTy) { 894 default: 895 // Integers smaller than i64 should be promoted to i64. 
896 llvm_unreachable("Unexpected argument type"); 897 case MVT::i32: 898 NumFixedGPRs += 1; 899 RC = &SystemZ::GR32BitRegClass; 900 break; 901 case MVT::i64: 902 NumFixedGPRs += 1; 903 RC = &SystemZ::GR64BitRegClass; 904 break; 905 case MVT::f32: 906 NumFixedFPRs += 1; 907 RC = &SystemZ::FP32BitRegClass; 908 break; 909 case MVT::f64: 910 NumFixedFPRs += 1; 911 RC = &SystemZ::FP64BitRegClass; 912 break; 913 case MVT::v16i8: 914 case MVT::v8i16: 915 case MVT::v4i32: 916 case MVT::v2i64: 917 case MVT::v4f32: 918 case MVT::v2f64: 919 RC = &SystemZ::VR128BitRegClass; 920 break; 921 } 922 923 unsigned VReg = MRI.createVirtualRegister(RC); 924 MRI.addLiveIn(VA.getLocReg(), VReg); 925 ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); 926 } else { 927 assert(VA.isMemLoc() && "Argument not register or memory"); 928 929 // Create the frame index object for this incoming parameter. 930 int FI = MFI->CreateFixedObject(LocVT.getSizeInBits() / 8, 931 VA.getLocMemOffset(), true); 932 933 // Create the SelectionDAG nodes corresponding to a load 934 // from this parameter. Unpromoted ints and floats are 935 // passed as right-justified 8-byte values. 936 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 937 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32) 938 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, 939 DAG.getIntPtrConstant(4, DL)); 940 ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN, 941 MachinePointerInfo::getFixedStack(MF, FI), false, 942 false, false, 0); 943 } 944 945 // Convert the value of the argument register into the value that's 946 // being passed. 947 if (VA.getLocInfo() == CCValAssign::Indirect) { 948 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, 949 ArgValue, MachinePointerInfo(), 950 false, false, false, 0)); 951 // If the original argument was split (e.g. i128), we need 952 // to load all parts of it here (using the same address). 
953 unsigned ArgIndex = Ins[I].OrigArgIndex; 954 assert (Ins[I].PartOffset == 0); 955 while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) { 956 CCValAssign &PartVA = ArgLocs[I + 1]; 957 unsigned PartOffset = Ins[I + 1].PartOffset; 958 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, 959 DAG.getIntPtrConstant(PartOffset, DL)); 960 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, 961 Address, MachinePointerInfo(), 962 false, false, false, 0)); 963 ++I; 964 } 965 } else 966 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue)); 967 } 968 969 if (IsVarArg) { 970 // Save the number of non-varargs registers for later use by va_start, etc. 971 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs); 972 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs); 973 974 // Likewise the address (in the form of a frame index) of where the 975 // first stack vararg would be. The 1-byte size here is arbitrary. 976 int64_t StackSize = CCInfo.getNextStackOffset(); 977 FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize, true)); 978 979 // ...and a similar frame index for the caller-allocated save area 980 // that will be used to store the incoming registers. 981 int64_t RegSaveOffset = TFL->getOffsetOfLocalArea(); 982 unsigned RegSaveIndex = MFI->CreateFixedObject(1, RegSaveOffset, true); 983 FuncInfo->setRegSaveFrameIndex(RegSaveIndex); 984 985 // Store the FPR varargs in the reserved frame slots. (We store the 986 // GPRs as part of the prologue.) 
987 if (NumFixedFPRs < SystemZ::NumArgFPRs) { 988 SDValue MemOps[SystemZ::NumArgFPRs]; 989 for (unsigned I = NumFixedFPRs; I < SystemZ::NumArgFPRs; ++I) { 990 unsigned Offset = TFL->getRegSpillOffset(SystemZ::ArgFPRs[I]); 991 int FI = MFI->CreateFixedObject(8, RegSaveOffset + Offset, true); 992 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); 993 unsigned VReg = MF.addLiveIn(SystemZ::ArgFPRs[I], 994 &SystemZ::FP64BitRegClass); 995 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64); 996 MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN, 997 MachinePointerInfo::getFixedStack(MF, FI), 998 false, false, 0); 999 } 1000 // Join the stores, which are independent of one another. 1001 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, 1002 makeArrayRef(&MemOps[NumFixedFPRs], 1003 SystemZ::NumArgFPRs-NumFixedFPRs)); 1004 } 1005 } 1006 1007 return Chain; 1008 } 1009 1010 static bool canUseSiblingCall(const CCState &ArgCCInfo, 1011 SmallVectorImpl<CCValAssign> &ArgLocs, 1012 SmallVectorImpl<ISD::OutputArg> &Outs) { 1013 // Punt if there are any indirect or stack arguments, or if the call 1014 // needs the callee-saved argument register R6, or if the call uses 1015 // the callee-saved register arguments SwiftSelf and SwiftError. 
1016 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { 1017 CCValAssign &VA = ArgLocs[I]; 1018 if (VA.getLocInfo() == CCValAssign::Indirect) 1019 return false; 1020 if (!VA.isRegLoc()) 1021 return false; 1022 unsigned Reg = VA.getLocReg(); 1023 if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D) 1024 return false; 1025 if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError()) 1026 return false; 1027 } 1028 return true; 1029 } 1030 1031 SDValue 1032 SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, 1033 SmallVectorImpl<SDValue> &InVals) const { 1034 SelectionDAG &DAG = CLI.DAG; 1035 SDLoc &DL = CLI.DL; 1036 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 1037 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 1038 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 1039 SDValue Chain = CLI.Chain; 1040 SDValue Callee = CLI.Callee; 1041 bool &IsTailCall = CLI.IsTailCall; 1042 CallingConv::ID CallConv = CLI.CallConv; 1043 bool IsVarArg = CLI.IsVarArg; 1044 MachineFunction &MF = DAG.getMachineFunction(); 1045 EVT PtrVT = getPointerTy(MF.getDataLayout()); 1046 1047 // Detect unsupported vector argument and return types. 1048 if (Subtarget.hasVector()) { 1049 VerifyVectorTypes(Outs); 1050 VerifyVectorTypes(Ins); 1051 } 1052 1053 // Analyze the operands of the call, assigning locations to each operand. 1054 SmallVector<CCValAssign, 16> ArgLocs; 1055 SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 1056 ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ); 1057 1058 // We don't support GuaranteedTailCallOpt, only automatically-detected 1059 // sibling calls. 1060 if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs)) 1061 IsTailCall = false; 1062 1063 // Get a count of how many bytes are to be pushed on the stack. 1064 unsigned NumBytes = ArgCCInfo.getNextStackOffset(); 1065 1066 // Mark the start of the call. 
1067 if (!IsTailCall) 1068 Chain = DAG.getCALLSEQ_START(Chain, 1069 DAG.getConstant(NumBytes, DL, PtrVT, true), 1070 DL); 1071 1072 // Copy argument values to their designated locations. 1073 SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass; 1074 SmallVector<SDValue, 8> MemOpChains; 1075 SDValue StackPtr; 1076 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { 1077 CCValAssign &VA = ArgLocs[I]; 1078 SDValue ArgValue = OutVals[I]; 1079 1080 if (VA.getLocInfo() == CCValAssign::Indirect) { 1081 // Store the argument in a stack slot and pass its address. 1082 SDValue SpillSlot = DAG.CreateStackTemporary(Outs[I].ArgVT); 1083 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); 1084 MemOpChains.push_back(DAG.getStore( 1085 Chain, DL, ArgValue, SpillSlot, 1086 MachinePointerInfo::getFixedStack(MF, FI), false, false, 0)); 1087 // If the original argument was split (e.g. i128), we need 1088 // to store all parts of it here (and pass just one address). 1089 unsigned ArgIndex = Outs[I].OrigArgIndex; 1090 assert (Outs[I].PartOffset == 0); 1091 while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) { 1092 SDValue PartValue = OutVals[I + 1]; 1093 unsigned PartOffset = Outs[I + 1].PartOffset; 1094 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, 1095 DAG.getIntPtrConstant(PartOffset, DL)); 1096 MemOpChains.push_back(DAG.getStore( 1097 Chain, DL, PartValue, Address, 1098 MachinePointerInfo::getFixedStack(MF, FI), false, false, 0)); 1099 ++I; 1100 } 1101 ArgValue = SpillSlot; 1102 } else 1103 ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue); 1104 1105 if (VA.isRegLoc()) 1106 // Queue up the argument copies and emit them at the end. 1107 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue)); 1108 else { 1109 assert(VA.isMemLoc() && "Argument not register or memory"); 1110 1111 // Work out the address of the stack slot. Unpromoted ints and 1112 // floats are passed as right-justified 8-byte values. 
1113 if (!StackPtr.getNode()) 1114 StackPtr = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, PtrVT); 1115 unsigned Offset = SystemZMC::CallFrameSize + VA.getLocMemOffset(); 1116 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32) 1117 Offset += 4; 1118 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, 1119 DAG.getIntPtrConstant(Offset, DL)); 1120 1121 // Emit the store. 1122 MemOpChains.push_back(DAG.getStore(Chain, DL, ArgValue, Address, 1123 MachinePointerInfo(), 1124 false, false, 0)); 1125 } 1126 } 1127 1128 // Join the stores, which are independent of one another. 1129 if (!MemOpChains.empty()) 1130 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); 1131 1132 // Accept direct calls by converting symbolic call addresses to the 1133 // associated Target* opcodes. Force %r1 to be used for indirect 1134 // tail calls. 1135 SDValue Glue; 1136 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 1137 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT); 1138 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee); 1139 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) { 1140 Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT); 1141 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee); 1142 } else if (IsTailCall) { 1143 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue); 1144 Glue = Chain.getValue(1); 1145 Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType()); 1146 } 1147 1148 // Build a sequence of copy-to-reg nodes, chained and glued together. 1149 for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) { 1150 Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first, 1151 RegsToPass[I].second, Glue); 1152 Glue = Chain.getValue(1); 1153 } 1154 1155 // The first call operand is the chain and the second is the target address. 
1156 SmallVector<SDValue, 8> Ops; 1157 Ops.push_back(Chain); 1158 Ops.push_back(Callee); 1159 1160 // Add argument registers to the end of the list so that they are 1161 // known live into the call. 1162 for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) 1163 Ops.push_back(DAG.getRegister(RegsToPass[I].first, 1164 RegsToPass[I].second.getValueType())); 1165 1166 // Add a register mask operand representing the call-preserved registers. 1167 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 1168 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); 1169 assert(Mask && "Missing call preserved mask for calling convention"); 1170 Ops.push_back(DAG.getRegisterMask(Mask)); 1171 1172 // Glue the call to the argument copies, if any. 1173 if (Glue.getNode()) 1174 Ops.push_back(Glue); 1175 1176 // Emit the call. 1177 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 1178 if (IsTailCall) 1179 return DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops); 1180 Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops); 1181 Glue = Chain.getValue(1); 1182 1183 // Mark the end of the call, which is glued to the call itself. 1184 Chain = DAG.getCALLSEQ_END(Chain, 1185 DAG.getConstant(NumBytes, DL, PtrVT, true), 1186 DAG.getConstant(0, DL, PtrVT, true), 1187 Glue, DL); 1188 Glue = Chain.getValue(1); 1189 1190 // Assign locations to each value returned by this call. 1191 SmallVector<CCValAssign, 16> RetLocs; 1192 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext()); 1193 RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ); 1194 1195 // Copy all of the result registers out of their specified physreg. 1196 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) { 1197 CCValAssign &VA = RetLocs[I]; 1198 1199 // Copy the value out, gluing the copy to the end of the call sequence. 
1200 SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), 1201 VA.getLocVT(), Glue); 1202 Chain = RetValue.getValue(1); 1203 Glue = RetValue.getValue(2); 1204 1205 // Convert the value of the return register into the value that's 1206 // being returned. 1207 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue)); 1208 } 1209 1210 return Chain; 1211 } 1212 1213 bool SystemZTargetLowering:: 1214 CanLowerReturn(CallingConv::ID CallConv, 1215 MachineFunction &MF, bool isVarArg, 1216 const SmallVectorImpl<ISD::OutputArg> &Outs, 1217 LLVMContext &Context) const { 1218 // Detect unsupported vector return types. 1219 if (Subtarget.hasVector()) 1220 VerifyVectorTypes(Outs); 1221 1222 // Special case that we cannot easily detect in RetCC_SystemZ since 1223 // i128 is not a legal type. 1224 for (auto &Out : Outs) 1225 if (Out.ArgVT == MVT::i128) 1226 return false; 1227 1228 SmallVector<CCValAssign, 16> RetLocs; 1229 CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context); 1230 return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ); 1231 } 1232 1233 SDValue 1234 SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, 1235 bool IsVarArg, 1236 const SmallVectorImpl<ISD::OutputArg> &Outs, 1237 const SmallVectorImpl<SDValue> &OutVals, 1238 const SDLoc &DL, SelectionDAG &DAG) const { 1239 MachineFunction &MF = DAG.getMachineFunction(); 1240 1241 // Detect unsupported vector return types. 1242 if (Subtarget.hasVector()) 1243 VerifyVectorTypes(Outs); 1244 1245 // Assign locations to each returned value. 1246 SmallVector<CCValAssign, 16> RetLocs; 1247 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext()); 1248 RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ); 1249 1250 // Quick exit for void returns 1251 if (RetLocs.empty()) 1252 return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, Chain); 1253 1254 // Copy the result values into the output registers. 
1255 SDValue Glue; 1256 SmallVector<SDValue, 4> RetOps; 1257 RetOps.push_back(Chain); 1258 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) { 1259 CCValAssign &VA = RetLocs[I]; 1260 SDValue RetValue = OutVals[I]; 1261 1262 // Make the return register live on exit. 1263 assert(VA.isRegLoc() && "Can only return in registers!"); 1264 1265 // Promote the value as required. 1266 RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue); 1267 1268 // Chain and glue the copies together. 1269 unsigned Reg = VA.getLocReg(); 1270 Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue); 1271 Glue = Chain.getValue(1); 1272 RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT())); 1273 } 1274 1275 // Update chain and glue. 1276 RetOps[0] = Chain; 1277 if (Glue.getNode()) 1278 RetOps.push_back(Glue); 1279 1280 return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, RetOps); 1281 } 1282 1283 SDValue SystemZTargetLowering::prepareVolatileOrAtomicLoad( 1284 SDValue Chain, const SDLoc &DL, SelectionDAG &DAG) const { 1285 return DAG.getNode(SystemZISD::SERIALIZE, DL, MVT::Other, Chain); 1286 } 1287 1288 // Return true if Op is an intrinsic node with chain that returns the CC value 1289 // as its only (other) argument. Provide the associated SystemZISD opcode and 1290 // the mask of valid CC values if so. 
1291 static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode, 1292 unsigned &CCValid) { 1293 unsigned Id = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); 1294 switch (Id) { 1295 case Intrinsic::s390_tbegin: 1296 Opcode = SystemZISD::TBEGIN; 1297 CCValid = SystemZ::CCMASK_TBEGIN; 1298 return true; 1299 1300 case Intrinsic::s390_tbegin_nofloat: 1301 Opcode = SystemZISD::TBEGIN_NOFLOAT; 1302 CCValid = SystemZ::CCMASK_TBEGIN; 1303 return true; 1304 1305 case Intrinsic::s390_tend: 1306 Opcode = SystemZISD::TEND; 1307 CCValid = SystemZ::CCMASK_TEND; 1308 return true; 1309 1310 default: 1311 return false; 1312 } 1313 } 1314 1315 // Return true if Op is an intrinsic node without chain that returns the 1316 // CC value as its final argument. Provide the associated SystemZISD 1317 // opcode and the mask of valid CC values if so. 1318 static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) { 1319 unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 1320 switch (Id) { 1321 case Intrinsic::s390_vpkshs: 1322 case Intrinsic::s390_vpksfs: 1323 case Intrinsic::s390_vpksgs: 1324 Opcode = SystemZISD::PACKS_CC; 1325 CCValid = SystemZ::CCMASK_VCMP; 1326 return true; 1327 1328 case Intrinsic::s390_vpklshs: 1329 case Intrinsic::s390_vpklsfs: 1330 case Intrinsic::s390_vpklsgs: 1331 Opcode = SystemZISD::PACKLS_CC; 1332 CCValid = SystemZ::CCMASK_VCMP; 1333 return true; 1334 1335 case Intrinsic::s390_vceqbs: 1336 case Intrinsic::s390_vceqhs: 1337 case Intrinsic::s390_vceqfs: 1338 case Intrinsic::s390_vceqgs: 1339 Opcode = SystemZISD::VICMPES; 1340 CCValid = SystemZ::CCMASK_VCMP; 1341 return true; 1342 1343 case Intrinsic::s390_vchbs: 1344 case Intrinsic::s390_vchhs: 1345 case Intrinsic::s390_vchfs: 1346 case Intrinsic::s390_vchgs: 1347 Opcode = SystemZISD::VICMPHS; 1348 CCValid = SystemZ::CCMASK_VCMP; 1349 return true; 1350 1351 case Intrinsic::s390_vchlbs: 1352 case Intrinsic::s390_vchlhs: 1353 case Intrinsic::s390_vchlfs: 1354 
case Intrinsic::s390_vchlgs: 1355 Opcode = SystemZISD::VICMPHLS; 1356 CCValid = SystemZ::CCMASK_VCMP; 1357 return true; 1358 1359 case Intrinsic::s390_vtm: 1360 Opcode = SystemZISD::VTM; 1361 CCValid = SystemZ::CCMASK_VCMP; 1362 return true; 1363 1364 case Intrinsic::s390_vfaebs: 1365 case Intrinsic::s390_vfaehs: 1366 case Intrinsic::s390_vfaefs: 1367 Opcode = SystemZISD::VFAE_CC; 1368 CCValid = SystemZ::CCMASK_ANY; 1369 return true; 1370 1371 case Intrinsic::s390_vfaezbs: 1372 case Intrinsic::s390_vfaezhs: 1373 case Intrinsic::s390_vfaezfs: 1374 Opcode = SystemZISD::VFAEZ_CC; 1375 CCValid = SystemZ::CCMASK_ANY; 1376 return true; 1377 1378 case Intrinsic::s390_vfeebs: 1379 case Intrinsic::s390_vfeehs: 1380 case Intrinsic::s390_vfeefs: 1381 Opcode = SystemZISD::VFEE_CC; 1382 CCValid = SystemZ::CCMASK_ANY; 1383 return true; 1384 1385 case Intrinsic::s390_vfeezbs: 1386 case Intrinsic::s390_vfeezhs: 1387 case Intrinsic::s390_vfeezfs: 1388 Opcode = SystemZISD::VFEEZ_CC; 1389 CCValid = SystemZ::CCMASK_ANY; 1390 return true; 1391 1392 case Intrinsic::s390_vfenebs: 1393 case Intrinsic::s390_vfenehs: 1394 case Intrinsic::s390_vfenefs: 1395 Opcode = SystemZISD::VFENE_CC; 1396 CCValid = SystemZ::CCMASK_ANY; 1397 return true; 1398 1399 case Intrinsic::s390_vfenezbs: 1400 case Intrinsic::s390_vfenezhs: 1401 case Intrinsic::s390_vfenezfs: 1402 Opcode = SystemZISD::VFENEZ_CC; 1403 CCValid = SystemZ::CCMASK_ANY; 1404 return true; 1405 1406 case Intrinsic::s390_vistrbs: 1407 case Intrinsic::s390_vistrhs: 1408 case Intrinsic::s390_vistrfs: 1409 Opcode = SystemZISD::VISTR_CC; 1410 CCValid = SystemZ::CCMASK_0 | SystemZ::CCMASK_3; 1411 return true; 1412 1413 case Intrinsic::s390_vstrcbs: 1414 case Intrinsic::s390_vstrchs: 1415 case Intrinsic::s390_vstrcfs: 1416 Opcode = SystemZISD::VSTRC_CC; 1417 CCValid = SystemZ::CCMASK_ANY; 1418 return true; 1419 1420 case Intrinsic::s390_vstrczbs: 1421 case Intrinsic::s390_vstrczhs: 1422 case Intrinsic::s390_vstrczfs: 1423 Opcode = 
SystemZISD::VSTRCZ_CC; 1424 CCValid = SystemZ::CCMASK_ANY; 1425 return true; 1426 1427 case Intrinsic::s390_vfcedbs: 1428 Opcode = SystemZISD::VFCMPES; 1429 CCValid = SystemZ::CCMASK_VCMP; 1430 return true; 1431 1432 case Intrinsic::s390_vfchdbs: 1433 Opcode = SystemZISD::VFCMPHS; 1434 CCValid = SystemZ::CCMASK_VCMP; 1435 return true; 1436 1437 case Intrinsic::s390_vfchedbs: 1438 Opcode = SystemZISD::VFCMPHES; 1439 CCValid = SystemZ::CCMASK_VCMP; 1440 return true; 1441 1442 case Intrinsic::s390_vftcidb: 1443 Opcode = SystemZISD::VFTCI; 1444 CCValid = SystemZ::CCMASK_VCMP; 1445 return true; 1446 1447 case Intrinsic::s390_tdc: 1448 Opcode = SystemZISD::TDC; 1449 CCValid = SystemZ::CCMASK_TDC; 1450 return true; 1451 1452 default: 1453 return false; 1454 } 1455 } 1456 1457 // Emit an intrinsic with chain with a glued value instead of its CC result. 1458 static SDValue emitIntrinsicWithChainAndGlue(SelectionDAG &DAG, SDValue Op, 1459 unsigned Opcode) { 1460 // Copy all operands except the intrinsic ID. 1461 unsigned NumOps = Op.getNumOperands(); 1462 SmallVector<SDValue, 6> Ops; 1463 Ops.reserve(NumOps - 1); 1464 Ops.push_back(Op.getOperand(0)); 1465 for (unsigned I = 2; I < NumOps; ++I) 1466 Ops.push_back(Op.getOperand(I)); 1467 1468 assert(Op->getNumValues() == 2 && "Expected only CC result and chain"); 1469 SDVTList RawVTs = DAG.getVTList(MVT::Other, MVT::Glue); 1470 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops); 1471 SDValue OldChain = SDValue(Op.getNode(), 1); 1472 SDValue NewChain = SDValue(Intr.getNode(), 0); 1473 DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain); 1474 return Intr; 1475 } 1476 1477 // Emit an intrinsic with a glued value instead of its CC result. 1478 static SDValue emitIntrinsicWithGlue(SelectionDAG &DAG, SDValue Op, 1479 unsigned Opcode) { 1480 // Copy all operands except the intrinsic ID. 
1481 unsigned NumOps = Op.getNumOperands(); 1482 SmallVector<SDValue, 6> Ops; 1483 Ops.reserve(NumOps - 1); 1484 for (unsigned I = 1; I < NumOps; ++I) 1485 Ops.push_back(Op.getOperand(I)); 1486 1487 if (Op->getNumValues() == 1) 1488 return DAG.getNode(Opcode, SDLoc(Op), MVT::Glue, Ops); 1489 assert(Op->getNumValues() == 2 && "Expected exactly one non-CC result"); 1490 SDVTList RawVTs = DAG.getVTList(Op->getValueType(0), MVT::Glue); 1491 return DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops); 1492 } 1493 1494 // CC is a comparison that will be implemented using an integer or 1495 // floating-point comparison. Return the condition code mask for 1496 // a branch on true. In the integer case, CCMASK_CMP_UO is set for 1497 // unsigned comparisons and clear for signed ones. In the floating-point 1498 // case, CCMASK_CMP_UO has its normal mask meaning (unordered). 1499 static unsigned CCMaskForCondCode(ISD::CondCode CC) { 1500 #define CONV(X) \ 1501 case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \ 1502 case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \ 1503 case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X 1504 1505 switch (CC) { 1506 default: 1507 llvm_unreachable("Invalid integer condition!"); 1508 1509 CONV(EQ); 1510 CONV(NE); 1511 CONV(GT); 1512 CONV(GE); 1513 CONV(LT); 1514 CONV(LE); 1515 1516 case ISD::SETO: return SystemZ::CCMASK_CMP_O; 1517 case ISD::SETUO: return SystemZ::CCMASK_CMP_UO; 1518 } 1519 #undef CONV 1520 } 1521 1522 // Return a sequence for getting a 1 from an IPM result when CC has a 1523 // value in CCMask and a 0 when CC has a value in CCValid & ~CCMask. 1524 // The handling of CC values outside CCValid doesn't matter. 1525 static IPMConversion getIPMConversion(unsigned CCValid, unsigned CCMask) { 1526 // Deal with cases where the result can be taken directly from a bit 1527 // of the IPM result. 
1528 if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_3))) 1529 return IPMConversion(0, 0, SystemZ::IPM_CC); 1530 if (CCMask == (CCValid & (SystemZ::CCMASK_2 | SystemZ::CCMASK_3))) 1531 return IPMConversion(0, 0, SystemZ::IPM_CC + 1); 1532 1533 // Deal with cases where we can add a value to force the sign bit 1534 // to contain the right value. Putting the bit in 31 means we can 1535 // use SRL rather than RISBG(L), and also makes it easier to get a 1536 // 0/-1 value, so it has priority over the other tests below. 1537 // 1538 // These sequences rely on the fact that the upper two bits of the 1539 // IPM result are zero. 1540 uint64_t TopBit = uint64_t(1) << 31; 1541 if (CCMask == (CCValid & SystemZ::CCMASK_0)) 1542 return IPMConversion(0, -(1 << SystemZ::IPM_CC), 31); 1543 if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_1))) 1544 return IPMConversion(0, -(2 << SystemZ::IPM_CC), 31); 1545 if (CCMask == (CCValid & (SystemZ::CCMASK_0 1546 | SystemZ::CCMASK_1 1547 | SystemZ::CCMASK_2))) 1548 return IPMConversion(0, -(3 << SystemZ::IPM_CC), 31); 1549 if (CCMask == (CCValid & SystemZ::CCMASK_3)) 1550 return IPMConversion(0, TopBit - (3 << SystemZ::IPM_CC), 31); 1551 if (CCMask == (CCValid & (SystemZ::CCMASK_1 1552 | SystemZ::CCMASK_2 1553 | SystemZ::CCMASK_3))) 1554 return IPMConversion(0, TopBit - (1 << SystemZ::IPM_CC), 31); 1555 1556 // Next try inverting the value and testing a bit. 0/1 could be 1557 // handled this way too, but we dealt with that case above. 1558 if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_2))) 1559 return IPMConversion(-1, 0, SystemZ::IPM_CC); 1560 1561 // Handle cases where adding a value forces a non-sign bit to contain 1562 // the right value. 
1563 if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_2))) 1564 return IPMConversion(0, 1 << SystemZ::IPM_CC, SystemZ::IPM_CC + 1); 1565 if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_3))) 1566 return IPMConversion(0, -(1 << SystemZ::IPM_CC), SystemZ::IPM_CC + 1); 1567 1568 // The remaining cases are 1, 2, 0/1/3 and 0/2/3. All these are 1569 // can be done by inverting the low CC bit and applying one of the 1570 // sign-based extractions above. 1571 if (CCMask == (CCValid & SystemZ::CCMASK_1)) 1572 return IPMConversion(1 << SystemZ::IPM_CC, -(1 << SystemZ::IPM_CC), 31); 1573 if (CCMask == (CCValid & SystemZ::CCMASK_2)) 1574 return IPMConversion(1 << SystemZ::IPM_CC, 1575 TopBit - (3 << SystemZ::IPM_CC), 31); 1576 if (CCMask == (CCValid & (SystemZ::CCMASK_0 1577 | SystemZ::CCMASK_1 1578 | SystemZ::CCMASK_3))) 1579 return IPMConversion(1 << SystemZ::IPM_CC, -(3 << SystemZ::IPM_CC), 31); 1580 if (CCMask == (CCValid & (SystemZ::CCMASK_0 1581 | SystemZ::CCMASK_2 1582 | SystemZ::CCMASK_3))) 1583 return IPMConversion(1 << SystemZ::IPM_CC, 1584 TopBit - (1 << SystemZ::IPM_CC), 31); 1585 1586 llvm_unreachable("Unexpected CC combination"); 1587 } 1588 1589 // If C can be converted to a comparison against zero, adjust the operands 1590 // as necessary. 
1591 static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) { 1592 if (C.ICmpType == SystemZICMP::UnsignedOnly) 1593 return; 1594 1595 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode()); 1596 if (!ConstOp1) 1597 return; 1598 1599 int64_t Value = ConstOp1->getSExtValue(); 1600 if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) || 1601 (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) || 1602 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) || 1603 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) { 1604 C.CCMask ^= SystemZ::CCMASK_CMP_EQ; 1605 C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType()); 1606 } 1607 } 1608 1609 // If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI, 1610 // adjust the operands as necessary. 1611 static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL, 1612 Comparison &C) { 1613 // For us to make any changes, it must a comparison between a single-use 1614 // load and a constant. 1615 if (!C.Op0.hasOneUse() || 1616 C.Op0.getOpcode() != ISD::LOAD || 1617 C.Op1.getOpcode() != ISD::Constant) 1618 return; 1619 1620 // We must have an 8- or 16-bit load. 1621 auto *Load = cast<LoadSDNode>(C.Op0); 1622 unsigned NumBits = Load->getMemoryVT().getStoreSizeInBits(); 1623 if (NumBits != 8 && NumBits != 16) 1624 return; 1625 1626 // The load must be an extending one and the constant must be within the 1627 // range of the unextended value. 1628 auto *ConstOp1 = cast<ConstantSDNode>(C.Op1); 1629 uint64_t Value = ConstOp1->getZExtValue(); 1630 uint64_t Mask = (1 << NumBits) - 1; 1631 if (Load->getExtensionType() == ISD::SEXTLOAD) { 1632 // Make sure that ConstOp1 is in range of C.Op0. 
1633 int64_t SignedValue = ConstOp1->getSExtValue(); 1634 if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask) 1635 return; 1636 if (C.ICmpType != SystemZICMP::SignedOnly) { 1637 // Unsigned comparison between two sign-extended values is equivalent 1638 // to unsigned comparison between two zero-extended values. 1639 Value &= Mask; 1640 } else if (NumBits == 8) { 1641 // Try to treat the comparison as unsigned, so that we can use CLI. 1642 // Adjust CCMask and Value as necessary. 1643 if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT) 1644 // Test whether the high bit of the byte is set. 1645 Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT; 1646 else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE) 1647 // Test whether the high bit of the byte is clear. 1648 Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT; 1649 else 1650 // No instruction exists for this combination. 1651 return; 1652 C.ICmpType = SystemZICMP::UnsignedOnly; 1653 } 1654 } else if (Load->getExtensionType() == ISD::ZEXTLOAD) { 1655 if (Value > Mask) 1656 return; 1657 // If the constant is in range, we can use any comparison. 1658 C.ICmpType = SystemZICMP::Any; 1659 } else 1660 return; 1661 1662 // Make sure that the first operand is an i32 of the right extension type. 1663 ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ? 1664 ISD::SEXTLOAD : 1665 ISD::ZEXTLOAD); 1666 if (C.Op0.getValueType() != MVT::i32 || 1667 Load->getExtensionType() != ExtType) 1668 C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, 1669 Load->getChain(), Load->getBasePtr(), 1670 Load->getPointerInfo(), Load->getMemoryVT(), 1671 Load->isVolatile(), Load->isNonTemporal(), 1672 Load->isInvariant(), Load->getAlignment()); 1673 1674 // Make sure that the second operand is an i32 with the right value. 
1675 if (C.Op1.getValueType() != MVT::i32 || 1676 Value != ConstOp1->getZExtValue()) 1677 C.Op1 = DAG.getConstant(Value, DL, MVT::i32); 1678 } 1679 1680 // Return true if Op is either an unextended load, or a load suitable 1681 // for integer register-memory comparisons of type ICmpType. 1682 static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) { 1683 auto *Load = dyn_cast<LoadSDNode>(Op.getNode()); 1684 if (Load) { 1685 // There are no instructions to compare a register with a memory byte. 1686 if (Load->getMemoryVT() == MVT::i8) 1687 return false; 1688 // Otherwise decide on extension type. 1689 switch (Load->getExtensionType()) { 1690 case ISD::NON_EXTLOAD: 1691 return true; 1692 case ISD::SEXTLOAD: 1693 return ICmpType != SystemZICMP::UnsignedOnly; 1694 case ISD::ZEXTLOAD: 1695 return ICmpType != SystemZICMP::SignedOnly; 1696 default: 1697 break; 1698 } 1699 } 1700 return false; 1701 } 1702 1703 // Return true if it is better to swap the operands of C. 1704 static bool shouldSwapCmpOperands(const Comparison &C) { 1705 // Leave f128 comparisons alone, since they have no memory forms. 1706 if (C.Op0.getValueType() == MVT::f128) 1707 return false; 1708 1709 // Always keep a floating-point constant second, since comparisons with 1710 // zero can use LOAD TEST and comparisons with other constants make a 1711 // natural memory operand. 1712 if (isa<ConstantFPSDNode>(C.Op1)) 1713 return false; 1714 1715 // Never swap comparisons with zero since there are many ways to optimize 1716 // those later. 1717 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1); 1718 if (ConstOp1 && ConstOp1->getZExtValue() == 0) 1719 return false; 1720 1721 // Also keep natural memory operands second if the loaded value is 1722 // only used here. Several comparisons have memory forms. 1723 if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse()) 1724 return false; 1725 1726 // Look for cases where Cmp0 is a single-use load and Cmp1 isn't. 
1727 // In that case we generally prefer the memory to be second. 1728 if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) { 1729 // The only exceptions are when the second operand is a constant and 1730 // we can use things like CHHSI. 1731 if (!ConstOp1) 1732 return true; 1733 // The unsigned memory-immediate instructions can handle 16-bit 1734 // unsigned integers. 1735 if (C.ICmpType != SystemZICMP::SignedOnly && 1736 isUInt<16>(ConstOp1->getZExtValue())) 1737 return false; 1738 // The signed memory-immediate instructions can handle 16-bit 1739 // signed integers. 1740 if (C.ICmpType != SystemZICMP::UnsignedOnly && 1741 isInt<16>(ConstOp1->getSExtValue())) 1742 return false; 1743 return true; 1744 } 1745 1746 // Try to promote the use of CGFR and CLGFR. 1747 unsigned Opcode0 = C.Op0.getOpcode(); 1748 if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND) 1749 return true; 1750 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND) 1751 return true; 1752 if (C.ICmpType != SystemZICMP::SignedOnly && 1753 Opcode0 == ISD::AND && 1754 C.Op0.getOperand(1).getOpcode() == ISD::Constant && 1755 cast<ConstantSDNode>(C.Op0.getOperand(1))->getZExtValue() == 0xffffffff) 1756 return true; 1757 1758 return false; 1759 } 1760 1761 // Return a version of comparison CC mask CCMask in which the LT and GT 1762 // actions are swapped. 1763 static unsigned reverseCCMask(unsigned CCMask) { 1764 return ((CCMask & SystemZ::CCMASK_CMP_EQ) | 1765 (CCMask & SystemZ::CCMASK_CMP_GT ? SystemZ::CCMASK_CMP_LT : 0) | 1766 (CCMask & SystemZ::CCMASK_CMP_LT ? SystemZ::CCMASK_CMP_GT : 0) | 1767 (CCMask & SystemZ::CCMASK_CMP_UO)); 1768 } 1769 1770 // Check whether C tests for equality between X and Y and whether X - Y 1771 // or Y - X is also computed. In that case it's better to compare the 1772 // result of the subtraction against zero. 
1773 static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL, 1774 Comparison &C) { 1775 if (C.CCMask == SystemZ::CCMASK_CMP_EQ || 1776 C.CCMask == SystemZ::CCMASK_CMP_NE) { 1777 for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) { 1778 SDNode *N = *I; 1779 if (N->getOpcode() == ISD::SUB && 1780 ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) || 1781 (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) { 1782 C.Op0 = SDValue(N, 0); 1783 C.Op1 = DAG.getConstant(0, DL, N->getValueType(0)); 1784 return; 1785 } 1786 } 1787 } 1788 } 1789 1790 // Check whether C compares a floating-point value with zero and if that 1791 // floating-point value is also negated. In this case we can use the 1792 // negation to set CC, so avoiding separate LOAD AND TEST and 1793 // LOAD (NEGATIVE/COMPLEMENT) instructions. 1794 static void adjustForFNeg(Comparison &C) { 1795 auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1); 1796 if (C1 && C1->isZero()) { 1797 for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) { 1798 SDNode *N = *I; 1799 if (N->getOpcode() == ISD::FNEG) { 1800 C.Op0 = SDValue(N, 0); 1801 C.CCMask = reverseCCMask(C.CCMask); 1802 return; 1803 } 1804 } 1805 } 1806 } 1807 1808 // Check whether C compares (shl X, 32) with 0 and whether X is 1809 // also sign-extended. In that case it is better to test the result 1810 // of the sign extension using LTGFR. 1811 // 1812 // This case is important because InstCombine transforms a comparison 1813 // with (sext (trunc X)) into a comparison with (shl X, 32). 1814 static void adjustForLTGFR(Comparison &C) { 1815 // Check for a comparison between (shl X, 32) and 0. 
1816 if (C.Op0.getOpcode() == ISD::SHL && 1817 C.Op0.getValueType() == MVT::i64 && 1818 C.Op1.getOpcode() == ISD::Constant && 1819 cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) { 1820 auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1)); 1821 if (C1 && C1->getZExtValue() == 32) { 1822 SDValue ShlOp0 = C.Op0.getOperand(0); 1823 // See whether X has any SIGN_EXTEND_INREG uses. 1824 for (auto I = ShlOp0->use_begin(), E = ShlOp0->use_end(); I != E; ++I) { 1825 SDNode *N = *I; 1826 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG && 1827 cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) { 1828 C.Op0 = SDValue(N, 0); 1829 return; 1830 } 1831 } 1832 } 1833 } 1834 } 1835 1836 // If C compares the truncation of an extending load, try to compare 1837 // the untruncated value instead. This exposes more opportunities to 1838 // reuse CC. 1839 static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL, 1840 Comparison &C) { 1841 if (C.Op0.getOpcode() == ISD::TRUNCATE && 1842 C.Op0.getOperand(0).getOpcode() == ISD::LOAD && 1843 C.Op1.getOpcode() == ISD::Constant && 1844 cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) { 1845 auto *L = cast<LoadSDNode>(C.Op0.getOperand(0)); 1846 if (L->getMemoryVT().getStoreSizeInBits() 1847 <= C.Op0.getValueType().getSizeInBits()) { 1848 unsigned Type = L->getExtensionType(); 1849 if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) || 1850 (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) { 1851 C.Op0 = C.Op0.getOperand(0); 1852 C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType()); 1853 } 1854 } 1855 } 1856 } 1857 1858 // Return true if shift operation N has an in-range constant shift value. 1859 // Store it in ShiftVal if so. 
1860 static bool isSimpleShift(SDValue N, unsigned &ShiftVal) { 1861 auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1)); 1862 if (!Shift) 1863 return false; 1864 1865 uint64_t Amount = Shift->getZExtValue(); 1866 if (Amount >= N.getValueType().getSizeInBits()) 1867 return false; 1868 1869 ShiftVal = Amount; 1870 return true; 1871 } 1872 1873 // Check whether an AND with Mask is suitable for a TEST UNDER MASK 1874 // instruction and whether the CC value is descriptive enough to handle 1875 // a comparison of type Opcode between the AND result and CmpVal. 1876 // CCMask says which comparison result is being tested and BitSize is 1877 // the number of bits in the operands. If TEST UNDER MASK can be used, 1878 // return the corresponding CC mask, otherwise return 0. 1879 static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask, 1880 uint64_t Mask, uint64_t CmpVal, 1881 unsigned ICmpType) { 1882 assert(Mask != 0 && "ANDs with zero should have been removed by now"); 1883 1884 // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL. 1885 if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) && 1886 !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask)) 1887 return 0; 1888 1889 // Work out the masks for the lowest and highest bits. 1890 unsigned HighShift = 63 - countLeadingZeros(Mask); 1891 uint64_t High = uint64_t(1) << HighShift; 1892 uint64_t Low = uint64_t(1) << countTrailingZeros(Mask); 1893 1894 // Signed ordered comparisons are effectively unsigned if the sign 1895 // bit is dropped. 1896 bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly); 1897 1898 // Check for equality comparisons with 0, or the equivalent. 
1899 if (CmpVal == 0) { 1900 if (CCMask == SystemZ::CCMASK_CMP_EQ) 1901 return SystemZ::CCMASK_TM_ALL_0; 1902 if (CCMask == SystemZ::CCMASK_CMP_NE) 1903 return SystemZ::CCMASK_TM_SOME_1; 1904 } 1905 if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) { 1906 if (CCMask == SystemZ::CCMASK_CMP_LT) 1907 return SystemZ::CCMASK_TM_ALL_0; 1908 if (CCMask == SystemZ::CCMASK_CMP_GE) 1909 return SystemZ::CCMASK_TM_SOME_1; 1910 } 1911 if (EffectivelyUnsigned && CmpVal < Low) { 1912 if (CCMask == SystemZ::CCMASK_CMP_LE) 1913 return SystemZ::CCMASK_TM_ALL_0; 1914 if (CCMask == SystemZ::CCMASK_CMP_GT) 1915 return SystemZ::CCMASK_TM_SOME_1; 1916 } 1917 1918 // Check for equality comparisons with the mask, or the equivalent. 1919 if (CmpVal == Mask) { 1920 if (CCMask == SystemZ::CCMASK_CMP_EQ) 1921 return SystemZ::CCMASK_TM_ALL_1; 1922 if (CCMask == SystemZ::CCMASK_CMP_NE) 1923 return SystemZ::CCMASK_TM_SOME_0; 1924 } 1925 if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) { 1926 if (CCMask == SystemZ::CCMASK_CMP_GT) 1927 return SystemZ::CCMASK_TM_ALL_1; 1928 if (CCMask == SystemZ::CCMASK_CMP_LE) 1929 return SystemZ::CCMASK_TM_SOME_0; 1930 } 1931 if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) { 1932 if (CCMask == SystemZ::CCMASK_CMP_GE) 1933 return SystemZ::CCMASK_TM_ALL_1; 1934 if (CCMask == SystemZ::CCMASK_CMP_LT) 1935 return SystemZ::CCMASK_TM_SOME_0; 1936 } 1937 1938 // Check for ordered comparisons with the top bit. 
1939 if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) { 1940 if (CCMask == SystemZ::CCMASK_CMP_LE) 1941 return SystemZ::CCMASK_TM_MSB_0; 1942 if (CCMask == SystemZ::CCMASK_CMP_GT) 1943 return SystemZ::CCMASK_TM_MSB_1; 1944 } 1945 if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) { 1946 if (CCMask == SystemZ::CCMASK_CMP_LT) 1947 return SystemZ::CCMASK_TM_MSB_0; 1948 if (CCMask == SystemZ::CCMASK_CMP_GE) 1949 return SystemZ::CCMASK_TM_MSB_1; 1950 } 1951 1952 // If there are just two bits, we can do equality checks for Low and High 1953 // as well. 1954 if (Mask == Low + High) { 1955 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low) 1956 return SystemZ::CCMASK_TM_MIXED_MSB_0; 1957 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low) 1958 return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY; 1959 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High) 1960 return SystemZ::CCMASK_TM_MIXED_MSB_1; 1961 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High) 1962 return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY; 1963 } 1964 1965 // Looks like we've exhausted our options. 1966 return 0; 1967 } 1968 1969 // See whether C can be implemented as a TEST UNDER MASK instruction. 1970 // Update the arguments with the TM version if so. 1971 static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL, 1972 Comparison &C) { 1973 // Check that we have a comparison with a constant. 1974 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1); 1975 if (!ConstOp1) 1976 return; 1977 uint64_t CmpVal = ConstOp1->getZExtValue(); 1978 1979 // Check whether the nonconstant input is an AND with a constant mask. 
1980 Comparison NewC(C); 1981 uint64_t MaskVal; 1982 ConstantSDNode *Mask = nullptr; 1983 if (C.Op0.getOpcode() == ISD::AND) { 1984 NewC.Op0 = C.Op0.getOperand(0); 1985 NewC.Op1 = C.Op0.getOperand(1); 1986 Mask = dyn_cast<ConstantSDNode>(NewC.Op1); 1987 if (!Mask) 1988 return; 1989 MaskVal = Mask->getZExtValue(); 1990 } else { 1991 // There is no instruction to compare with a 64-bit immediate 1992 // so use TMHH instead if possible. We need an unsigned ordered 1993 // comparison with an i64 immediate. 1994 if (NewC.Op0.getValueType() != MVT::i64 || 1995 NewC.CCMask == SystemZ::CCMASK_CMP_EQ || 1996 NewC.CCMask == SystemZ::CCMASK_CMP_NE || 1997 NewC.ICmpType == SystemZICMP::SignedOnly) 1998 return; 1999 // Convert LE and GT comparisons into LT and GE. 2000 if (NewC.CCMask == SystemZ::CCMASK_CMP_LE || 2001 NewC.CCMask == SystemZ::CCMASK_CMP_GT) { 2002 if (CmpVal == uint64_t(-1)) 2003 return; 2004 CmpVal += 1; 2005 NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ; 2006 } 2007 // If the low N bits of Op1 are zero than the low N bits of Op0 can 2008 // be masked off without changing the result. 2009 MaskVal = -(CmpVal & -CmpVal); 2010 NewC.ICmpType = SystemZICMP::UnsignedOnly; 2011 } 2012 if (!MaskVal) 2013 return; 2014 2015 // Check whether the combination of mask, comparison value and comparison 2016 // type are suitable. 
2017 unsigned BitSize = NewC.Op0.getValueType().getSizeInBits(); 2018 unsigned NewCCMask, ShiftVal; 2019 if (NewC.ICmpType != SystemZICMP::SignedOnly && 2020 NewC.Op0.getOpcode() == ISD::SHL && 2021 isSimpleShift(NewC.Op0, ShiftVal) && 2022 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, 2023 MaskVal >> ShiftVal, 2024 CmpVal >> ShiftVal, 2025 SystemZICMP::Any))) { 2026 NewC.Op0 = NewC.Op0.getOperand(0); 2027 MaskVal >>= ShiftVal; 2028 } else if (NewC.ICmpType != SystemZICMP::SignedOnly && 2029 NewC.Op0.getOpcode() == ISD::SRL && 2030 isSimpleShift(NewC.Op0, ShiftVal) && 2031 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, 2032 MaskVal << ShiftVal, 2033 CmpVal << ShiftVal, 2034 SystemZICMP::UnsignedOnly))) { 2035 NewC.Op0 = NewC.Op0.getOperand(0); 2036 MaskVal <<= ShiftVal; 2037 } else { 2038 NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal, 2039 NewC.ICmpType); 2040 if (!NewCCMask) 2041 return; 2042 } 2043 2044 // Go ahead and make the change. 2045 C.Opcode = SystemZISD::TM; 2046 C.Op0 = NewC.Op0; 2047 if (Mask && Mask->getZExtValue() == MaskVal) 2048 C.Op1 = SDValue(Mask, 0); 2049 else 2050 C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType()); 2051 C.CCValid = SystemZ::CCMASK_TM; 2052 C.CCMask = NewCCMask; 2053 } 2054 2055 // Return a Comparison that tests the condition-code result of intrinsic 2056 // node Call against constant integer CC using comparison code Cond. 2057 // Opcode is the opcode of the SystemZISD operation for the intrinsic 2058 // and CCValid is the set of possible condition-code results. 2059 static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode, 2060 SDValue Call, unsigned CCValid, uint64_t CC, 2061 ISD::CondCode Cond) { 2062 Comparison C(Call, SDValue()); 2063 C.Opcode = Opcode; 2064 C.CCValid = CCValid; 2065 if (Cond == ISD::SETEQ) 2066 // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3. 2067 C.CCMask = CC < 4 ? 
1 << (3 - CC) : 0; 2068 else if (Cond == ISD::SETNE) 2069 // ...and the inverse of that. 2070 C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1; 2071 else if (Cond == ISD::SETLT || Cond == ISD::SETULT) 2072 // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3, 2073 // always true for CC>3. 2074 C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1; 2075 else if (Cond == ISD::SETGE || Cond == ISD::SETUGE) 2076 // ...and the inverse of that. 2077 C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0; 2078 else if (Cond == ISD::SETLE || Cond == ISD::SETULE) 2079 // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true), 2080 // always true for CC>3. 2081 C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1; 2082 else if (Cond == ISD::SETGT || Cond == ISD::SETUGT) 2083 // ...and the inverse of that. 2084 C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0; 2085 else 2086 llvm_unreachable("Unexpected integer comparison type"); 2087 C.CCMask &= CCValid; 2088 return C; 2089 } 2090 2091 // Decide how to implement a comparison of type Cond between CmpOp0 with CmpOp1. 
2092 static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, 2093 ISD::CondCode Cond, const SDLoc &DL) { 2094 if (CmpOp1.getOpcode() == ISD::Constant) { 2095 uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue(); 2096 unsigned Opcode, CCValid; 2097 if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN && 2098 CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) && 2099 isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid)) 2100 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond); 2101 if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN && 2102 CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 && 2103 isIntrinsicWithCC(CmpOp0, Opcode, CCValid)) 2104 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond); 2105 } 2106 Comparison C(CmpOp0, CmpOp1); 2107 C.CCMask = CCMaskForCondCode(Cond); 2108 if (C.Op0.getValueType().isFloatingPoint()) { 2109 C.CCValid = SystemZ::CCMASK_FCMP; 2110 C.Opcode = SystemZISD::FCMP; 2111 adjustForFNeg(C); 2112 } else { 2113 C.CCValid = SystemZ::CCMASK_ICMP; 2114 C.Opcode = SystemZISD::ICMP; 2115 // Choose the type of comparison. Equality and inequality tests can 2116 // use either signed or unsigned comparisons. The choice also doesn't 2117 // matter if both sign bits are known to be clear. In those cases we 2118 // want to give the main isel code the freedom to choose whichever 2119 // form fits best. 
2120 if (C.CCMask == SystemZ::CCMASK_CMP_EQ || 2121 C.CCMask == SystemZ::CCMASK_CMP_NE || 2122 (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1))) 2123 C.ICmpType = SystemZICMP::Any; 2124 else if (C.CCMask & SystemZ::CCMASK_CMP_UO) 2125 C.ICmpType = SystemZICMP::UnsignedOnly; 2126 else 2127 C.ICmpType = SystemZICMP::SignedOnly; 2128 C.CCMask &= ~SystemZ::CCMASK_CMP_UO; 2129 adjustZeroCmp(DAG, DL, C); 2130 adjustSubwordCmp(DAG, DL, C); 2131 adjustForSubtraction(DAG, DL, C); 2132 adjustForLTGFR(C); 2133 adjustICmpTruncate(DAG, DL, C); 2134 } 2135 2136 if (shouldSwapCmpOperands(C)) { 2137 std::swap(C.Op0, C.Op1); 2138 C.CCMask = reverseCCMask(C.CCMask); 2139 } 2140 2141 adjustForTestUnderMask(DAG, DL, C); 2142 return C; 2143 } 2144 2145 // Emit the comparison instruction described by C. 2146 static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) { 2147 if (!C.Op1.getNode()) { 2148 SDValue Op; 2149 switch (C.Op0.getOpcode()) { 2150 case ISD::INTRINSIC_W_CHAIN: 2151 Op = emitIntrinsicWithChainAndGlue(DAG, C.Op0, C.Opcode); 2152 break; 2153 case ISD::INTRINSIC_WO_CHAIN: 2154 Op = emitIntrinsicWithGlue(DAG, C.Op0, C.Opcode); 2155 break; 2156 default: 2157 llvm_unreachable("Invalid comparison operands"); 2158 } 2159 return SDValue(Op.getNode(), Op->getNumValues() - 1); 2160 } 2161 if (C.Opcode == SystemZISD::ICMP) 2162 return DAG.getNode(SystemZISD::ICMP, DL, MVT::Glue, C.Op0, C.Op1, 2163 DAG.getConstant(C.ICmpType, DL, MVT::i32)); 2164 if (C.Opcode == SystemZISD::TM) { 2165 bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) != 2166 bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1)); 2167 return DAG.getNode(SystemZISD::TM, DL, MVT::Glue, C.Op0, C.Op1, 2168 DAG.getConstant(RegisterOnly, DL, MVT::i32)); 2169 } 2170 return DAG.getNode(C.Opcode, DL, MVT::Glue, C.Op0, C.Op1); 2171 } 2172 2173 // Implement a 32-bit *MUL_LOHI operation by extending both operands to 2174 // 64 bits. Extend is the extension type to use. 
Store the high part 2175 // in Hi and the low part in Lo. 2176 static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend, 2177 SDValue Op0, SDValue Op1, SDValue &Hi, 2178 SDValue &Lo) { 2179 Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0); 2180 Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1); 2181 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1); 2182 Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul, 2183 DAG.getConstant(32, DL, MVT::i64)); 2184 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi); 2185 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul); 2186 } 2187 2188 // Lower a binary operation that produces two VT results, one in each 2189 // half of a GR128 pair. Op0 and Op1 are the VT operands to the operation, 2190 // Extend extends Op0 to a GR128, and Opcode performs the GR128 operation 2191 // on the extended Op0 and (unextended) Op1. Store the even register result 2192 // in Even and the odd register result in Odd. 2193 static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT, 2194 unsigned Extend, unsigned Opcode, SDValue Op0, 2195 SDValue Op1, SDValue &Even, SDValue &Odd) { 2196 SDNode *In128 = DAG.getMachineNode(Extend, DL, MVT::Untyped, Op0); 2197 SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, 2198 SDValue(In128, 0), Op1); 2199 bool Is32Bit = is32Bit(VT); 2200 Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result); 2201 Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result); 2202 } 2203 2204 // Return an i32 value that is 1 if the CC value produced by Glue is 2205 // in the mask CCMask and 0 otherwise. CC is known to have a value 2206 // in CCValid, so other values can be ignored. 
2207 static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue Glue, 2208 unsigned CCValid, unsigned CCMask) { 2209 IPMConversion Conversion = getIPMConversion(CCValid, CCMask); 2210 SDValue Result = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue); 2211 2212 if (Conversion.XORValue) 2213 Result = DAG.getNode(ISD::XOR, DL, MVT::i32, Result, 2214 DAG.getConstant(Conversion.XORValue, DL, MVT::i32)); 2215 2216 if (Conversion.AddValue) 2217 Result = DAG.getNode(ISD::ADD, DL, MVT::i32, Result, 2218 DAG.getConstant(Conversion.AddValue, DL, MVT::i32)); 2219 2220 // The SHR/AND sequence should get optimized to an RISBG. 2221 Result = DAG.getNode(ISD::SRL, DL, MVT::i32, Result, 2222 DAG.getConstant(Conversion.Bit, DL, MVT::i32)); 2223 if (Conversion.Bit != 31) 2224 Result = DAG.getNode(ISD::AND, DL, MVT::i32, Result, 2225 DAG.getConstant(1, DL, MVT::i32)); 2226 return Result; 2227 } 2228 2229 // Return the SystemISD vector comparison operation for CC, or 0 if it cannot 2230 // be done directly. IsFP is true if CC is for a floating-point rather than 2231 // integer comparison. 2232 static unsigned getVectorComparison(ISD::CondCode CC, bool IsFP) { 2233 switch (CC) { 2234 case ISD::SETOEQ: 2235 case ISD::SETEQ: 2236 return IsFP ? SystemZISD::VFCMPE : SystemZISD::VICMPE; 2237 2238 case ISD::SETOGE: 2239 case ISD::SETGE: 2240 return IsFP ? SystemZISD::VFCMPHE : static_cast<SystemZISD::NodeType>(0); 2241 2242 case ISD::SETOGT: 2243 case ISD::SETGT: 2244 return IsFP ? SystemZISD::VFCMPH : SystemZISD::VICMPH; 2245 2246 case ISD::SETUGT: 2247 return IsFP ? static_cast<SystemZISD::NodeType>(0) : SystemZISD::VICMPHL; 2248 2249 default: 2250 return 0; 2251 } 2252 } 2253 2254 // Return the SystemZISD vector comparison operation for CC or its inverse, 2255 // or 0 if neither can be done directly. Indicate in Invert whether the 2256 // result is for the inverse of CC. IsFP is true if CC is for a 2257 // floating-point rather than integer comparison. 
2258 static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, bool IsFP, 2259 bool &Invert) { 2260 if (unsigned Opcode = getVectorComparison(CC, IsFP)) { 2261 Invert = false; 2262 return Opcode; 2263 } 2264 2265 CC = ISD::getSetCCInverse(CC, !IsFP); 2266 if (unsigned Opcode = getVectorComparison(CC, IsFP)) { 2267 Invert = true; 2268 return Opcode; 2269 } 2270 2271 return 0; 2272 } 2273 2274 // Return a v2f64 that contains the extended form of elements Start and Start+1 2275 // of v4f32 value Op. 2276 static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL, 2277 SDValue Op) { 2278 int Mask[] = { Start, -1, Start + 1, -1 }; 2279 Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask); 2280 return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op); 2281 } 2282 2283 // Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode, 2284 // producing a result of type VT. 2285 static SDValue getVectorCmp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &DL, 2286 EVT VT, SDValue CmpOp0, SDValue CmpOp1) { 2287 // There is no hardware support for v4f32, so extend the vector into 2288 // two v2f64s and compare those. 2289 if (CmpOp0.getValueType() == MVT::v4f32) { 2290 SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0); 2291 SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0); 2292 SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1); 2293 SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1); 2294 SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1); 2295 SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1); 2296 return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes); 2297 } 2298 return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1); 2299 } 2300 2301 // Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing 2302 // an integer mask of type VT. 
2303 static SDValue lowerVectorSETCC(SelectionDAG &DAG, const SDLoc &DL, EVT VT, 2304 ISD::CondCode CC, SDValue CmpOp0, 2305 SDValue CmpOp1) { 2306 bool IsFP = CmpOp0.getValueType().isFloatingPoint(); 2307 bool Invert = false; 2308 SDValue Cmp; 2309 switch (CC) { 2310 // Handle tests for order using (or (ogt y x) (oge x y)). 2311 case ISD::SETUO: 2312 Invert = true; 2313 case ISD::SETO: { 2314 assert(IsFP && "Unexpected integer comparison"); 2315 SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0); 2316 SDValue GE = getVectorCmp(DAG, SystemZISD::VFCMPHE, DL, VT, CmpOp0, CmpOp1); 2317 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE); 2318 break; 2319 } 2320 2321 // Handle <> tests using (or (ogt y x) (ogt x y)). 2322 case ISD::SETUEQ: 2323 Invert = true; 2324 case ISD::SETONE: { 2325 assert(IsFP && "Unexpected integer comparison"); 2326 SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0); 2327 SDValue GT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp0, CmpOp1); 2328 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT); 2329 break; 2330 } 2331 2332 // Otherwise a single comparison is enough. It doesn't really 2333 // matter whether we try the inversion or the swap first, since 2334 // there are no cases where both work. 
2335 default: 2336 if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert)) 2337 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1); 2338 else { 2339 CC = ISD::getSetCCSwappedOperands(CC); 2340 if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert)) 2341 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0); 2342 else 2343 llvm_unreachable("Unhandled comparison"); 2344 } 2345 break; 2346 } 2347 if (Invert) { 2348 SDValue Mask = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8, 2349 DAG.getConstant(65535, DL, MVT::i32)); 2350 Mask = DAG.getNode(ISD::BITCAST, DL, VT, Mask); 2351 Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask); 2352 } 2353 return Cmp; 2354 } 2355 2356 SDValue SystemZTargetLowering::lowerSETCC(SDValue Op, 2357 SelectionDAG &DAG) const { 2358 SDValue CmpOp0 = Op.getOperand(0); 2359 SDValue CmpOp1 = Op.getOperand(1); 2360 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); 2361 SDLoc DL(Op); 2362 EVT VT = Op.getValueType(); 2363 if (VT.isVector()) 2364 return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1); 2365 2366 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL)); 2367 SDValue Glue = emitCmp(DAG, DL, C); 2368 return emitSETCC(DAG, DL, Glue, C.CCValid, C.CCMask); 2369 } 2370 2371 SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const { 2372 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); 2373 SDValue CmpOp0 = Op.getOperand(2); 2374 SDValue CmpOp1 = Op.getOperand(3); 2375 SDValue Dest = Op.getOperand(4); 2376 SDLoc DL(Op); 2377 2378 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL)); 2379 SDValue Glue = emitCmp(DAG, DL, C); 2380 return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(), 2381 Op.getOperand(0), DAG.getConstant(C.CCValid, DL, MVT::i32), 2382 DAG.getConstant(C.CCMask, DL, MVT::i32), Dest, Glue); 2383 } 2384 2385 // Return true if Pos is CmpOp and Neg is the negative of CmpOp, 2386 // allowing Pos and Neg to be wider than CmpOp. 
static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
  // Neg must be (sub 0, Pos), and Pos must be either CmpOp itself or a
  // sign extension of CmpOp.
  return (Neg.getOpcode() == ISD::SUB &&
          Neg.getOperand(0).getOpcode() == ISD::Constant &&
          cast<ConstantSDNode>(Neg.getOperand(0))->getZExtValue() == 0 &&
          Neg.getOperand(1) == Pos &&
          (Pos == CmpOp ||
           (Pos.getOpcode() == ISD::SIGN_EXTEND &&
            Pos.getOperand(0) == CmpOp)));
}

// Return the absolute or negative absolute of Op; IsNegative decides which.
// The negative absolute is formed as (sub 0, (IABS Op)).
static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op,
                           bool IsNegative) {
  Op = DAG.getNode(SystemZISD::IABS, DL, Op.getValueType(), Op);
  if (IsNegative)
    Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
                     DAG.getConstant(0, DL, Op.getValueType()), Op);
  return Op;
}

// Lower a SELECT_CC node.  Operands are: the two values being compared,
// the value to use when the condition is true, the value to use when it
// is false, and the condition code.  Absolute-value patterns and -1/0
// selections are special-cased; everything else becomes a SELECT_CCMASK
// glued to the comparison.
SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDValue CmpOp0   = Op.getOperand(0);
  SDValue CmpOp1   = Op.getOperand(1);
  SDValue TrueOp   = Op.getOperand(2);
  SDValue FalseOp  = Op.getOperand(3);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
  SDLoc DL(Op);

  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));

  // Check for absolute and negative-absolute selections, including those
  // where the comparison value is sign-extended (for LPGFR and LNGFR).
  // This check supplements the one in DAGCombiner.
  if (C.Opcode == SystemZISD::ICMP &&
      C.CCMask != SystemZ::CCMASK_CMP_EQ &&
      C.CCMask != SystemZ::CCMASK_CMP_NE &&
      C.Op1.getOpcode() == ISD::Constant &&
      cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
    // TrueOp is the value and FalseOp its negation: selecting the value
    // on an LT condition gives the negative absolute.
    if (isAbsolute(C.Op0, TrueOp, FalseOp))
      return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
    // FalseOp is the value and TrueOp its negation: selecting the negation
    // on a GT condition gives the negative absolute.
    if (isAbsolute(C.Op0, FalseOp, TrueOp))
      return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
  }

  SDValue Glue = emitCmp(DAG, DL, C);

  // Special case for handling -1/0 results.  The shifts we use here
  // should get optimized with the IPM conversion sequence.
  auto *TrueC = dyn_cast<ConstantSDNode>(TrueOp);
  auto *FalseC = dyn_cast<ConstantSDNode>(FalseOp);
  if (TrueC && FalseC) {
    int64_t TrueVal = TrueC->getSExtValue();
    int64_t FalseVal = FalseC->getSExtValue();
    if ((TrueVal == -1 && FalseVal == 0) || (TrueVal == 0 && FalseVal == -1)) {
      // Invert the condition if we want -1 on false.
      if (TrueVal == 0)
        C.CCMask ^= C.CCValid;
      SDValue Result = emitSETCC(DAG, DL, Glue, C.CCValid, C.CCMask);
      EVT VT = Op.getValueType();
      // Extend the result to VT.  Upper bits are ignored.
      if (!is32Bit(VT))
        Result = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Result);
      // Sign-extend from the low bit: shift it up to the top bit and
      // arithmetically shift it back down.
      SDValue ShAmt = DAG.getConstant(VT.getSizeInBits() - 1, DL, MVT::i32);
      SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, Result, ShAmt);
      return DAG.getNode(ISD::SRA, DL, VT, Shl, ShAmt);
    }
  }

  // General case: select between the two values based on the CC mask.
  SDValue Ops[] = {TrueOp, FalseOp, DAG.getConstant(C.CCValid, DL, MVT::i32),
                   DAG.getConstant(C.CCMask, DL, MVT::i32), Glue};

  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
  return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, Ops);
}

// Lower the address of global Node.  Symbols for which
// Subtarget.isPC32DBLSymbol() holds are addressed PC-relatively, with as
// much of the offset as possible folded into the address; all other
// symbols are loaded from the GOT.  Any offset that was not folded is
// added explicitly at the end.
SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Node);
  const GlobalValue *GV = Node->getGlobal();
  int64_t Offset = Node->getOffset();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  CodeModel::Model CM = DAG.getTarget().getCodeModel();

  SDValue Result;
  if (Subtarget.isPC32DBLSymbol(GV, CM)) {
    // Assign anchors at 1<<12 byte boundaries.
    uint64_t Anchor = Offset & ~uint64_t(0xfff);
    Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
    Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);

    // The offset can be folded into the address if it is aligned to a halfword.
    Offset -= Anchor;
    if (Offset != 0 && (Offset & 1) == 0) {
      SDValue Full = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
      Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
      Offset = 0;
    }
  } else {
    // Load the address from the symbol's GOT entry.  The offset is left
    // untouched here and is added below.
    Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
    Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
    Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
                         MachinePointerInfo::getGOT(DAG.getMachineFunction()),
                         false, false, false, 0);
  }

  // If there was a non-zero offset that we didn't fold, create an explicit
  // addition for it.
  if (Offset != 0)
    Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
                         DAG.getConstant(Offset, DL, PtrVT));

  return Result;
}

// Emit a call node with opcode Opcode for the TLS symbol of Node, passing
// GOTOffset in %r2 and the GOT address in %r12, and return the value that
// the call leaves in %r2.
SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
                                                 SelectionDAG &DAG,
                                                 unsigned Opcode,
                                                 SDValue GOTOffset) const {
  SDLoc DL(Node);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue Chain = DAG.getEntryNode();
  SDValue Glue;

  // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
  // The two register copies are glued together so that they stay adjacent
  // to the call.
  SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
  Glue = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
  Glue = Chain.getValue(1);

  // The first call operand is the chain and the second is the TLS symbol.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
                                           Node->getValueType(0),
                                           0, 0));

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
  Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));

  // Add a register mask operand representing the call-preserved registers.
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *Mask =
      TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  // Glue the call to the argument copies.
  Ops.push_back(Glue);

  // Emit the call.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
  Glue = Chain.getValue(1);

  // Copy the return value from %r2.
  return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
}

// Build the thread pointer as a PtrVT value from access registers 0 and 1.
SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
                                                  SelectionDAG &DAG) const {
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  // The high part of the thread pointer is in access register 0.
  // An any-extend is enough here because the value is shifted left by 32
  // below.
  SDValue TPHi = DAG.getNode(SystemZISD::EXTRACT_ACCESS, DL, MVT::i32,
                             DAG.getConstant(0, DL, MVT::i32));
  TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);

  // The low part of the thread pointer is in access register 1.
  // It is zero-extended so that its upper bits do not disturb the OR below.
  SDValue TPLo = DAG.getNode(SystemZISD::EXTRACT_ACCESS, DL, MVT::i32,
                             DAG.getConstant(1, DL, MVT::i32));
  TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);

  // Merge them into a single 64-bit address.
  SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
                                    DAG.getConstant(32, DL, PtrVT));
  return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
}

SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
                                                     SelectionDAG &DAG) const {
  // Emulated TLS is handled by the generic lowering.
  if (DAG.getTarget().Options.EmulatedTLS)
    return LowerToTLSEmulatedModel(Node, DAG);
  SDLoc DL(Node);
  const GlobalValue *GV = Node->getGlobal();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  TLSModel::Model model = DAG.getTarget().getTLSModel(GV);

  SDValue TP = lowerThreadPointer(DL, DAG);

  // Get the offset of GA from the thread pointer, based on the TLS model.
  SDValue Offset;
  switch (model) {
    case TLSModel::GeneralDynamic: {
      // Load the GOT offset of the tls_index (module ID / per-symbol offset).
      SystemZConstantPoolValue *CPV =
        SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);

      Offset = DAG.getConstantPool(CPV, PtrVT, 8);
      Offset = DAG.getLoad(
          PtrVT, DL, DAG.getEntryNode(), Offset,
          MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
          false, false, 0);

      // Call __tls_get_offset to retrieve the offset.
      Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
      break;
    }

    case TLSModel::LocalDynamic: {
      // Load the GOT offset of the module ID.
      SystemZConstantPoolValue *CPV =
        SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);

      Offset = DAG.getConstantPool(CPV, PtrVT, 8);
      Offset = DAG.getLoad(
          PtrVT, DL, DAG.getEntryNode(), Offset,
          MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
          false, false, 0);

      // Call __tls_get_offset to retrieve the module base offset.
2612 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset); 2613 2614 // Note: The SystemZLDCleanupPass will remove redundant computations 2615 // of the module base offset. Count total number of local-dynamic 2616 // accesses to trigger execution of that pass. 2617 SystemZMachineFunctionInfo* MFI = 2618 DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>(); 2619 MFI->incNumLocalDynamicTLSAccesses(); 2620 2621 // Add the per-symbol offset. 2622 CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF); 2623 2624 SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, 8); 2625 DTPOffset = DAG.getLoad( 2626 PtrVT, DL, DAG.getEntryNode(), DTPOffset, 2627 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, 2628 false, false, 0); 2629 2630 Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset); 2631 break; 2632 } 2633 2634 case TLSModel::InitialExec: { 2635 // Load the offset from the GOT. 2636 Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 2637 SystemZII::MO_INDNTPOFF); 2638 Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset); 2639 Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset, 2640 MachinePointerInfo::getGOT(DAG.getMachineFunction()), 2641 false, false, false, 0); 2642 break; 2643 } 2644 2645 case TLSModel::LocalExec: { 2646 // Force the offset into the constant pool and load it from there. 2647 SystemZConstantPoolValue *CPV = 2648 SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF); 2649 2650 Offset = DAG.getConstantPool(CPV, PtrVT, 8); 2651 Offset = DAG.getLoad( 2652 PtrVT, DL, DAG.getEntryNode(), Offset, 2653 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, 2654 false, false, 0); 2655 break; 2656 } 2657 } 2658 2659 // Add the base and offset together. 
2660 return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset); 2661 } 2662 2663 SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node, 2664 SelectionDAG &DAG) const { 2665 SDLoc DL(Node); 2666 const BlockAddress *BA = Node->getBlockAddress(); 2667 int64_t Offset = Node->getOffset(); 2668 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 2669 2670 SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset); 2671 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); 2672 return Result; 2673 } 2674 2675 SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT, 2676 SelectionDAG &DAG) const { 2677 SDLoc DL(JT); 2678 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 2679 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); 2680 2681 // Use LARL to load the address of the table. 2682 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); 2683 } 2684 2685 SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP, 2686 SelectionDAG &DAG) const { 2687 SDLoc DL(CP); 2688 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 2689 2690 SDValue Result; 2691 if (CP->isMachineConstantPoolEntry()) 2692 Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, 2693 CP->getAlignment()); 2694 else 2695 Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, 2696 CP->getAlignment(), CP->getOffset()); 2697 2698 // Use LARL to load the address of the constant pool entry. 2699 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); 2700 } 2701 2702 SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op, 2703 SelectionDAG &DAG) const { 2704 MachineFunction &MF = DAG.getMachineFunction(); 2705 MachineFrameInfo *MFI = MF.getFrameInfo(); 2706 MFI->setFrameAddressIsTaken(true); 2707 2708 SDLoc DL(Op); 2709 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 2710 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 2711 2712 // If the back chain frame index has not been allocated yet, do so. 
2713 SystemZMachineFunctionInfo *FI = MF.getInfo<SystemZMachineFunctionInfo>(); 2714 int BackChainIdx = FI->getFramePointerSaveIndex(); 2715 if (!BackChainIdx) { 2716 // By definition, the frame address is the address of the back chain. 2717 BackChainIdx = MFI->CreateFixedObject(8, -SystemZMC::CallFrameSize, false); 2718 FI->setFramePointerSaveIndex(BackChainIdx); 2719 } 2720 SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT); 2721 2722 // FIXME The frontend should detect this case. 2723 if (Depth > 0) { 2724 report_fatal_error("Unsupported stack frame traversal count"); 2725 } 2726 2727 return BackChain; 2728 } 2729 2730 SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op, 2731 SelectionDAG &DAG) const { 2732 MachineFunction &MF = DAG.getMachineFunction(); 2733 MachineFrameInfo *MFI = MF.getFrameInfo(); 2734 MFI->setReturnAddressIsTaken(true); 2735 2736 if (verifyReturnAddressArgumentIsConstant(Op, DAG)) 2737 return SDValue(); 2738 2739 SDLoc DL(Op); 2740 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 2741 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 2742 2743 // FIXME The frontend should detect this case. 2744 if (Depth > 0) { 2745 report_fatal_error("Unsupported stack frame traversal count"); 2746 } 2747 2748 // Return R14D, which has the return address. Mark it an implicit live-in. 2749 unsigned LinkReg = MF.addLiveIn(SystemZ::R14D, &SystemZ::GR64BitRegClass); 2750 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT); 2751 } 2752 2753 SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op, 2754 SelectionDAG &DAG) const { 2755 SDLoc DL(Op); 2756 SDValue In = Op.getOperand(0); 2757 EVT InVT = In.getValueType(); 2758 EVT ResVT = Op.getValueType(); 2759 2760 // Convert loads directly. This is normally done by DAGCombiner, 2761 // but we need this case for bitcasts that are created during lowering 2762 // and which are then lowered themselves. 
2763 if (auto *LoadN = dyn_cast<LoadSDNode>(In)) 2764 return DAG.getLoad(ResVT, DL, LoadN->getChain(), LoadN->getBasePtr(), 2765 LoadN->getMemOperand()); 2766 2767 if (InVT == MVT::i32 && ResVT == MVT::f32) { 2768 SDValue In64; 2769 if (Subtarget.hasHighWord()) { 2770 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, 2771 MVT::i64); 2772 In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL, 2773 MVT::i64, SDValue(U64, 0), In); 2774 } else { 2775 In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In); 2776 In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64, 2777 DAG.getConstant(32, DL, MVT::i64)); 2778 } 2779 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64); 2780 return DAG.getTargetExtractSubreg(SystemZ::subreg_r32, 2781 DL, MVT::f32, Out64); 2782 } 2783 if (InVT == MVT::f32 && ResVT == MVT::i32) { 2784 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64); 2785 SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_r32, DL, 2786 MVT::f64, SDValue(U64, 0), In); 2787 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64); 2788 if (Subtarget.hasHighWord()) 2789 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL, 2790 MVT::i32, Out64); 2791 SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64, 2792 DAG.getConstant(32, DL, MVT::i64)); 2793 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift); 2794 } 2795 llvm_unreachable("Unexpected bitcast combination"); 2796 } 2797 2798 SDValue SystemZTargetLowering::lowerVASTART(SDValue Op, 2799 SelectionDAG &DAG) const { 2800 MachineFunction &MF = DAG.getMachineFunction(); 2801 SystemZMachineFunctionInfo *FuncInfo = 2802 MF.getInfo<SystemZMachineFunctionInfo>(); 2803 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 2804 2805 SDValue Chain = Op.getOperand(0); 2806 SDValue Addr = Op.getOperand(1); 2807 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 2808 SDLoc DL(Op); 2809 2810 // The initial values of each field. 
2811 const unsigned NumFields = 4; 2812 SDValue Fields[NumFields] = { 2813 DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT), 2814 DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT), 2815 DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT), 2816 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT) 2817 }; 2818 2819 // Store each field into its respective slot. 2820 SDValue MemOps[NumFields]; 2821 unsigned Offset = 0; 2822 for (unsigned I = 0; I < NumFields; ++I) { 2823 SDValue FieldAddr = Addr; 2824 if (Offset != 0) 2825 FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr, 2826 DAG.getIntPtrConstant(Offset, DL)); 2827 MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr, 2828 MachinePointerInfo(SV, Offset), 2829 false, false, 0); 2830 Offset += 8; 2831 } 2832 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps); 2833 } 2834 2835 SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op, 2836 SelectionDAG &DAG) const { 2837 SDValue Chain = Op.getOperand(0); 2838 SDValue DstPtr = Op.getOperand(1); 2839 SDValue SrcPtr = Op.getOperand(2); 2840 const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue(); 2841 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); 2842 SDLoc DL(Op); 2843 2844 return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32, DL), 2845 /*Align*/8, /*isVolatile*/false, /*AlwaysInline*/false, 2846 /*isTailCall*/false, 2847 MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV)); 2848 } 2849 2850 SDValue SystemZTargetLowering:: 2851 lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { 2852 const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); 2853 MachineFunction &MF = DAG.getMachineFunction(); 2854 bool RealignOpt = !MF.getFunction()-> hasFnAttribute("no-realign-stack"); 2855 bool StoreBackchain = MF.getFunction()->hasFnAttribute("backchain"); 2856 2857 SDValue Chain = Op.getOperand(0); 2858 SDValue Size = Op.getOperand(1); 2859 SDValue Align = 
Op.getOperand(2); 2860 SDLoc DL(Op); 2861 2862 // If user has set the no alignment function attribute, ignore 2863 // alloca alignments. 2864 uint64_t AlignVal = (RealignOpt ? 2865 dyn_cast<ConstantSDNode>(Align)->getZExtValue() : 0); 2866 2867 uint64_t StackAlign = TFI->getStackAlignment(); 2868 uint64_t RequiredAlign = std::max(AlignVal, StackAlign); 2869 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign; 2870 2871 unsigned SPReg = getStackPointerRegisterToSaveRestore(); 2872 SDValue NeededSpace = Size; 2873 2874 // Get a reference to the stack pointer. 2875 SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64); 2876 2877 // If we need a backchain, save it now. 2878 SDValue Backchain; 2879 if (StoreBackchain) 2880 Backchain = DAG.getLoad(MVT::i64, DL, Chain, OldSP, MachinePointerInfo(), 2881 false, false, false, 0); 2882 2883 // Add extra space for alignment if needed. 2884 if (ExtraAlignSpace) 2885 NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace, 2886 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64)); 2887 2888 // Get the new stack pointer value. 2889 SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace); 2890 2891 // Copy the new stack pointer back. 2892 Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP); 2893 2894 // The allocated data lives above the 160 bytes allocated for the standard 2895 // frame, plus any outgoing stack arguments. We don't know how much that 2896 // amounts to yet, so emit a special ADJDYNALLOC placeholder. 2897 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64); 2898 SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust); 2899 2900 // Dynamically realign if needed. 
2901 if (RequiredAlign > StackAlign) { 2902 Result = 2903 DAG.getNode(ISD::ADD, DL, MVT::i64, Result, 2904 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64)); 2905 Result = 2906 DAG.getNode(ISD::AND, DL, MVT::i64, Result, 2907 DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64)); 2908 } 2909 2910 if (StoreBackchain) 2911 Chain = DAG.getStore(Chain, DL, Backchain, NewSP, MachinePointerInfo(), 2912 false, false, 0); 2913 2914 SDValue Ops[2] = { Result, Chain }; 2915 return DAG.getMergeValues(Ops, DL); 2916 } 2917 2918 SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET( 2919 SDValue Op, SelectionDAG &DAG) const { 2920 SDLoc DL(Op); 2921 2922 return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64); 2923 } 2924 2925 SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op, 2926 SelectionDAG &DAG) const { 2927 EVT VT = Op.getValueType(); 2928 SDLoc DL(Op); 2929 SDValue Ops[2]; 2930 if (is32Bit(VT)) 2931 // Just do a normal 64-bit multiplication and extract the results. 2932 // We define this so that it can be used for constant division. 2933 lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0), 2934 Op.getOperand(1), Ops[1], Ops[0]); 2935 else { 2936 // Do a full 128-bit multiplication based on UMUL_LOHI64: 2937 // 2938 // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64) 2939 // 2940 // but using the fact that the upper halves are either all zeros 2941 // or all ones: 2942 // 2943 // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64) 2944 // 2945 // and grouping the right terms together since they are quicker than the 2946 // multiplication: 2947 // 2948 // (ll * rl) - (((lh & rl) + (ll & rh)) << 64) 2949 SDValue C63 = DAG.getConstant(63, DL, MVT::i64); 2950 SDValue LL = Op.getOperand(0); 2951 SDValue RL = Op.getOperand(1); 2952 SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63); 2953 SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63); 2954 // UMUL_LOHI64 returns the low result in the odd register and the high 2955 // result in the even register. 
SMUL_LOHI is defined to return the 2956 // low half first, so the results are in reverse order. 2957 lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64, 2958 LL, RL, Ops[1], Ops[0]); 2959 SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH); 2960 SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL); 2961 SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL); 2962 Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum); 2963 } 2964 return DAG.getMergeValues(Ops, DL); 2965 } 2966 2967 SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op, 2968 SelectionDAG &DAG) const { 2969 EVT VT = Op.getValueType(); 2970 SDLoc DL(Op); 2971 SDValue Ops[2]; 2972 if (is32Bit(VT)) 2973 // Just do a normal 64-bit multiplication and extract the results. 2974 // We define this so that it can be used for constant division. 2975 lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0), 2976 Op.getOperand(1), Ops[1], Ops[0]); 2977 else 2978 // UMUL_LOHI64 returns the low result in the odd register and the high 2979 // result in the even register. UMUL_LOHI is defined to return the 2980 // low half first, so the results are in reverse order. 2981 lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64, 2982 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); 2983 return DAG.getMergeValues(Ops, DL); 2984 } 2985 2986 SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op, 2987 SelectionDAG &DAG) const { 2988 SDValue Op0 = Op.getOperand(0); 2989 SDValue Op1 = Op.getOperand(1); 2990 EVT VT = Op.getValueType(); 2991 SDLoc DL(Op); 2992 unsigned Opcode; 2993 2994 // We use DSGF for 32-bit division. 
2995 if (is32Bit(VT)) { 2996 Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0); 2997 Opcode = SystemZISD::SDIVREM32; 2998 } else if (DAG.ComputeNumSignBits(Op1) > 32) { 2999 Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1); 3000 Opcode = SystemZISD::SDIVREM32; 3001 } else 3002 Opcode = SystemZISD::SDIVREM64; 3003 3004 // DSG(F) takes a 64-bit dividend, so the even register in the GR128 3005 // input is "don't care". The instruction returns the remainder in 3006 // the even register and the quotient in the odd register. 3007 SDValue Ops[2]; 3008 lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, Opcode, 3009 Op0, Op1, Ops[1], Ops[0]); 3010 return DAG.getMergeValues(Ops, DL); 3011 } 3012 3013 SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op, 3014 SelectionDAG &DAG) const { 3015 EVT VT = Op.getValueType(); 3016 SDLoc DL(Op); 3017 3018 // DL(G) uses a double-width dividend, so we need to clear the even 3019 // register in the GR128 input. The instruction returns the remainder 3020 // in the even register and the quotient in the odd register. 3021 SDValue Ops[2]; 3022 if (is32Bit(VT)) 3023 lowerGR128Binary(DAG, DL, VT, SystemZ::ZEXT128_32, SystemZISD::UDIVREM32, 3024 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); 3025 else 3026 lowerGR128Binary(DAG, DL, VT, SystemZ::ZEXT128_64, SystemZISD::UDIVREM64, 3027 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); 3028 return DAG.getMergeValues(Ops, DL); 3029 } 3030 3031 SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const { 3032 assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation"); 3033 3034 // Get the known-zero masks for each operand. 3035 SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1) }; 3036 APInt KnownZero[2], KnownOne[2]; 3037 DAG.computeKnownBits(Ops[0], KnownZero[0], KnownOne[0]); 3038 DAG.computeKnownBits(Ops[1], KnownZero[1], KnownOne[1]); 3039 3040 // See if the upper 32 bits of one operand and the lower 32 bits of the 3041 // other are known zero. 
They are the low and high operands respectively. 3042 uint64_t Masks[] = { KnownZero[0].getZExtValue(), 3043 KnownZero[1].getZExtValue() }; 3044 unsigned High, Low; 3045 if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff) 3046 High = 1, Low = 0; 3047 else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff) 3048 High = 0, Low = 1; 3049 else 3050 return Op; 3051 3052 SDValue LowOp = Ops[Low]; 3053 SDValue HighOp = Ops[High]; 3054 3055 // If the high part is a constant, we're better off using IILH. 3056 if (HighOp.getOpcode() == ISD::Constant) 3057 return Op; 3058 3059 // If the low part is a constant that is outside the range of LHI, 3060 // then we're better off using IILF. 3061 if (LowOp.getOpcode() == ISD::Constant) { 3062 int64_t Value = int32_t(cast<ConstantSDNode>(LowOp)->getZExtValue()); 3063 if (!isInt<16>(Value)) 3064 return Op; 3065 } 3066 3067 // Check whether the high part is an AND that doesn't change the 3068 // high 32 bits and just masks out low bits. We can skip it if so. 3069 if (HighOp.getOpcode() == ISD::AND && 3070 HighOp.getOperand(1).getOpcode() == ISD::Constant) { 3071 SDValue HighOp0 = HighOp.getOperand(0); 3072 uint64_t Mask = cast<ConstantSDNode>(HighOp.getOperand(1))->getZExtValue(); 3073 if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff)))) 3074 HighOp = HighOp0; 3075 } 3076 3077 // Take advantage of the fact that all GR32 operations only change the 3078 // low 32 bits by truncating Low to an i32 and inserting it directly 3079 // using a subreg. The interesting cases are those where the truncation 3080 // can be folded. 
3081 SDLoc DL(Op); 3082 SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp); 3083 return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL, 3084 MVT::i64, HighOp, Low32); 3085 } 3086 3087 SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op, 3088 SelectionDAG &DAG) const { 3089 EVT VT = Op.getValueType(); 3090 SDLoc DL(Op); 3091 Op = Op.getOperand(0); 3092 3093 // Handle vector types via VPOPCT. 3094 if (VT.isVector()) { 3095 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op); 3096 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op); 3097 switch (VT.getVectorElementType().getSizeInBits()) { 3098 case 8: 3099 break; 3100 case 16: { 3101 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op); 3102 SDValue Shift = DAG.getConstant(8, DL, MVT::i32); 3103 SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift); 3104 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp); 3105 Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift); 3106 break; 3107 } 3108 case 32: { 3109 SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8, 3110 DAG.getConstant(0, DL, MVT::i32)); 3111 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp); 3112 break; 3113 } 3114 case 64: { 3115 SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8, 3116 DAG.getConstant(0, DL, MVT::i32)); 3117 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp); 3118 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp); 3119 break; 3120 } 3121 default: 3122 llvm_unreachable("Unexpected type"); 3123 } 3124 return Op; 3125 } 3126 3127 // Get the known-zero mask for the operand. 3128 APInt KnownZero, KnownOne; 3129 DAG.computeKnownBits(Op, KnownZero, KnownOne); 3130 unsigned NumSignificantBits = (~KnownZero).getActiveBits(); 3131 if (NumSignificantBits == 0) 3132 return DAG.getConstant(0, DL, VT); 3133 3134 // Skip known-zero high parts of the operand. 
3135 int64_t OrigBitSize = VT.getSizeInBits(); 3136 int64_t BitSize = (int64_t)1 << Log2_32_Ceil(NumSignificantBits); 3137 BitSize = std::min(BitSize, OrigBitSize); 3138 3139 // The POPCNT instruction counts the number of bits in each byte. 3140 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op); 3141 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op); 3142 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op); 3143 3144 // Add up per-byte counts in a binary tree. All bits of Op at 3145 // position larger than BitSize remain zero throughout. 3146 for (int64_t I = BitSize / 2; I >= 8; I = I / 2) { 3147 SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT)); 3148 if (BitSize != OrigBitSize) 3149 Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp, 3150 DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT)); 3151 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp); 3152 } 3153 3154 // Extract overall result from high byte. 3155 if (BitSize > 8) 3156 Op = DAG.getNode(ISD::SRL, DL, VT, Op, 3157 DAG.getConstant(BitSize - 8, DL, VT)); 3158 3159 return Op; 3160 } 3161 3162 SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op, 3163 SelectionDAG &DAG) const { 3164 SDLoc DL(Op); 3165 AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>( 3166 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()); 3167 SynchronizationScope FenceScope = static_cast<SynchronizationScope>( 3168 cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue()); 3169 3170 // The only fence that needs an instruction is a sequentially-consistent 3171 // cross-thread fence. 3172 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent && 3173 FenceScope == CrossThread) { 3174 return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other, 3175 Op.getOperand(0)), 3176 0); 3177 } 3178 3179 // MEMBARRIER is a compiler barrier; it codegens to a no-op. 3180 return DAG.getNode(SystemZISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0)); 3181 } 3182 3183 // Op is an atomic load. 
Lower it into a normal volatile load. 3184 SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op, 3185 SelectionDAG &DAG) const { 3186 auto *Node = cast<AtomicSDNode>(Op.getNode()); 3187 return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), Op.getValueType(), 3188 Node->getChain(), Node->getBasePtr(), 3189 Node->getMemoryVT(), Node->getMemOperand()); 3190 } 3191 3192 // Op is an atomic store. Lower it into a normal volatile store followed 3193 // by a serialization. 3194 SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op, 3195 SelectionDAG &DAG) const { 3196 auto *Node = cast<AtomicSDNode>(Op.getNode()); 3197 SDValue Chain = DAG.getTruncStore(Node->getChain(), SDLoc(Op), Node->getVal(), 3198 Node->getBasePtr(), Node->getMemoryVT(), 3199 Node->getMemOperand()); 3200 return SDValue(DAG.getMachineNode(SystemZ::Serialize, SDLoc(Op), MVT::Other, 3201 Chain), 0); 3202 } 3203 3204 // Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the first 3205 // two into the fullword ATOMIC_LOADW_* operation given by Opcode. 3206 SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op, 3207 SelectionDAG &DAG, 3208 unsigned Opcode) const { 3209 auto *Node = cast<AtomicSDNode>(Op.getNode()); 3210 3211 // 32-bit operations need no code outside the main loop. 3212 EVT NarrowVT = Node->getMemoryVT(); 3213 EVT WideVT = MVT::i32; 3214 if (NarrowVT == WideVT) 3215 return Op; 3216 3217 int64_t BitSize = NarrowVT.getSizeInBits(); 3218 SDValue ChainIn = Node->getChain(); 3219 SDValue Addr = Node->getBasePtr(); 3220 SDValue Src2 = Node->getVal(); 3221 MachineMemOperand *MMO = Node->getMemOperand(); 3222 SDLoc DL(Node); 3223 EVT PtrVT = Addr.getValueType(); 3224 3225 // Convert atomic subtracts of constants into additions. 
3226 if (Opcode == SystemZISD::ATOMIC_LOADW_SUB) 3227 if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) { 3228 Opcode = SystemZISD::ATOMIC_LOADW_ADD; 3229 Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType()); 3230 } 3231 3232 // Get the address of the containing word. 3233 SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr, 3234 DAG.getConstant(-4, DL, PtrVT)); 3235 3236 // Get the number of bits that the word must be rotated left in order 3237 // to bring the field to the top bits of a GR32. 3238 SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr, 3239 DAG.getConstant(3, DL, PtrVT)); 3240 BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift); 3241 3242 // Get the complementing shift amount, for rotating a field in the top 3243 // bits back to its proper position. 3244 SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT, 3245 DAG.getConstant(0, DL, WideVT), BitShift); 3246 3247 // Extend the source operand to 32 bits and prepare it for the inner loop. 3248 // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other 3249 // operations require the source to be shifted in advance. (This shift 3250 // can be folded if the source is constant.) For AND and NAND, the lower 3251 // bits must be set, while for other opcodes they should be left clear. 3252 if (Opcode != SystemZISD::ATOMIC_SWAPW) 3253 Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2, 3254 DAG.getConstant(32 - BitSize, DL, WideVT)); 3255 if (Opcode == SystemZISD::ATOMIC_LOADW_AND || 3256 Opcode == SystemZISD::ATOMIC_LOADW_NAND) 3257 Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2, 3258 DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT)); 3259 3260 // Construct the ATOMIC_LOADW_* node. 
  SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
  SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
                    DAG.getConstant(BitSize, DL, WideVT) };
  SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
                                             NarrowVT, MMO);

  // Rotate the result of the final CS so that the field is in the lower
  // bits of a GR32, then truncate it.
  SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
                                    DAG.getConstant(BitSize, DL, WideVT));
  SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);

  SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
  return DAG.getMergeValues(RetOps, DL);
}

// Op is an ATOMIC_LOAD_SUB operation.  Lower 8- and 16-bit operations
// into ATOMIC_LOADW_SUBs and decide whether to convert 32- and 64-bit
// operations into additions.
//
// For an i32 or i64 memory type the node is either rewritten as an
// ATOMIC_LOAD_ADD of the negated operand or returned unchanged.  Every
// other memory type is forwarded to lowerATOMIC_LOAD_OP.
SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
                                                    SelectionDAG &DAG) const {
  auto *Node = cast<AtomicSDNode>(Op.getNode());
  EVT MemVT = Node->getMemoryVT();
  if (MemVT == MVT::i32 || MemVT == MVT::i64) {
    // A full-width operation.
    assert(Op.getValueType() == MemVT && "Mismatched VTs");
    SDValue Src2 = Node->getVal();
    SDValue NegSrc2;
    SDLoc DL(Src2);

    if (auto *Op2 = dyn_cast<ConstantSDNode>(Src2)) {
      // Use an addition if the operand is constant and either LAA(G) is
      // available or the negative value is in the range of A(G)FHI.
      int64_t Value = (-Op2->getAPIntValue()).getSExtValue();
      if (isInt<32>(Value) || Subtarget.hasInterlockedAccess1())
        NegSrc2 = DAG.getConstant(Value, DL, MemVT);
    } else if (Subtarget.hasInterlockedAccess1())
      // Use LAA(G) if available.
      NegSrc2 = DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT),
                            Src2);

    // NegSrc2 is only non-null when one of the cases above chose to use an
    // addition.
    if (NegSrc2.getNode())
      return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
                           Node->getChain(), Node->getBasePtr(), NegSrc2,
                           Node->getMemOperand(), Node->getOrdering(),
                           Node->getSynchScope());

    // Use the node as-is.
    return Op;
  }

  return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
}

// Op is an 8-, 16- or 32-bit ATOMIC_CMP_SWAP operation.  Lower the first
// two into a fullword ATOMIC_CMP_SWAPW operation.  An operation whose
// memory type is already i32 is returned unchanged.
SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
                                                    SelectionDAG &DAG) const {
  auto *Node = cast<AtomicSDNode>(Op.getNode());

  // We have native support for 32-bit compare and swap.
  EVT NarrowVT = Node->getMemoryVT();
  EVT WideVT = MVT::i32;
  if (NarrowVT == WideVT)
    return Op;

  // Operands of the original node: chain, address, compare value and
  // swap value, in that order.
  int64_t BitSize = NarrowVT.getSizeInBits();
  SDValue ChainIn = Node->getOperand(0);
  SDValue Addr = Node->getOperand(1);
  SDValue CmpVal = Node->getOperand(2);
  SDValue SwapVal = Node->getOperand(3);
  MachineMemOperand *MMO = Node->getMemOperand();
  SDLoc DL(Node);
  EVT PtrVT = Addr.getValueType();

  // Get the address of the containing word.
  SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
                                    DAG.getConstant(-4, DL, PtrVT));

  // Get the number of bits that the word must be rotated left in order
  // to bring the field to the top bits of a GR32.
  SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
                                 DAG.getConstant(3, DL, PtrVT));
  BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);

  // Get the complementing shift amount, for rotating a field in the top
  // bits back to its proper position.
  SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
                                    DAG.getConstant(0, DL, WideVT), BitShift);

  // Construct the ATOMIC_CMP_SWAPW node.  It keeps the narrow memory type
  // and the original memory operand.
  SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
  SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
                    NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
  SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
                                             VTList, Ops, NarrowVT, MMO);
  return AtomicOp;
}

// Lower STACKSAVE to a copy from R15D, chained on operand 0 of Op.  The
// function is also recorded as one that manipulates the stack pointer.
SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
                                              SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
  return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
                            SystemZ::R15D, Op.getValueType());
}

// Lower STACKRESTORE to a copy of operand 1 of Op into R15D and return the
// resulting chain.  If the function has the "backchain" attribute, the i64
// value found at the address held in the old R15D is loaded first and is
// stored to the new stack pointer address after R15D has been updated.
SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
                                                 SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
  bool StoreBackchain = MF.getFunction()->hasFnAttribute("backchain");

  SDValue Chain = Op.getOperand(0);
  SDValue NewSP = Op.getOperand(1);
  SDValue Backchain;
  SDLoc DL(Op);

  if (StoreBackchain) {
    // Read the backchain value through the current (old) stack pointer.
    SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, MVT::i64);
    Backchain = DAG.getLoad(MVT::i64, DL, Chain, OldSP, MachinePointerInfo(),
                            false, false, false, 0);
  }

  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R15D, NewSP);

  if (StoreBackchain)
    Chain = DAG.getStore(Chain, DL, Backchain, NewSP, MachinePointerInfo(),
                         false, false, 0);

  return Chain;
}

// Lower a PREFETCH node.  If operand 4 (the data/instruction flag) is zero,
// only the incoming chain is returned.  Otherwise a SystemZISD::PREFETCH
// memory node is built from the chain, a PFD_WRITE or PFD_READ code chosen
// by operand 2, and operand 1 of the original node.
SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
                                             SelectionDAG &DAG) const {
  bool IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
  if (!IsData)
    // Just preserve the chain.
    return Op.getOperand(0);

  SDLoc DL(Op);
  bool IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
  unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
  auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
  SDValue Ops[] = {
    Op.getOperand(0),
    DAG.getConstant(Code, DL, MVT::i32),
    Op.getOperand(1)
  };
  // Reuse the value types, memory type and memory operand of the original
  // node.
  return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
                                 Node->getVTList(),  Ops,
                                 Node->getMemoryVT(), Node->getMemOperand());
}

// Return an i32 that contains the value of CC immediately after After,
// whose final operand must be MVT::Glue.
//
// The value is built as an IPM node glued to the last result of After,
// shifted right by SystemZ::IPM_CC.
static SDValue getCCResult(SelectionDAG &DAG, SDNode *After) {
  SDLoc DL(After);
  SDValue Glue = SDValue(After, After->getNumValues() - 1);
  SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue);
  return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
                     DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
}

// Lower an INTRINSIC_W_CHAIN node.  If isIntrinsicWithCCAndChain recognises
// Op, the glued target node is emitted and all uses of result 0 of Op are
// replaced with the CC value obtained from it.  A null SDValue is returned
// in every case.
SDValue
SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
                                              SelectionDAG &DAG) const {
  unsigned Opcode, CCValid;
  if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
    assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
    SDValue Glued = emitIntrinsicWithChainAndGlue(DAG, Op, Opcode);
    SDValue CC = getCCResult(DAG, Glued.getNode());
    DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
    return SDValue();
  }

  return SDValue();
}

// Lower an INTRINSIC_WO_CHAIN node.  Intrinsics recognised by
// isIntrinsicWithCC are emitted as a glued target node followed by a read
// of CC.  The CC value is the only result when Op has a single value and
// the second of two merged results otherwise.  The remaining intrinsics
// handled by the switch map directly onto SystemZISD nodes; anything else
// yields a null SDValue.
SDValue
SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
                                               SelectionDAG &DAG) const {
  unsigned Opcode, CCValid;
  if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
    SDValue Glued = emitIntrinsicWithGlue(DAG, Op, Opcode);
    SDValue CC = getCCResult(DAG, Glued.getNode());
    if (Op->getNumValues() == 1)
      return CC;
    assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
    return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(), Glued,
                       CC);
  }

  // Operand 0 holds the intrinsic ID.
  unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  switch (Id) {
  case Intrinsic::thread_pointer:
    return lowerThreadPointer(SDLoc(Op), DAG);

  case Intrinsic::s390_vpdi:
    return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));

  case Intrinsic::s390_vperm:
    return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));

  case Intrinsic::s390_vuphb:
  case Intrinsic::s390_vuphh:
  case Intrinsic::s390_vuphf:
    return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1));

  case Intrinsic::s390_vuplhb:
  case Intrinsic::s390_vuplhh:
  case Intrinsic::s390_vuplhf:
    return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1));

  case Intrinsic::s390_vuplb:
  case Intrinsic::s390_vuplhw:
  case Intrinsic::s390_vuplf:
    return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1));

  case Intrinsic::s390_vupllb:
  case Intrinsic::s390_vupllh:
  case Intrinsic::s390_vupllf:
    return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1));

  case Intrinsic::s390_vsumb:
  case Intrinsic::s390_vsumh:
  case Intrinsic::s390_vsumgh:
  case Intrinsic::s390_vsumgf:
  case Intrinsic::s390_vsumqf:
  case Intrinsic::s390_vsumqg:
    return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2));
  }

  return SDValue();
}

namespace {
// Says that SystemZISD operation Opcode can be used to perform the equivalent
// of a VPERM with permute vector Bytes.  If Opcode takes three operands,
// Operand is the constant third operand, otherwise it is the number of
// bytes in each element of the result.
struct Permute {
  // The SystemZISD operation that implements the permute.
  unsigned Opcode;
  // Constant third operand of Opcode if it has one, otherwise the number of
  // bytes in each element of the result.
  unsigned Operand;
  // The equivalent VPERM permute vector, one selector per result byte.
  unsigned char Bytes[SystemZ::VectorBytes];
};
} // end anonymous namespace

// Fixed permutes that have a dedicated operation.  The table is searched in
// order by matchPermute and matchDoublePermute, which return the first
// entry that fits.  For each selector, selector / SystemZ::VectorBytes is
// the operand number and selector % SystemZ::VectorBytes is the byte within
// that operand.
static const Permute PermuteForms[] = {
  // VMRHG
  { SystemZISD::MERGE_HIGH, 8,
    { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
  // VMRHF
  { SystemZISD::MERGE_HIGH, 4,
    { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
  // VMRHH
  { SystemZISD::MERGE_HIGH, 2,
    { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
  // VMRHB
  { SystemZISD::MERGE_HIGH, 1,
    { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
  // VMRLG
  { SystemZISD::MERGE_LOW, 8,
    { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
  // VMRLF
  { SystemZISD::MERGE_LOW, 4,
    { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
  // VMRLH
  { SystemZISD::MERGE_LOW, 2,
    { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
  // VMRLB
  { SystemZISD::MERGE_LOW, 1,
    { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
  // VPKG
  { SystemZISD::PACK, 4,
    { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
  // VPKF
  { SystemZISD::PACK, 2,
    { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
  // VPKH
  { SystemZISD::PACK, 1,
    { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
  // VPDI V1, V2, 4  (low half of V1, high half of V2)
  { SystemZISD::PERMUTE_DWORDS, 4,
    { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
  // VPDI V1, V2, 1  (high half of V1, low half of V2)
  { SystemZISD::PERMUTE_DWORDS, 1,
    { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
};

// Called after matching a vector shuffle against a particular pattern.
// Both the original shuffle and the pattern have two vector operands.
3560 // OpNos[0] is the operand of the original shuffle that should be used for 3561 // operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything. 3562 // OpNos[1] is the same for operand 1 of the pattern. Resolve these -1s and 3563 // set OpNo0 and OpNo1 to the shuffle operands that should actually be used 3564 // for operands 0 and 1 of the pattern. 3565 static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) { 3566 if (OpNos[0] < 0) { 3567 if (OpNos[1] < 0) 3568 return false; 3569 OpNo0 = OpNo1 = OpNos[1]; 3570 } else if (OpNos[1] < 0) { 3571 OpNo0 = OpNo1 = OpNos[0]; 3572 } else { 3573 OpNo0 = OpNos[0]; 3574 OpNo1 = OpNos[1]; 3575 } 3576 return true; 3577 } 3578 3579 // Bytes is a VPERM-like permute vector, except that -1 is used for 3580 // undefined bytes. Return true if the VPERM can be implemented using P. 3581 // When returning true set OpNo0 to the VPERM operand that should be 3582 // used for operand 0 of P and likewise OpNo1 for operand 1 of P. 3583 // 3584 // For example, if swapping the VPERM operands allows P to match, OpNo0 3585 // will be 1 and OpNo1 will be 0. If instead Bytes only refers to one 3586 // operand, but rewriting it to use two duplicated operands allows it to 3587 // match P, then OpNo0 and OpNo1 will be the same. 3588 static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P, 3589 unsigned &OpNo0, unsigned &OpNo1) { 3590 int OpNos[] = { -1, -1 }; 3591 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) { 3592 int Elt = Bytes[I]; 3593 if (Elt >= 0) { 3594 // Make sure that the two permute vectors use the same suboperand 3595 // byte number. Only the operand numbers (the high bits) are 3596 // allowed to differ. 
3597 if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1)) 3598 return false; 3599 int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes; 3600 int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes; 3601 // Make sure that the operand mappings are consistent with previous 3602 // elements. 3603 if (OpNos[ModelOpNo] == 1 - RealOpNo) 3604 return false; 3605 OpNos[ModelOpNo] = RealOpNo; 3606 } 3607 } 3608 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1); 3609 } 3610 3611 // As above, but search for a matching permute. 3612 static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes, 3613 unsigned &OpNo0, unsigned &OpNo1) { 3614 for (auto &P : PermuteForms) 3615 if (matchPermute(Bytes, P, OpNo0, OpNo1)) 3616 return &P; 3617 return nullptr; 3618 } 3619 3620 // Bytes is a VPERM-like permute vector, except that -1 is used for 3621 // undefined bytes. This permute is an operand of an outer permute. 3622 // See whether redistributing the -1 bytes gives a shuffle that can be 3623 // implemented using P. If so, set Transform to a VPERM-like permute vector 3624 // that, when applied to the result of P, gives the original permute in Bytes. 3625 static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes, 3626 const Permute &P, 3627 SmallVectorImpl<int> &Transform) { 3628 unsigned To = 0; 3629 for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) { 3630 int Elt = Bytes[From]; 3631 if (Elt < 0) 3632 // Byte number From of the result is undefined. 3633 Transform[From] = -1; 3634 else { 3635 while (P.Bytes[To] != Elt) { 3636 To += 1; 3637 if (To == SystemZ::VectorBytes) 3638 return false; 3639 } 3640 Transform[From] = To; 3641 } 3642 } 3643 return true; 3644 } 3645 3646 // As above, but search for a matching permute. 
3647 static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes, 3648 SmallVectorImpl<int> &Transform) { 3649 for (auto &P : PermuteForms) 3650 if (matchDoublePermute(Bytes, P, Transform)) 3651 return &P; 3652 return nullptr; 3653 } 3654 3655 // Convert the mask of the given VECTOR_SHUFFLE into a byte-level mask, 3656 // as if it had type vNi8. 3657 static void getVPermMask(ShuffleVectorSDNode *VSN, 3658 SmallVectorImpl<int> &Bytes) { 3659 EVT VT = VSN->getValueType(0); 3660 unsigned NumElements = VT.getVectorNumElements(); 3661 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize(); 3662 Bytes.resize(NumElements * BytesPerElement, -1); 3663 for (unsigned I = 0; I < NumElements; ++I) { 3664 int Index = VSN->getMaskElt(I); 3665 if (Index >= 0) 3666 for (unsigned J = 0; J < BytesPerElement; ++J) 3667 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J; 3668 } 3669 } 3670 3671 // Bytes is a VPERM-like permute vector, except that -1 is used for 3672 // undefined bytes. See whether bytes [Start, Start + BytesPerElement) of 3673 // the result come from a contiguous sequence of bytes from one input. 3674 // Set Base to the selector for the first byte if so. 3675 static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start, 3676 unsigned BytesPerElement, int &Base) { 3677 Base = -1; 3678 for (unsigned I = 0; I < BytesPerElement; ++I) { 3679 if (Bytes[Start + I] >= 0) { 3680 unsigned Elem = Bytes[Start + I]; 3681 if (Base < 0) { 3682 Base = Elem - I; 3683 // Make sure the bytes would come from one input operand. 3684 if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size()) 3685 return false; 3686 } else if (unsigned(Base) != Elem - I) 3687 return false; 3688 } 3689 } 3690 return true; 3691 } 3692 3693 // Bytes is a VPERM-like permute vector, except that -1 is used for 3694 // undefined bytes. Return true if it can be performed using VSLDI. 
3695 // When returning true, set StartIndex to the shift amount and OpNo0 3696 // and OpNo1 to the VPERM operands that should be used as the first 3697 // and second shift operand respectively. 3698 static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes, 3699 unsigned &StartIndex, unsigned &OpNo0, 3700 unsigned &OpNo1) { 3701 int OpNos[] = { -1, -1 }; 3702 int Shift = -1; 3703 for (unsigned I = 0; I < 16; ++I) { 3704 int Index = Bytes[I]; 3705 if (Index >= 0) { 3706 int ExpectedShift = (Index - I) % SystemZ::VectorBytes; 3707 int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes; 3708 int RealOpNo = unsigned(Index) / SystemZ::VectorBytes; 3709 if (Shift < 0) 3710 Shift = ExpectedShift; 3711 else if (Shift != ExpectedShift) 3712 return false; 3713 // Make sure that the operand mappings are consistent with previous 3714 // elements. 3715 if (OpNos[ModelOpNo] == 1 - RealOpNo) 3716 return false; 3717 OpNos[ModelOpNo] = RealOpNo; 3718 } 3719 } 3720 StartIndex = Shift; 3721 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1); 3722 } 3723 3724 // Create a node that performs P on operands Op0 and Op1, casting the 3725 // operands to the appropriate type. The type of the result is determined by P. 3726 static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL, 3727 const Permute &P, SDValue Op0, SDValue Op1) { 3728 // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input 3729 // elements of a PACK are twice as wide as the outputs. 3730 unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 : 3731 P.Opcode == SystemZISD::PACK ? P.Operand * 2 : 3732 P.Operand); 3733 // Cast both operands to the appropriate type. 
3734 MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8), 3735 SystemZ::VectorBytes / InBytes); 3736 Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0); 3737 Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1); 3738 SDValue Op; 3739 if (P.Opcode == SystemZISD::PERMUTE_DWORDS) { 3740 SDValue Op2 = DAG.getConstant(P.Operand, DL, MVT::i32); 3741 Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2); 3742 } else if (P.Opcode == SystemZISD::PACK) { 3743 MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8), 3744 SystemZ::VectorBytes / P.Operand); 3745 Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1); 3746 } else { 3747 Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1); 3748 } 3749 return Op; 3750 } 3751 3752 // Bytes is a VPERM-like permute vector, except that -1 is used for 3753 // undefined bytes. Implement it on operands Ops[0] and Ops[1] using 3754 // VSLDI or VPERM. 3755 static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL, 3756 SDValue *Ops, 3757 const SmallVectorImpl<int> &Bytes) { 3758 for (unsigned I = 0; I < 2; ++I) 3759 Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]); 3760 3761 // First see whether VSLDI can be used. 3762 unsigned StartIndex, OpNo0, OpNo1; 3763 if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1)) 3764 return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0], 3765 Ops[OpNo1], DAG.getConstant(StartIndex, DL, MVT::i32)); 3766 3767 // Fall back on VPERM. Construct an SDNode for the permute vector. 3768 SDValue IndexNodes[SystemZ::VectorBytes]; 3769 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) 3770 if (Bytes[I] >= 0) 3771 IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32); 3772 else 3773 IndexNodes[I] = DAG.getUNDEF(MVT::i32); 3774 SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes); 3775 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0], Ops[1], Op2); 3776 } 3777 3778 namespace { 3779 // Describes a general N-operand vector shuffle. 
struct GeneralShuffle {
  GeneralShuffle(EVT vt) : VT(vt) {}
  // Append one undefined result element.
  void addUndef();
  // Append one result element taken from the given element of the given
  // operand.
  void add(SDValue, unsigned);
  // Build the nodes for the completed shuffle.
  SDValue getNode(SelectionDAG &, const SDLoc &);

  // The operands of the shuffle.
  SmallVector<SDValue, SystemZ::VectorBytes> Ops;

  // Index I is -1 if byte I of the result is undefined.  Otherwise the
  // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
  // Bytes[I] / SystemZ::VectorBytes.
  SmallVector<int, SystemZ::VectorBytes> Bytes;

  // The type of the shuffle result.
  EVT VT;
};
} // end anonymous namespace

// Add an extra undefined element to the shuffle.
// One -1 selector is appended to Bytes for each byte of the element.
void GeneralShuffle::addUndef() {
  unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
  for (unsigned I = 0; I < BytesPerElement; ++I)
    Bytes.push_back(-1);
}

// Add an extra element to the shuffle, taking it from element Elem of Op.
// A null Op indicates a vector input whose value will be calculated later;
// there is at most one such input per shuffle and it always has the same
// type as the result.
void GeneralShuffle::add(SDValue Op, unsigned Elem) {
  unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();

  // The source vector can have wider elements than the result,
  // either through an explicit TRUNCATE or because of type legalization.
  // We want the least significant part.
  EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
  unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();
  assert(FromBytesPerElement >= BytesPerElement &&
         "Invalid EXTRACT_VECTOR_ELT");
  // Byte offset, within the source vector, of the last BytesPerElement
  // bytes of source element Elem.
  unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
                   (FromBytesPerElement - BytesPerElement));

  // Look through things like shuffles and bitcasts.
  while (Op.getNode()) {
    if (Op.getOpcode() == ISD::BITCAST)
      Op = Op.getOperand(0);
    else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) {
      // See whether the bytes we need come from a contiguous part of one
      // operand.
      SmallVector<int, SystemZ::VectorBytes> OpBytes;
      getVPermMask(cast<ShuffleVectorSDNode>(Op), OpBytes);
      int NewByte;
      if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
        break;
      // A negative NewByte means that all the bytes we need are undefined.
      if (NewByte < 0) {
        addUndef();
        return;
      }
      // Continue the search in the shuffle operand that supplies the bytes.
      Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
      Byte = unsigned(NewByte) % SystemZ::VectorBytes;
    } else if (Op.isUndef()) {
      addUndef();
      return;
    } else
      break;
  }

  // Make sure that the source of the extraction is in Ops.
  unsigned OpNo = 0;
  for (; OpNo < Ops.size(); ++OpNo)
    if (Ops[OpNo] == Op)
      break;
  if (OpNo == Ops.size())
    Ops.push_back(Op);

  // Add the element to Bytes.
  unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
  for (unsigned I = 0; I < BytesPerElement; ++I)
    Bytes.push_back(Base + I);
}

// Return SDNodes for the completed shuffle.
// Bytes must describe exactly SystemZ::VectorBytes result bytes.  The
// result is bitcast to VT; with no operands at all it is an undef of
// type VT.
SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
  assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");

  if (Ops.size() == 0)
    return DAG.getUNDEF(VT);

  // Make sure that there are at least two shuffle operands.
  if (Ops.size() == 1)
    Ops.push_back(DAG.getUNDEF(MVT::v16i8));

  // Create a tree of shuffles, deferring root node until after the loop.
  // Try to redistribute the undefined elements of non-root nodes so that
  // the non-root shuffles match something like a pack or merge, then adjust
  // the parent node's permute vector to compensate for the new order.
  // Among other things, this copes with vectors like <2 x i16> that were
  // padded with undefined elements during type legalization.
  //
  // In the best case this redistribution will lead to the whole tree
  // using packs and merges.  It should rarely be a loss in other cases.
  unsigned Stride = 1;
  for (; Stride * 2 < Ops.size(); Stride *= 2) {
    // Combine operands I and I + Stride and leave the result in Ops[I].
    for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) {
      SDValue SubOps[] = { Ops[I], Ops[I + Stride] };

      // Create a mask for just these two operands.
      SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes);
      for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
        unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes;
        unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes;
        if (OpNo == I)
          NewBytes[J] = Byte;
        else if (OpNo == I + Stride)
          NewBytes[J] = SystemZ::VectorBytes + Byte;
        else
          NewBytes[J] = -1;
      }
      // See if it would be better to reorganize NewBytes to avoid using VPERM.
      SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes);
      if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) {
        Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]);
        // Applying NewBytesMap to Ops[I] gets back to NewBytes.
        for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
          if (NewBytes[J] >= 0) {
            assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes &&
                   "Invalid double permute");
            Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J];
          } else
            assert(NewBytesMap[J] < 0 && "Invalid double permute");
        }
      } else {
        // Just use NewBytes on the operands.
        Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes);
        // Byte J of the new Ops[I] is now the byte that result byte J needs.
        for (unsigned J = 0; J < SystemZ::VectorBytes; ++J)
          if (NewBytes[J] >= 0)
            Bytes[J] = I * SystemZ::VectorBytes + J;
      }
    }
  }

  // Now we just have 2 inputs.  Put the second operand in Ops[1].
  if (Stride > 1) {
    Ops[1] = Ops[Stride];
    // Renumber selectors that refer to operand Stride so that they refer
    // to operand 1.
    for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
      if (Bytes[I] >= int(SystemZ::VectorBytes))
        Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes;
  }

  // Look for an instruction that can do the permute without resorting
  // to VPERM.
  unsigned OpNo0, OpNo1;
  SDValue Op;
  if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
    Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
  else
    Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);
  return DAG.getNode(ISD::BITCAST, DL, VT, Op);
}

// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
// That is, every operand after the first is undefined.
static bool isScalarToVector(SDValue Op) {
  for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
    if (!Op.getOperand(I).isUndef())
      return false;
  return true;
}

// Return a vector of type VT that contains Value in the first element.
// The other elements don't matter.
static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                                   SDValue Value) {
  // If we have a constant, replicate it to all elements and let the
  // BUILD_VECTOR lowering take care of it.
  if (Value.getOpcode() == ISD::Constant ||
      Value.getOpcode() == ISD::ConstantFP) {
    SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value);
    return DAG.getBuildVector(VT, DL, Ops);
  }
  // An undefined scalar gives a completely undefined vector.
  if (Value.isUndef())
    return DAG.getUNDEF(VT);
  return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
}

// Return a vector of type VT in which Op0 is in element 0 and Op1 is in
// element 1.  Used for cases in which replication is cheap.
3967 static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT, 3968 SDValue Op0, SDValue Op1) { 3969 if (Op0.isUndef()) { 3970 if (Op1.isUndef()) 3971 return DAG.getUNDEF(VT); 3972 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1); 3973 } 3974 if (Op1.isUndef()) 3975 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0); 3976 return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT, 3977 buildScalarToVector(DAG, DL, VT, Op0), 3978 buildScalarToVector(DAG, DL, VT, Op1)); 3979 } 3980 3981 // Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64 3982 // vector for them. 3983 static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0, 3984 SDValue Op1) { 3985 if (Op0.isUndef() && Op1.isUndef()) 3986 return DAG.getUNDEF(MVT::v2i64); 3987 // If one of the two inputs is undefined then replicate the other one, 3988 // in order to avoid using another register unnecessarily. 3989 if (Op0.isUndef()) 3990 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1); 3991 else if (Op1.isUndef()) 3992 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); 3993 else { 3994 Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); 3995 Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1); 3996 } 3997 return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1); 3998 } 3999 4000 // Try to represent constant BUILD_VECTOR node BVN using a 4001 // SystemZISD::BYTE_MASK-style mask. Store the mask value in Mask 4002 // on success. 
4003 static bool tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask) { 4004 EVT ElemVT = BVN->getValueType(0).getVectorElementType(); 4005 unsigned BytesPerElement = ElemVT.getStoreSize(); 4006 for (unsigned I = 0, E = BVN->getNumOperands(); I != E; ++I) { 4007 SDValue Op = BVN->getOperand(I); 4008 if (!Op.isUndef()) { 4009 uint64_t Value; 4010 if (Op.getOpcode() == ISD::Constant) 4011 Value = dyn_cast<ConstantSDNode>(Op)->getZExtValue(); 4012 else if (Op.getOpcode() == ISD::ConstantFP) 4013 Value = (dyn_cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt() 4014 .getZExtValue()); 4015 else 4016 return false; 4017 for (unsigned J = 0; J < BytesPerElement; ++J) { 4018 uint64_t Byte = (Value >> (J * 8)) & 0xff; 4019 if (Byte == 0xff) 4020 Mask |= 1ULL << ((E - I - 1) * BytesPerElement + J); 4021 else if (Byte != 0) 4022 return false; 4023 } 4024 } 4025 } 4026 return true; 4027 } 4028 4029 // Try to load a vector constant in which BitsPerElement-bit value Value 4030 // is replicated to fill the vector. VT is the type of the resulting 4031 // constant, which may have elements of a different size from BitsPerElement. 4032 // Return the SDValue of the constant on success, otherwise return 4033 // an empty value. 4034 static SDValue tryBuildVectorReplicate(SelectionDAG &DAG, 4035 const SystemZInstrInfo *TII, 4036 const SDLoc &DL, EVT VT, uint64_t Value, 4037 unsigned BitsPerElement) { 4038 // Signed 16-bit values can be replicated using VREPI. 4039 int64_t SignedValue = SignExtend64(Value, BitsPerElement); 4040 if (isInt<16>(SignedValue)) { 4041 MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement), 4042 SystemZ::VectorBits / BitsPerElement); 4043 SDValue Op = DAG.getNode(SystemZISD::REPLICATE, DL, VecVT, 4044 DAG.getConstant(SignedValue, DL, MVT::i32)); 4045 return DAG.getNode(ISD::BITCAST, DL, VT, Op); 4046 } 4047 // See whether rotating the constant left some N places gives a value that 4048 // is one less than a power of 2 (i.e. 
all zeros followed by all ones). 4049 // If so we can use VGM. 4050 unsigned Start, End; 4051 if (TII->isRxSBGMask(Value, BitsPerElement, Start, End)) { 4052 // isRxSBGMask returns the bit numbers for a full 64-bit value, 4053 // with 0 denoting 1 << 63 and 63 denoting 1. Convert them to 4054 // bit numbers for an BitsPerElement value, so that 0 denotes 4055 // 1 << (BitsPerElement-1). 4056 Start -= 64 - BitsPerElement; 4057 End -= 64 - BitsPerElement; 4058 MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement), 4059 SystemZ::VectorBits / BitsPerElement); 4060 SDValue Op = DAG.getNode(SystemZISD::ROTATE_MASK, DL, VecVT, 4061 DAG.getConstant(Start, DL, MVT::i32), 4062 DAG.getConstant(End, DL, MVT::i32)); 4063 return DAG.getNode(ISD::BITCAST, DL, VT, Op); 4064 } 4065 return SDValue(); 4066 } 4067 4068 // If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually 4069 // better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for 4070 // the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR 4071 // would benefit from this representation and return it if so. 4072 static SDValue tryBuildVectorShuffle(SelectionDAG &DAG, 4073 BuildVectorSDNode *BVN) { 4074 EVT VT = BVN->getValueType(0); 4075 unsigned NumElements = VT.getVectorNumElements(); 4076 4077 // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation 4078 // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still 4079 // need a BUILD_VECTOR, add an additional placeholder operand for that 4080 // BUILD_VECTOR and store its operands in ResidueOps. 
4081 GeneralShuffle GS(VT); 4082 SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps; 4083 bool FoundOne = false; 4084 for (unsigned I = 0; I < NumElements; ++I) { 4085 SDValue Op = BVN->getOperand(I); 4086 if (Op.getOpcode() == ISD::TRUNCATE) 4087 Op = Op.getOperand(0); 4088 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 4089 Op.getOperand(1).getOpcode() == ISD::Constant) { 4090 unsigned Elem = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); 4091 GS.add(Op.getOperand(0), Elem); 4092 FoundOne = true; 4093 } else if (Op.isUndef()) { 4094 GS.addUndef(); 4095 } else { 4096 GS.add(SDValue(), ResidueOps.size()); 4097 ResidueOps.push_back(BVN->getOperand(I)); 4098 } 4099 } 4100 4101 // Nothing to do if there are no EXTRACT_VECTOR_ELTs. 4102 if (!FoundOne) 4103 return SDValue(); 4104 4105 // Create the BUILD_VECTOR for the remaining elements, if any. 4106 if (!ResidueOps.empty()) { 4107 while (ResidueOps.size() < NumElements) 4108 ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType())); 4109 for (auto &Op : GS.Ops) { 4110 if (!Op.getNode()) { 4111 Op = DAG.getBuildVector(VT, SDLoc(BVN), ResidueOps); 4112 break; 4113 } 4114 } 4115 } 4116 return GS.getNode(DAG, SDLoc(BVN)); 4117 } 4118 4119 // Combine GPR scalar values Elems into a vector of type VT. 4120 static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, 4121 SmallVectorImpl<SDValue> &Elems) { 4122 // See whether there is a single replicated value. 4123 SDValue Single; 4124 unsigned int NumElements = Elems.size(); 4125 unsigned int Count = 0; 4126 for (auto Elem : Elems) { 4127 if (!Elem.isUndef()) { 4128 if (!Single.getNode()) 4129 Single = Elem; 4130 else if (Elem != Single) { 4131 Single = SDValue(); 4132 break; 4133 } 4134 Count += 1; 4135 } 4136 } 4137 // There are three cases here: 4138 // 4139 // - if the only defined element is a loaded one, the best sequence 4140 // is a replicating load. 
4141 // 4142 // - otherwise, if the only defined element is an i64 value, we will 4143 // end up with the same VLVGP sequence regardless of whether we short-cut 4144 // for replication or fall through to the later code. 4145 // 4146 // - otherwise, if the only defined element is an i32 or smaller value, 4147 // we would need 2 instructions to replicate it: VLVGP followed by VREPx. 4148 // This is only a win if the single defined element is used more than once. 4149 // In other cases we're better off using a single VLVGx. 4150 if (Single.getNode() && (Count > 1 || Single.getOpcode() == ISD::LOAD)) 4151 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single); 4152 4153 // The best way of building a v2i64 from two i64s is to use VLVGP. 4154 if (VT == MVT::v2i64) 4155 return joinDwords(DAG, DL, Elems[0], Elems[1]); 4156 4157 // Use a 64-bit merge high to combine two doubles. 4158 if (VT == MVT::v2f64) 4159 return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]); 4160 4161 // Build v4f32 values directly from the FPRs: 4162 // 4163 // <Axxx> <Bxxx> <Cxxxx> <Dxxx> 4164 // V V VMRHF 4165 // <ABxx> <CDxx> 4166 // V VMRHG 4167 // <ABCD> 4168 if (VT == MVT::v4f32) { 4169 SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]); 4170 SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]); 4171 // Avoid unnecessary undefs by reusing the other operand. 4172 if (Op01.isUndef()) 4173 Op01 = Op23; 4174 else if (Op23.isUndef()) 4175 Op23 = Op01; 4176 // Merging identical replications is a no-op. 4177 if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23) 4178 return Op01; 4179 Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01); 4180 Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23); 4181 SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH, 4182 DL, MVT::v2i64, Op01, Op23); 4183 return DAG.getNode(ISD::BITCAST, DL, VT, Op); 4184 } 4185 4186 // Collect the constant terms. 
4187 SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue()); 4188 SmallVector<bool, SystemZ::VectorBytes> Done(NumElements, false); 4189 4190 unsigned NumConstants = 0; 4191 for (unsigned I = 0; I < NumElements; ++I) { 4192 SDValue Elem = Elems[I]; 4193 if (Elem.getOpcode() == ISD::Constant || 4194 Elem.getOpcode() == ISD::ConstantFP) { 4195 NumConstants += 1; 4196 Constants[I] = Elem; 4197 Done[I] = true; 4198 } 4199 } 4200 // If there was at least one constant, fill in the other elements of 4201 // Constants with undefs to get a full vector constant and use that 4202 // as the starting point. 4203 SDValue Result; 4204 if (NumConstants > 0) { 4205 for (unsigned I = 0; I < NumElements; ++I) 4206 if (!Constants[I].getNode()) 4207 Constants[I] = DAG.getUNDEF(Elems[I].getValueType()); 4208 Result = DAG.getBuildVector(VT, DL, Constants); 4209 } else { 4210 // Otherwise try to use VLVGP to start the sequence in order to 4211 // avoid a false dependency on any previous contents of the vector 4212 // register. This only makes sense if one of the associated elements 4213 // is defined. 4214 unsigned I1 = NumElements / 2 - 1; 4215 unsigned I2 = NumElements - 1; 4216 bool Def1 = !Elems[I1].isUndef(); 4217 bool Def2 = !Elems[I2].isUndef(); 4218 if (Def1 || Def2) { 4219 SDValue Elem1 = Elems[Def1 ? I1 : I2]; 4220 SDValue Elem2 = Elems[Def2 ? I2 : I1]; 4221 Result = DAG.getNode(ISD::BITCAST, DL, VT, 4222 joinDwords(DAG, DL, Elem1, Elem2)); 4223 Done[I1] = true; 4224 Done[I2] = true; 4225 } else 4226 Result = DAG.getUNDEF(VT); 4227 } 4228 4229 // Use VLVGx to insert the other elements. 
4230 for (unsigned I = 0; I < NumElements; ++I) 4231 if (!Done[I] && !Elems[I].isUndef()) 4232 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I], 4233 DAG.getConstant(I, DL, MVT::i32)); 4234 return Result; 4235 } 4236 4237 SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op, 4238 SelectionDAG &DAG) const { 4239 const SystemZInstrInfo *TII = 4240 static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); 4241 auto *BVN = cast<BuildVectorSDNode>(Op.getNode()); 4242 SDLoc DL(Op); 4243 EVT VT = Op.getValueType(); 4244 4245 if (BVN->isConstant()) { 4246 // Try using VECTOR GENERATE BYTE MASK. This is the architecturally- 4247 // preferred way of creating all-zero and all-one vectors so give it 4248 // priority over other methods below. 4249 uint64_t Mask = 0; 4250 if (tryBuildVectorByteMask(BVN, Mask)) { 4251 SDValue Op = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8, 4252 DAG.getConstant(Mask, DL, MVT::i32)); 4253 return DAG.getNode(ISD::BITCAST, DL, VT, Op); 4254 } 4255 4256 // Try using some form of replication. 4257 APInt SplatBits, SplatUndef; 4258 unsigned SplatBitSize; 4259 bool HasAnyUndefs; 4260 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 4261 8, true) && 4262 SplatBitSize <= 64) { 4263 // First try assuming that any undefined bits above the highest set bit 4264 // and below the lowest set bit are 1s. This increases the likelihood of 4265 // being able to use a sign-extended element value in VECTOR REPLICATE 4266 // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK. 
4267 uint64_t SplatBitsZ = SplatBits.getZExtValue(); 4268 uint64_t SplatUndefZ = SplatUndef.getZExtValue(); 4269 uint64_t Lower = (SplatUndefZ 4270 & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1)); 4271 uint64_t Upper = (SplatUndefZ 4272 & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1)); 4273 uint64_t Value = SplatBitsZ | Upper | Lower; 4274 SDValue Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value, 4275 SplatBitSize); 4276 if (Op.getNode()) 4277 return Op; 4278 4279 // Now try assuming that any undefined bits between the first and 4280 // last defined set bits are set. This increases the chances of 4281 // using a non-wraparound mask. 4282 uint64_t Middle = SplatUndefZ & ~Upper & ~Lower; 4283 Value = SplatBitsZ | Middle; 4284 Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value, SplatBitSize); 4285 if (Op.getNode()) 4286 return Op; 4287 } 4288 4289 // Fall back to loading it from memory. 4290 return SDValue(); 4291 } 4292 4293 // See if we should use shuffles to construct the vector from other vectors. 4294 if (SDValue Res = tryBuildVectorShuffle(DAG, BVN)) 4295 return Res; 4296 4297 // Detect SCALAR_TO_VECTOR conversions. 4298 if (isOperationLegal(ISD::SCALAR_TO_VECTOR, VT) && isScalarToVector(Op)) 4299 return buildScalarToVector(DAG, DL, VT, Op.getOperand(0)); 4300 4301 // Otherwise use buildVector to build the vector up from GPRs. 
4302 unsigned NumElements = Op.getNumOperands(); 4303 SmallVector<SDValue, SystemZ::VectorBytes> Ops(NumElements); 4304 for (unsigned I = 0; I < NumElements; ++I) 4305 Ops[I] = Op.getOperand(I); 4306 return buildVector(DAG, DL, VT, Ops); 4307 } 4308 4309 SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, 4310 SelectionDAG &DAG) const { 4311 auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode()); 4312 SDLoc DL(Op); 4313 EVT VT = Op.getValueType(); 4314 unsigned NumElements = VT.getVectorNumElements(); 4315 4316 if (VSN->isSplat()) { 4317 SDValue Op0 = Op.getOperand(0); 4318 unsigned Index = VSN->getSplatIndex(); 4319 assert(Index < VT.getVectorNumElements() && 4320 "Splat index should be defined and in first operand"); 4321 // See whether the value we're splatting is directly available as a scalar. 4322 if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) || 4323 Op0.getOpcode() == ISD::BUILD_VECTOR) 4324 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index)); 4325 // Otherwise keep it as a vector-to-vector operation. 4326 return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0), 4327 DAG.getConstant(Index, DL, MVT::i32)); 4328 } 4329 4330 GeneralShuffle GS(VT); 4331 for (unsigned I = 0; I < NumElements; ++I) { 4332 int Elt = VSN->getMaskElt(I); 4333 if (Elt < 0) 4334 GS.addUndef(); 4335 else 4336 GS.add(Op.getOperand(unsigned(Elt) / NumElements), 4337 unsigned(Elt) % NumElements); 4338 } 4339 return GS.getNode(DAG, SDLoc(VSN)); 4340 } 4341 4342 SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op, 4343 SelectionDAG &DAG) const { 4344 SDLoc DL(Op); 4345 // Just insert the scalar into element 0 of an undefined vector. 
4346 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, 4347 Op.getValueType(), DAG.getUNDEF(Op.getValueType()), 4348 Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32)); 4349 } 4350 4351 SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, 4352 SelectionDAG &DAG) const { 4353 // Handle insertions of floating-point values. 4354 SDLoc DL(Op); 4355 SDValue Op0 = Op.getOperand(0); 4356 SDValue Op1 = Op.getOperand(1); 4357 SDValue Op2 = Op.getOperand(2); 4358 EVT VT = Op.getValueType(); 4359 4360 // Insertions into constant indices of a v2f64 can be done using VPDI. 4361 // However, if the inserted value is a bitcast or a constant then it's 4362 // better to use GPRs, as below. 4363 if (VT == MVT::v2f64 && 4364 Op1.getOpcode() != ISD::BITCAST && 4365 Op1.getOpcode() != ISD::ConstantFP && 4366 Op2.getOpcode() == ISD::Constant) { 4367 uint64_t Index = dyn_cast<ConstantSDNode>(Op2)->getZExtValue(); 4368 unsigned Mask = VT.getVectorNumElements() - 1; 4369 if (Index <= Mask) 4370 return Op; 4371 } 4372 4373 // Otherwise bitcast to the equivalent integer form and insert via a GPR. 4374 MVT IntVT = MVT::getIntegerVT(VT.getVectorElementType().getSizeInBits()); 4375 MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements()); 4376 SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT, 4377 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), 4378 DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2); 4379 return DAG.getNode(ISD::BITCAST, DL, VT, Res); 4380 } 4381 4382 SDValue 4383 SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, 4384 SelectionDAG &DAG) const { 4385 // Handle extractions of floating-point values. 4386 SDLoc DL(Op); 4387 SDValue Op0 = Op.getOperand(0); 4388 SDValue Op1 = Op.getOperand(1); 4389 EVT VT = Op.getValueType(); 4390 EVT VecVT = Op0.getValueType(); 4391 4392 // Extractions of constant indices can be done directly. 
4393 if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) { 4394 uint64_t Index = CIndexN->getZExtValue(); 4395 unsigned Mask = VecVT.getVectorNumElements() - 1; 4396 if (Index <= Mask) 4397 return Op; 4398 } 4399 4400 // Otherwise bitcast to the equivalent integer form and extract via a GPR. 4401 MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits()); 4402 MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements()); 4403 SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT, 4404 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1); 4405 return DAG.getNode(ISD::BITCAST, DL, VT, Res); 4406 } 4407 4408 SDValue 4409 SystemZTargetLowering::lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG, 4410 unsigned UnpackHigh) const { 4411 SDValue PackedOp = Op.getOperand(0); 4412 EVT OutVT = Op.getValueType(); 4413 EVT InVT = PackedOp.getValueType(); 4414 unsigned ToBits = OutVT.getVectorElementType().getSizeInBits(); 4415 unsigned FromBits = InVT.getVectorElementType().getSizeInBits(); 4416 do { 4417 FromBits *= 2; 4418 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits), 4419 SystemZ::VectorBits / FromBits); 4420 PackedOp = DAG.getNode(UnpackHigh, SDLoc(PackedOp), OutVT, PackedOp); 4421 } while (FromBits != ToBits); 4422 return PackedOp; 4423 } 4424 4425 SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG, 4426 unsigned ByScalar) const { 4427 // Look for cases where a vector shift can use the *_BY_SCALAR form. 4428 SDValue Op0 = Op.getOperand(0); 4429 SDValue Op1 = Op.getOperand(1); 4430 SDLoc DL(Op); 4431 EVT VT = Op.getValueType(); 4432 unsigned ElemBitSize = VT.getVectorElementType().getSizeInBits(); 4433 4434 // See whether the shift vector is a splat represented as BUILD_VECTOR. 4435 if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) { 4436 APInt SplatBits, SplatUndef; 4437 unsigned SplatBitSize; 4438 bool HasAnyUndefs; 4439 // Check for constant splats. 
Use ElemBitSize as the minimum element 4440 // width and reject splats that need wider elements. 4441 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 4442 ElemBitSize, true) && 4443 SplatBitSize == ElemBitSize) { 4444 SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff, 4445 DL, MVT::i32); 4446 return DAG.getNode(ByScalar, DL, VT, Op0, Shift); 4447 } 4448 // Check for variable splats. 4449 BitVector UndefElements; 4450 SDValue Splat = BVN->getSplatValue(&UndefElements); 4451 if (Splat) { 4452 // Since i32 is the smallest legal type, we either need a no-op 4453 // or a truncation. 4454 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat); 4455 return DAG.getNode(ByScalar, DL, VT, Op0, Shift); 4456 } 4457 } 4458 4459 // See whether the shift vector is a splat represented as SHUFFLE_VECTOR, 4460 // and the shift amount is directly available in a GPR. 4461 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) { 4462 if (VSN->isSplat()) { 4463 SDValue VSNOp0 = VSN->getOperand(0); 4464 unsigned Index = VSN->getSplatIndex(); 4465 assert(Index < VT.getVectorNumElements() && 4466 "Splat index should be defined and in first operand"); 4467 if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) || 4468 VSNOp0.getOpcode() == ISD::BUILD_VECTOR) { 4469 // Since i32 is the smallest legal type, we either need a no-op 4470 // or a truncation. 4471 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, 4472 VSNOp0.getOperand(Index)); 4473 return DAG.getNode(ByScalar, DL, VT, Op0, Shift); 4474 } 4475 } 4476 } 4477 4478 // Otherwise just treat the current form as legal. 
4479 return Op; 4480 } 4481 4482 SDValue SystemZTargetLowering::LowerOperation(SDValue Op, 4483 SelectionDAG &DAG) const { 4484 switch (Op.getOpcode()) { 4485 case ISD::FRAMEADDR: 4486 return lowerFRAMEADDR(Op, DAG); 4487 case ISD::RETURNADDR: 4488 return lowerRETURNADDR(Op, DAG); 4489 case ISD::BR_CC: 4490 return lowerBR_CC(Op, DAG); 4491 case ISD::SELECT_CC: 4492 return lowerSELECT_CC(Op, DAG); 4493 case ISD::SETCC: 4494 return lowerSETCC(Op, DAG); 4495 case ISD::GlobalAddress: 4496 return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG); 4497 case ISD::GlobalTLSAddress: 4498 return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG); 4499 case ISD::BlockAddress: 4500 return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG); 4501 case ISD::JumpTable: 4502 return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG); 4503 case ISD::ConstantPool: 4504 return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG); 4505 case ISD::BITCAST: 4506 return lowerBITCAST(Op, DAG); 4507 case ISD::VASTART: 4508 return lowerVASTART(Op, DAG); 4509 case ISD::VACOPY: 4510 return lowerVACOPY(Op, DAG); 4511 case ISD::DYNAMIC_STACKALLOC: 4512 return lowerDYNAMIC_STACKALLOC(Op, DAG); 4513 case ISD::GET_DYNAMIC_AREA_OFFSET: 4514 return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG); 4515 case ISD::SMUL_LOHI: 4516 return lowerSMUL_LOHI(Op, DAG); 4517 case ISD::UMUL_LOHI: 4518 return lowerUMUL_LOHI(Op, DAG); 4519 case ISD::SDIVREM: 4520 return lowerSDIVREM(Op, DAG); 4521 case ISD::UDIVREM: 4522 return lowerUDIVREM(Op, DAG); 4523 case ISD::OR: 4524 return lowerOR(Op, DAG); 4525 case ISD::CTPOP: 4526 return lowerCTPOP(Op, DAG); 4527 case ISD::ATOMIC_FENCE: 4528 return lowerATOMIC_FENCE(Op, DAG); 4529 case ISD::ATOMIC_SWAP: 4530 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW); 4531 case ISD::ATOMIC_STORE: 4532 return lowerATOMIC_STORE(Op, DAG); 4533 case ISD::ATOMIC_LOAD: 4534 return lowerATOMIC_LOAD(Op, DAG); 4535 case ISD::ATOMIC_LOAD_ADD: 4536 return lowerATOMIC_LOAD_OP(Op, 
DAG, SystemZISD::ATOMIC_LOADW_ADD); 4537 case ISD::ATOMIC_LOAD_SUB: 4538 return lowerATOMIC_LOAD_SUB(Op, DAG); 4539 case ISD::ATOMIC_LOAD_AND: 4540 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND); 4541 case ISD::ATOMIC_LOAD_OR: 4542 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR); 4543 case ISD::ATOMIC_LOAD_XOR: 4544 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR); 4545 case ISD::ATOMIC_LOAD_NAND: 4546 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND); 4547 case ISD::ATOMIC_LOAD_MIN: 4548 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN); 4549 case ISD::ATOMIC_LOAD_MAX: 4550 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX); 4551 case ISD::ATOMIC_LOAD_UMIN: 4552 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN); 4553 case ISD::ATOMIC_LOAD_UMAX: 4554 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX); 4555 case ISD::ATOMIC_CMP_SWAP: 4556 return lowerATOMIC_CMP_SWAP(Op, DAG); 4557 case ISD::STACKSAVE: 4558 return lowerSTACKSAVE(Op, DAG); 4559 case ISD::STACKRESTORE: 4560 return lowerSTACKRESTORE(Op, DAG); 4561 case ISD::PREFETCH: 4562 return lowerPREFETCH(Op, DAG); 4563 case ISD::INTRINSIC_W_CHAIN: 4564 return lowerINTRINSIC_W_CHAIN(Op, DAG); 4565 case ISD::INTRINSIC_WO_CHAIN: 4566 return lowerINTRINSIC_WO_CHAIN(Op, DAG); 4567 case ISD::BUILD_VECTOR: 4568 return lowerBUILD_VECTOR(Op, DAG); 4569 case ISD::VECTOR_SHUFFLE: 4570 return lowerVECTOR_SHUFFLE(Op, DAG); 4571 case ISD::SCALAR_TO_VECTOR: 4572 return lowerSCALAR_TO_VECTOR(Op, DAG); 4573 case ISD::INSERT_VECTOR_ELT: 4574 return lowerINSERT_VECTOR_ELT(Op, DAG); 4575 case ISD::EXTRACT_VECTOR_ELT: 4576 return lowerEXTRACT_VECTOR_ELT(Op, DAG); 4577 case ISD::SIGN_EXTEND_VECTOR_INREG: 4578 return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACK_HIGH); 4579 case ISD::ZERO_EXTEND_VECTOR_INREG: 4580 return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACKL_HIGH); 4581 case ISD::SHL: 
4582 return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR); 4583 case ISD::SRL: 4584 return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR); 4585 case ISD::SRA: 4586 return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR); 4587 default: 4588 llvm_unreachable("Unexpected node to lower"); 4589 } 4590 } 4591 4592 const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { 4593 #define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME 4594 switch ((SystemZISD::NodeType)Opcode) { 4595 case SystemZISD::FIRST_NUMBER: break; 4596 OPCODE(RET_FLAG); 4597 OPCODE(CALL); 4598 OPCODE(SIBCALL); 4599 OPCODE(TLS_GDCALL); 4600 OPCODE(TLS_LDCALL); 4601 OPCODE(PCREL_WRAPPER); 4602 OPCODE(PCREL_OFFSET); 4603 OPCODE(IABS); 4604 OPCODE(ICMP); 4605 OPCODE(FCMP); 4606 OPCODE(TM); 4607 OPCODE(BR_CCMASK); 4608 OPCODE(SELECT_CCMASK); 4609 OPCODE(ADJDYNALLOC); 4610 OPCODE(EXTRACT_ACCESS); 4611 OPCODE(POPCNT); 4612 OPCODE(UMUL_LOHI64); 4613 OPCODE(SDIVREM32); 4614 OPCODE(SDIVREM64); 4615 OPCODE(UDIVREM32); 4616 OPCODE(UDIVREM64); 4617 OPCODE(MVC); 4618 OPCODE(MVC_LOOP); 4619 OPCODE(NC); 4620 OPCODE(NC_LOOP); 4621 OPCODE(OC); 4622 OPCODE(OC_LOOP); 4623 OPCODE(XC); 4624 OPCODE(XC_LOOP); 4625 OPCODE(CLC); 4626 OPCODE(CLC_LOOP); 4627 OPCODE(STPCPY); 4628 OPCODE(STRCMP); 4629 OPCODE(SEARCH_STRING); 4630 OPCODE(IPM); 4631 OPCODE(SERIALIZE); 4632 OPCODE(MEMBARRIER); 4633 OPCODE(TBEGIN); 4634 OPCODE(TBEGIN_NOFLOAT); 4635 OPCODE(TEND); 4636 OPCODE(BYTE_MASK); 4637 OPCODE(ROTATE_MASK); 4638 OPCODE(REPLICATE); 4639 OPCODE(JOIN_DWORDS); 4640 OPCODE(SPLAT); 4641 OPCODE(MERGE_HIGH); 4642 OPCODE(MERGE_LOW); 4643 OPCODE(SHL_DOUBLE); 4644 OPCODE(PERMUTE_DWORDS); 4645 OPCODE(PERMUTE); 4646 OPCODE(PACK); 4647 OPCODE(PACKS_CC); 4648 OPCODE(PACKLS_CC); 4649 OPCODE(UNPACK_HIGH); 4650 OPCODE(UNPACKL_HIGH); 4651 OPCODE(UNPACK_LOW); 4652 OPCODE(UNPACKL_LOW); 4653 OPCODE(VSHL_BY_SCALAR); 4654 OPCODE(VSRL_BY_SCALAR); 4655 OPCODE(VSRA_BY_SCALAR); 4656 OPCODE(VSUM); 4657 OPCODE(VICMPE); 4658 
OPCODE(VICMPH); 4659 OPCODE(VICMPHL); 4660 OPCODE(VICMPES); 4661 OPCODE(VICMPHS); 4662 OPCODE(VICMPHLS); 4663 OPCODE(VFCMPE); 4664 OPCODE(VFCMPH); 4665 OPCODE(VFCMPHE); 4666 OPCODE(VFCMPES); 4667 OPCODE(VFCMPHS); 4668 OPCODE(VFCMPHES); 4669 OPCODE(VFTCI); 4670 OPCODE(VEXTEND); 4671 OPCODE(VROUND); 4672 OPCODE(VTM); 4673 OPCODE(VFAE_CC); 4674 OPCODE(VFAEZ_CC); 4675 OPCODE(VFEE_CC); 4676 OPCODE(VFEEZ_CC); 4677 OPCODE(VFENE_CC); 4678 OPCODE(VFENEZ_CC); 4679 OPCODE(VISTR_CC); 4680 OPCODE(VSTRC_CC); 4681 OPCODE(VSTRCZ_CC); 4682 OPCODE(TDC); 4683 OPCODE(ATOMIC_SWAPW); 4684 OPCODE(ATOMIC_LOADW_ADD); 4685 OPCODE(ATOMIC_LOADW_SUB); 4686 OPCODE(ATOMIC_LOADW_AND); 4687 OPCODE(ATOMIC_LOADW_OR); 4688 OPCODE(ATOMIC_LOADW_XOR); 4689 OPCODE(ATOMIC_LOADW_NAND); 4690 OPCODE(ATOMIC_LOADW_MIN); 4691 OPCODE(ATOMIC_LOADW_MAX); 4692 OPCODE(ATOMIC_LOADW_UMIN); 4693 OPCODE(ATOMIC_LOADW_UMAX); 4694 OPCODE(ATOMIC_CMP_SWAPW); 4695 OPCODE(LRV); 4696 OPCODE(STRV); 4697 OPCODE(PREFETCH); 4698 } 4699 return nullptr; 4700 #undef OPCODE 4701 } 4702 4703 // Return true if VT is a vector whose elements are a whole number of bytes 4704 // in width. 4705 static bool canTreatAsByteVector(EVT VT) { 4706 return VT.isVector() && VT.getVectorElementType().getSizeInBits() % 8 == 0; 4707 } 4708 4709 // Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT 4710 // producing a result of type ResVT. Op is a possibly bitcast version 4711 // of the input vector and Index is the index (based on type VecVT) that 4712 // should be extracted. Return the new extraction if a simplification 4713 // was possible or if Force is true. 4714 SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT, 4715 EVT VecVT, SDValue Op, 4716 unsigned Index, 4717 DAGCombinerInfo &DCI, 4718 bool Force) const { 4719 SelectionDAG &DAG = DCI.DAG; 4720 4721 // The number of bytes being extracted. 
4722 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize(); 4723 4724 for (;;) { 4725 unsigned Opcode = Op.getOpcode(); 4726 if (Opcode == ISD::BITCAST) 4727 // Look through bitcasts. 4728 Op = Op.getOperand(0); 4729 else if (Opcode == ISD::VECTOR_SHUFFLE && 4730 canTreatAsByteVector(Op.getValueType())) { 4731 // Get a VPERM-like permute mask and see whether the bytes covered 4732 // by the extracted element are a contiguous sequence from one 4733 // source operand. 4734 SmallVector<int, SystemZ::VectorBytes> Bytes; 4735 getVPermMask(cast<ShuffleVectorSDNode>(Op), Bytes); 4736 int First; 4737 if (!getShuffleInput(Bytes, Index * BytesPerElement, 4738 BytesPerElement, First)) 4739 break; 4740 if (First < 0) 4741 return DAG.getUNDEF(ResVT); 4742 // Make sure the contiguous sequence starts at a multiple of the 4743 // original element size. 4744 unsigned Byte = unsigned(First) % Bytes.size(); 4745 if (Byte % BytesPerElement != 0) 4746 break; 4747 // We can get the extracted value directly from an input. 4748 Index = Byte / BytesPerElement; 4749 Op = Op.getOperand(unsigned(First) / Bytes.size()); 4750 Force = true; 4751 } else if (Opcode == ISD::BUILD_VECTOR && 4752 canTreatAsByteVector(Op.getValueType())) { 4753 // We can only optimize this case if the BUILD_VECTOR elements are 4754 // at least as wide as the extracted value. 4755 EVT OpVT = Op.getValueType(); 4756 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize(); 4757 if (OpBytesPerElement < BytesPerElement) 4758 break; 4759 // Make sure that the least-significant bit of the extracted value 4760 // is the least significant bit of an input. 4761 unsigned End = (Index + 1) * BytesPerElement; 4762 if (End % OpBytesPerElement != 0) 4763 break; 4764 // We're extracting the low part of one operand of the BUILD_VECTOR. 
4765 Op = Op.getOperand(End / OpBytesPerElement - 1); 4766 if (!Op.getValueType().isInteger()) { 4767 EVT VT = MVT::getIntegerVT(Op.getValueType().getSizeInBits()); 4768 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op); 4769 DCI.AddToWorklist(Op.getNode()); 4770 } 4771 EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits()); 4772 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op); 4773 if (VT != ResVT) { 4774 DCI.AddToWorklist(Op.getNode()); 4775 Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op); 4776 } 4777 return Op; 4778 } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG || 4779 Opcode == ISD::ZERO_EXTEND_VECTOR_INREG || 4780 Opcode == ISD::ANY_EXTEND_VECTOR_INREG) && 4781 canTreatAsByteVector(Op.getValueType()) && 4782 canTreatAsByteVector(Op.getOperand(0).getValueType())) { 4783 // Make sure that only the unextended bits are significant. 4784 EVT ExtVT = Op.getValueType(); 4785 EVT OpVT = Op.getOperand(0).getValueType(); 4786 unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize(); 4787 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize(); 4788 unsigned Byte = Index * BytesPerElement; 4789 unsigned SubByte = Byte % ExtBytesPerElement; 4790 unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement; 4791 if (SubByte < MinSubByte || 4792 SubByte + BytesPerElement > ExtBytesPerElement) 4793 break; 4794 // Get the byte offset of the unextended element 4795 Byte = Byte / ExtBytesPerElement * OpBytesPerElement; 4796 // ...then add the byte offset relative to that element. 
4797 Byte += SubByte - MinSubByte; 4798 if (Byte % BytesPerElement != 0) 4799 break; 4800 Op = Op.getOperand(0); 4801 Index = Byte / BytesPerElement; 4802 Force = true; 4803 } else 4804 break; 4805 } 4806 if (Force) { 4807 if (Op.getValueType() != VecVT) { 4808 Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op); 4809 DCI.AddToWorklist(Op.getNode()); 4810 } 4811 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op, 4812 DAG.getConstant(Index, DL, MVT::i32)); 4813 } 4814 return SDValue(); 4815 } 4816 4817 // Optimize vector operations in scalar value Op on the basis that Op 4818 // is truncated to TruncVT. 4819 SDValue SystemZTargetLowering::combineTruncateExtract( 4820 const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const { 4821 // If we have (trunc (extract_vector_elt X, Y)), try to turn it into 4822 // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements 4823 // of type TruncVT. 4824 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 4825 TruncVT.getSizeInBits() % 8 == 0) { 4826 SDValue Vec = Op.getOperand(0); 4827 EVT VecVT = Vec.getValueType(); 4828 if (canTreatAsByteVector(VecVT)) { 4829 if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 4830 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize(); 4831 unsigned TruncBytes = TruncVT.getStoreSize(); 4832 if (BytesPerElement % TruncBytes == 0) { 4833 // Calculate the value of Y' in the above description. We are 4834 // splitting the original elements into Scale equal-sized pieces 4835 // and for truncation purposes want the last (least-significant) 4836 // of these pieces for IndexN. This is easiest to do by calculating 4837 // the start index of the following element and then subtracting 1. 4838 unsigned Scale = BytesPerElement / TruncBytes; 4839 unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1; 4840 4841 // Defer the creation of the bitcast from X to combineExtract, 4842 // which might be able to optimize the extraction. 
4843 VecVT = MVT::getVectorVT(MVT::getIntegerVT(TruncBytes * 8), 4844 VecVT.getStoreSize() / TruncBytes); 4845 EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT); 4846 return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true); 4847 } 4848 } 4849 } 4850 } 4851 return SDValue(); 4852 } 4853 4854 SDValue SystemZTargetLowering::combineSIGN_EXTEND( 4855 SDNode *N, DAGCombinerInfo &DCI) const { 4856 // Convert (sext (ashr (shl X, C1), C2)) to 4857 // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as 4858 // cheap as narrower ones. 4859 SelectionDAG &DAG = DCI.DAG; 4860 SDValue N0 = N->getOperand(0); 4861 EVT VT = N->getValueType(0); 4862 if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) { 4863 auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 4864 SDValue Inner = N0.getOperand(0); 4865 if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) { 4866 if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) { 4867 unsigned Extra = (VT.getSizeInBits() - 4868 N0.getValueType().getSizeInBits()); 4869 unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra; 4870 unsigned NewSraAmt = SraAmt->getZExtValue() + Extra; 4871 EVT ShiftVT = N0.getOperand(1).getValueType(); 4872 SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT, 4873 Inner.getOperand(0)); 4874 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext, 4875 DAG.getConstant(NewShlAmt, SDLoc(Inner), 4876 ShiftVT)); 4877 return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, 4878 DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT)); 4879 } 4880 } 4881 } 4882 return SDValue(); 4883 } 4884 4885 SDValue SystemZTargetLowering::combineMERGE( 4886 SDNode *N, DAGCombinerInfo &DCI) const { 4887 SelectionDAG &DAG = DCI.DAG; 4888 unsigned Opcode = N->getOpcode(); 4889 SDValue Op0 = N->getOperand(0); 4890 SDValue Op1 = N->getOperand(1); 4891 if (Op0.getOpcode() == ISD::BITCAST) 4892 Op0 = Op0.getOperand(0); 4893 if (Op0.getOpcode() == SystemZISD::BYTE_MASK && 4894 
cast<ConstantSDNode>(Op0.getOperand(0))->getZExtValue() == 0) { 4895 // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF 4896 // for v4f32. 4897 if (Op1 == N->getOperand(0)) 4898 return Op1; 4899 // (z_merge_? 0, X) -> (z_unpackl_? 0, X). 4900 EVT VT = Op1.getValueType(); 4901 unsigned ElemBytes = VT.getVectorElementType().getStoreSize(); 4902 if (ElemBytes <= 4) { 4903 Opcode = (Opcode == SystemZISD::MERGE_HIGH ? 4904 SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW); 4905 EVT InVT = VT.changeVectorElementTypeToInteger(); 4906 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16), 4907 SystemZ::VectorBytes / ElemBytes / 2); 4908 if (VT != InVT) { 4909 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1); 4910 DCI.AddToWorklist(Op1.getNode()); 4911 } 4912 SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1); 4913 DCI.AddToWorklist(Op.getNode()); 4914 return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op); 4915 } 4916 } 4917 return SDValue(); 4918 } 4919 4920 SDValue SystemZTargetLowering::combineSTORE( 4921 SDNode *N, DAGCombinerInfo &DCI) const { 4922 SelectionDAG &DAG = DCI.DAG; 4923 auto *SN = cast<StoreSDNode>(N); 4924 auto &Op1 = N->getOperand(1); 4925 EVT MemVT = SN->getMemoryVT(); 4926 // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better 4927 // for the extraction to be done on a vMiN value, so that we can use VSTE. 4928 // If X has wider elements then convert it to: 4929 // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z). 4930 if (MemVT.isInteger()) { 4931 if (SDValue Value = 4932 combineTruncateExtract(SDLoc(N), MemVT, SN->getValue(), DCI)) { 4933 DCI.AddToWorklist(Value.getNode()); 4934 4935 // Rewrite the store with the new form of stored value. 4936 return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value, 4937 SN->getBasePtr(), SN->getMemoryVT(), 4938 SN->getMemOperand()); 4939 } 4940 } 4941 // Combine STORE (BSWAP) into STRVH/STRV/STRVG 4942 // See comment in combineBSWAP about volatile accesses. 
if (!SN->isVolatile() &&
    Op1.getOpcode() == ISD::BSWAP &&
    Op1.getNode()->hasOneUse() &&
    (Op1.getValueType() == MVT::i16 ||
     Op1.getValueType() == MVT::i32 ||
     Op1.getValueType() == MVT::i64)) {

  SDValue BSwapOp = Op1.getOperand(0);

  if (BSwapOp.getValueType() == MVT::i16)
    BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp);

  SDValue Ops[] = {
    N->getOperand(0), BSwapOp, N->getOperand(2),
    DAG.getValueType(Op1.getValueType())
  };

  return
    DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other),
                            Ops, MemVT, SN->getMemOperand());
}
return SDValue();
}

// Combine an EXTRACT_VECTOR_ELT node N.  Only extractions with a constant
// index are handled; they are forwarded to combineExtract.  Returns an
// empty SDValue when the index is not a constant.
SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT(
    SDNode *N, DAGCombinerInfo &DCI) const {
  // Try to simplify a vector extraction.
  if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
    SDValue Op0 = N->getOperand(0);
    EVT VecVT = Op0.getValueType();
    return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0,
                          IndexN->getZExtValue(), DCI, false);
  }
  return SDValue();
}

// Combine a JOIN_DWORDS node N.  When both operands are the same value the
// node is rewritten as a REPLICATE of that value; otherwise an empty
// SDValue is returned.
SDValue SystemZTargetLowering::combineJOIN_DWORDS(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  // (join_dwords X, X) == (replicate X)
  if (N->getOperand(0) == N->getOperand(1))
    return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0),
                       N->getOperand(0));
  return SDValue();
}

// Combine an FP_ROUND node N.  Looks for a pair of f64->f32 roundings of
// elements 0 and 1 of the same v2f64 vector and replaces both with
// extractions from a single VROUND of that vector.
SDValue SystemZTargetLowering::combineFP_ROUND(
    SDNode *N, DAGCombinerInfo &DCI) const {
  // (fround (extract_vector_elt X 0))
  // (fround (extract_vector_elt X 1)) ->
  // (extract_vector_elt (VROUND X) 0)
  // (extract_vector_elt (VROUND X) 2)
  //
  // This is a special case since the target doesn't really support v2f32s.
  SelectionDAG &DAG = DCI.DAG;
  SDValue Op0 = N->getOperand(0);
  // N itself must be the rounding of element 0, and that extraction must
  // have no other users.
  if (N->getValueType(0) == MVT::f32 &&
      Op0.hasOneUse() &&
      Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      Op0.getOperand(0).getValueType() == MVT::v2f64 &&
      Op0.getOperand(1).getOpcode() == ISD::Constant &&
      cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue() == 0) {
    SDValue Vec = Op0.getOperand(0);
    // Search the other users of the vector for a single-use extraction of
    // element 1 whose only user is an f32 FP_ROUND.
    for (auto *U : Vec->uses()) {
      if (U != Op0.getNode() &&
          U->hasOneUse() &&
          U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
          U->getOperand(0) == Vec &&
          U->getOperand(1).getOpcode() == ISD::Constant &&
          cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 1) {
        SDValue OtherRound = SDValue(*U->use_begin(), 0);
        if (OtherRound.getOpcode() == ISD::FP_ROUND &&
            OtherRound.getOperand(0) == SDValue(U, 0) &&
            OtherRound.getValueType() == MVT::f32) {
          SDValue VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
                                       MVT::v4f32, Vec);
          DCI.AddToWorklist(VRound.getNode());
          // The second rounded value is read from element 2 of the v4f32
          // result, and directly replaces the other FP_ROUND.
          SDValue Extract1 =
            DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
                        VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
          DCI.AddToWorklist(Extract1.getNode());
          DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
          // The first rounded value (element 0) is returned as the
          // replacement for N.
          SDValue Extract0 =
            DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
                        VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
          return Extract0;
        }
      }
    }
  }
  return SDValue();
}

// Combine a BSWAP node N.  A byte swap of a single-use, non-extending,
// non-volatile i16/i32/i64 load is replaced by one LRV memory node.
SDValue SystemZTargetLowering::combineBSWAP(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  // Combine BSWAP (LOAD) into LRVH/LRV/LRVG
  // These loads are allowed to access memory multiple times, and so we must check
  // that the loads are not volatile before performing the combine.
  if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
      N->getOperand(0).hasOneUse() &&
      (N->getValueType(0) == MVT::i16 || N->getValueType(0) == MVT::i32 ||
       N->getValueType(0) == MVT::i64) &&
      !cast<LoadSDNode>(N->getOperand(0))->isVolatile()) {
    SDValue Load = N->getOperand(0);
    LoadSDNode *LD = cast<LoadSDNode>(Load);

    // Create the byte-swapping load.
    SDValue Ops[] = {
      LD->getChain(),                       // Chain
      LD->getBasePtr(),                     // Ptr
      DAG.getValueType(N->getValueType(0))  // VT
    };
    // The node produces an i64 result for i64 and an i32 result for both
    // i32 and i16, plus the chain.
    SDValue BSLoad =
      DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N),
                              DAG.getVTList(N->getValueType(0) == MVT::i64 ?
                                            MVT::i64 : MVT::i32, MVT::Other),
                              Ops, LD->getMemoryVT(), LD->getMemOperand());

    // If this is an i16 load, insert the truncate.
    SDValue ResVal = BSLoad;
    if (N->getValueType(0) == MVT::i16)
      ResVal = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, BSLoad);

    // First, combine the bswap away.  This makes the value produced by the
    // load dead.
    DCI.CombineTo(N, ResVal);

    // Next, combine the load away, we give it a bogus result value but a real
    // chain result.  The result value is dead because the bswap is dead.
    DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));

    // Return N so it doesn't get rechecked!
    return SDValue(N, 0);
  }
  return SDValue();
}

// Combine a shift or rotate node N by removing a redundant AND from its
// shift-amount operand.
SDValue SystemZTargetLowering::combineSHIFTROT(
    SDNode *N, DAGCombinerInfo &DCI) const {

  SelectionDAG &DAG = DCI.DAG;

  // Shift/rotate instructions only use the last 6 bits of the second operand
  // register. If the second operand is the result of an AND with an immediate
  // value that has its last 6 bits set, we can safely remove the AND operation.
  SDValue N1 = N->getOperand(1);
  if (N1.getOpcode() == ISD::AND) {
    auto *AndMask = dyn_cast<ConstantSDNode>(N1.getOperand(1));

    // The AND mask is constant
    if (AndMask) {
      auto AmtVal = AndMask->getZExtValue();

      // Bottom 6 bits are set
      if ((AmtVal & 0x3f) == 0x3f) {
        SDValue AndOp = N1->getOperand(0);

        // This is the only use, so remove the node
        if (N1.hasOneUse()) {
          // Combine the AND away
          DCI.CombineTo(N1.getNode(), AndOp);

          // Return N so it isn't rechecked
          return SDValue(N, 0);

        // The node will be reused, so create a new node for this one use
        } else {
          SDValue Replace = DAG.getNode(N->getOpcode(), SDLoc(N),
                                        N->getValueType(0), N->getOperand(0),
                                        AndOp);
          DCI.AddToWorklist(Replace.getNode());

          return Replace;
        }
      }
    }
  }

  return SDValue();
}

// Dispatch N to the combine routine for its opcode.  Opcodes without an
// entry in the switch fall through and yield an empty SDValue.
SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
                                                 DAGCombinerInfo &DCI) const {
  switch(N->getOpcode()) {
  default: break;
  case ISD::SIGN_EXTEND: return combineSIGN_EXTEND(N, DCI);
  case SystemZISD::MERGE_HIGH:
  case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI);
  case ISD::STORE: return combineSTORE(N, DCI);
  case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
  case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
  case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
  case ISD::BSWAP: return combineBSWAP(N, DCI);
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
  case ISD::ROTL: return combineSHIFTROT(N, DCI);
  }

  return SDValue();
}

//===----------------------------------------------------------------------===//
// Custom insertion
//===----------------------------------------------------------------------===//

// Create a new basic block after MBB.
5151 static MachineBasicBlock *emitBlockAfter(MachineBasicBlock *MBB) { 5152 MachineFunction &MF = *MBB->getParent(); 5153 MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock()); 5154 MF.insert(std::next(MachineFunction::iterator(MBB)), NewMBB); 5155 return NewMBB; 5156 } 5157 5158 // Split MBB after MI and return the new block (the one that contains 5159 // instructions after MI). 5160 static MachineBasicBlock *splitBlockAfter(MachineBasicBlock::iterator MI, 5161 MachineBasicBlock *MBB) { 5162 MachineBasicBlock *NewMBB = emitBlockAfter(MBB); 5163 NewMBB->splice(NewMBB->begin(), MBB, 5164 std::next(MachineBasicBlock::iterator(MI)), MBB->end()); 5165 NewMBB->transferSuccessorsAndUpdatePHIs(MBB); 5166 return NewMBB; 5167 } 5168 5169 // Split MBB before MI and return the new block (the one that contains MI). 5170 static MachineBasicBlock *splitBlockBefore(MachineBasicBlock::iterator MI, 5171 MachineBasicBlock *MBB) { 5172 MachineBasicBlock *NewMBB = emitBlockAfter(MBB); 5173 NewMBB->splice(NewMBB->begin(), MBB, MI, MBB->end()); 5174 NewMBB->transferSuccessorsAndUpdatePHIs(MBB); 5175 return NewMBB; 5176 } 5177 5178 // Force base value Base into a register before MI. Return the register. 5179 static unsigned forceReg(MachineInstr &MI, MachineOperand &Base, 5180 const SystemZInstrInfo *TII) { 5181 if (Base.isReg()) 5182 return Base.getReg(); 5183 5184 MachineBasicBlock *MBB = MI.getParent(); 5185 MachineFunction &MF = *MBB->getParent(); 5186 MachineRegisterInfo &MRI = MF.getRegInfo(); 5187 5188 unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); 5189 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg) 5190 .addOperand(Base) 5191 .addImm(0) 5192 .addReg(0); 5193 return Reg; 5194 } 5195 5196 // Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI. 
5197 MachineBasicBlock * 5198 SystemZTargetLowering::emitSelect(MachineInstr &MI, 5199 MachineBasicBlock *MBB) const { 5200 const SystemZInstrInfo *TII = 5201 static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); 5202 5203 unsigned DestReg = MI.getOperand(0).getReg(); 5204 unsigned TrueReg = MI.getOperand(1).getReg(); 5205 unsigned FalseReg = MI.getOperand(2).getReg(); 5206 unsigned CCValid = MI.getOperand(3).getImm(); 5207 unsigned CCMask = MI.getOperand(4).getImm(); 5208 DebugLoc DL = MI.getDebugLoc(); 5209 5210 MachineBasicBlock *StartMBB = MBB; 5211 MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB); 5212 MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB); 5213 5214 // StartMBB: 5215 // BRC CCMask, JoinMBB 5216 // # fallthrough to FalseMBB 5217 MBB = StartMBB; 5218 BuildMI(MBB, DL, TII->get(SystemZ::BRC)) 5219 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB); 5220 MBB->addSuccessor(JoinMBB); 5221 MBB->addSuccessor(FalseMBB); 5222 5223 // FalseMBB: 5224 // # fallthrough to JoinMBB 5225 MBB = FalseMBB; 5226 MBB->addSuccessor(JoinMBB); 5227 5228 // JoinMBB: 5229 // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ] 5230 // ... 5231 MBB = JoinMBB; 5232 BuildMI(*MBB, MI, DL, TII->get(SystemZ::PHI), DestReg) 5233 .addReg(TrueReg).addMBB(StartMBB) 5234 .addReg(FalseReg).addMBB(FalseMBB); 5235 5236 MI.eraseFromParent(); 5237 return JoinMBB; 5238 } 5239 5240 // Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI. 5241 // StoreOpcode is the store to use and Invert says whether the store should 5242 // happen when the condition is false rather than true. If a STORE ON 5243 // CONDITION is available, STOCOpcode is its opcode, otherwise it is 0. 
MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
                                                        MachineBasicBlock *MBB,
                                                        unsigned StoreOpcode,
                                                        unsigned STOCOpcode,
                                                        bool Invert) const {
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());

  // Pseudo operands: source register, base, displacement, index register,
  // CC valid mask and CC mask.
  unsigned SrcReg = MI.getOperand(0).getReg();
  MachineOperand Base = MI.getOperand(1);
  int64_t Disp = MI.getOperand(2).getImm();
  unsigned IndexReg = MI.getOperand(3).getReg();
  unsigned CCValid = MI.getOperand(4).getImm();
  unsigned CCMask = MI.getOperand(5).getImm();
  DebugLoc DL = MI.getDebugLoc();

  StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp);

  // Use STOCOpcode if possible.  We could use different store patterns in
  // order to avoid matching the index register, but the performance trade-offs
  // might be more complicated in that case.
  if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) {
    // The STOC form takes the condition under which to store, so the mask
    // is flipped within CCValid only when Invert is set.
    if (Invert)
      CCMask ^= CCValid;
    BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
      .addReg(SrcReg).addOperand(Base).addImm(Disp)
      .addImm(CCValid).addImm(CCMask);
    MI.eraseFromParent();
    return MBB;
  }

  // Get the condition needed to branch around the store.
  if (!Invert)
    CCMask ^= CCValid;

  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB);
  MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);

  // StartMBB:
  //   BRC CCMask, JoinMBB
  //   # fallthrough to FalseMBB
  MBB = StartMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
  MBB->addSuccessor(JoinMBB);
  MBB->addSuccessor(FalseMBB);

  // FalseMBB:
  //   store %SrcReg, %Disp(%Index,%Base)
  //   # fallthrough to JoinMBB
  MBB = FalseMBB;
  BuildMI(MBB, DL, TII->get(StoreOpcode))
    .addReg(SrcReg).addOperand(Base).addImm(Disp).addReg(IndexReg);
  MBB->addSuccessor(JoinMBB);

  MI.eraseFromParent();
  return JoinMBB;
}

// Implement EmitInstrWithCustomInserter for pseudo ATOMIC_LOAD{,W}_*
// or ATOMIC_SWAP{,W} instruction MI.  BinOpcode is the instruction that
// performs the binary operation elided by "*", or 0 for ATOMIC_SWAP{,W}.
// BitSize is the width of the field in bits, or 0 if this is a partword
// ATOMIC_LOADW_* or ATOMIC_SWAPW instruction, in which case the bitsize
// is one of the operands.  Invert says whether the field should be
// inverted after performing BinOpcode (e.g. for NAND).
MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
    MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode,
    unsigned BitSize, bool Invert) const {
  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
  MachineRegisterInfo &MRI = MF.getRegInfo();
  // The partword forms pass a BitSize of 0 (see above), so they take the
  // subword path; the real width is read from operand 6 below.
  bool IsSubWord = (BitSize < 32);

  // Extract the operands.  Base can be a register or a frame index.
  // Src2 can be a register or immediate.
  unsigned Dest = MI.getOperand(0).getReg();
  MachineOperand Base = earlyUseOperand(MI.getOperand(1));
  int64_t Disp = MI.getOperand(2).getImm();
  MachineOperand Src2 = earlyUseOperand(MI.getOperand(3));
  unsigned BitShift = (IsSubWord ? MI.getOperand(4).getReg() : 0);
  unsigned NegBitShift = (IsSubWord ? MI.getOperand(5).getReg() : 0);
  DebugLoc DL = MI.getDebugLoc();
  if (IsSubWord)
    BitSize = MI.getOperand(6).getImm();

  // Subword operations use 32-bit registers.
  const TargetRegisterClass *RC = (BitSize <= 32 ?
                                   &SystemZ::GR32BitRegClass :
                                   &SystemZ::GR64BitRegClass);
  unsigned LOpcode = BitSize <= 32 ? SystemZ::L : SystemZ::LG;
  unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG;

  // Get the right opcodes for the displacement.
  LOpcode = TII->getOpcodeForOffset(LOpcode, Disp);
  CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp);
  assert(LOpcode && CSOpcode && "Displacement out of range");

  // Create virtual registers for temporary results.  When no operation and
  // no rotation is needed (a full-width swap), NewVal is simply Src2, and
  // the Rotated* names alias the unrotated registers for full-width fields.
  unsigned OrigVal = MRI.createVirtualRegister(RC);
  unsigned OldVal = MRI.createVirtualRegister(RC);
  unsigned NewVal = (BinOpcode || IsSubWord ?
                     MRI.createVirtualRegister(RC) : Src2.getReg());
  unsigned RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal);
  unsigned RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal);

  // Insert a basic block for the main loop.
  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
  MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);

  // StartMBB:
  //   ...
  //   %OrigVal = L Disp(%Base)
  //   # fall through to LoopMBB
  MBB = StartMBB;
  BuildMI(MBB, DL, TII->get(LOpcode), OrigVal)
    .addOperand(Base).addImm(Disp).addReg(0);
  MBB->addSuccessor(LoopMBB);

  // LoopMBB:
  //   %OldVal        = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
  //   %RotatedOldVal = RLL %OldVal, 0(%BitShift)
  //   %RotatedNewVal = OP %RotatedOldVal, %Src2
  //   %NewVal        = RLL %RotatedNewVal, 0(%NegBitShift)
  //   %Dest          = CS %OldVal, %NewVal, Disp(%Base)
  //   JNE LoopMBB
  //   # fall through to DoneMBB
  MBB = LoopMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
    .addReg(OrigVal).addMBB(StartMBB)
    .addReg(Dest).addMBB(LoopMBB);
  if (IsSubWord)
    BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
      .addReg(OldVal).addReg(BitShift).addImm(0);
  if (Invert) {
    // Perform the operation normally and then invert every bit of the field.
    unsigned Tmp = MRI.createVirtualRegister(RC);
    BuildMI(MBB, DL, TII->get(BinOpcode), Tmp)
      .addReg(RotatedOldVal).addOperand(Src2);
    if (BitSize <= 32)
      // XILF with the upper BitSize bits set.
      BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
        .addReg(Tmp).addImm(-1U << (32 - BitSize));
    else {
      // Use LCGR and add -1 to the result, which is more compact than
      // an XILF, XILH pair.
      unsigned Tmp2 = MRI.createVirtualRegister(RC);
      BuildMI(MBB, DL, TII->get(SystemZ::LCGR), Tmp2).addReg(Tmp);
      BuildMI(MBB, DL, TII->get(SystemZ::AGHI), RotatedNewVal)
        .addReg(Tmp2).addImm(-1);
    }
  } else if (BinOpcode)
    // A simple binary operation.
    BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
      .addReg(RotatedOldVal).addOperand(Src2);
  else if (IsSubWord)
    // Use RISBG to rotate Src2 into position and use it to replace the
    // field in RotatedOldVal.
    BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal)
      .addReg(RotatedOldVal).addReg(Src2.getReg())
      .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
  if (IsSubWord)
    BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
      .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
  BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
    .addReg(OldVal).addReg(NewVal).addOperand(Base).addImm(Disp);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
  MBB->addSuccessor(LoopMBB);
  MBB->addSuccessor(DoneMBB);

  MI.eraseFromParent();
  return DoneMBB;
}

// Implement EmitInstrWithCustomInserter for pseudo
// ATOMIC_LOAD{,W}_{,U}{MIN,MAX} instruction MI.  CompareOpcode is the
// instruction that should be used to compare the current field with the
// minimum or maximum value.  KeepOldMask is the BRC condition-code mask
// for when the current field should be kept.  BitSize is the width of
// the field in bits, or 0 if this is a partword ATOMIC_LOADW_* instruction.
MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
    MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode,
    unsigned KeepOldMask, unsigned BitSize) const {
  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
  MachineRegisterInfo &MRI = MF.getRegInfo();
  // The partword forms pass a BitSize of 0 (see above); the real width is
  // read from operand 6 below.
  bool IsSubWord = (BitSize < 32);

  // Extract the operands.  Base can be a register or a frame index.
  unsigned Dest = MI.getOperand(0).getReg();
  MachineOperand Base = earlyUseOperand(MI.getOperand(1));
  int64_t Disp = MI.getOperand(2).getImm();
  unsigned Src2 = MI.getOperand(3).getReg();
  unsigned BitShift = (IsSubWord ? MI.getOperand(4).getReg() : 0);
  unsigned NegBitShift = (IsSubWord ? MI.getOperand(5).getReg() : 0);
  DebugLoc DL = MI.getDebugLoc();
  if (IsSubWord)
    BitSize = MI.getOperand(6).getImm();

  // Subword operations use 32-bit registers.
  const TargetRegisterClass *RC = (BitSize <= 32 ?
                                   &SystemZ::GR32BitRegClass :
                                   &SystemZ::GR64BitRegClass);
  unsigned LOpcode = BitSize <= 32 ? SystemZ::L : SystemZ::LG;
  unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG;

  // Get the right opcodes for the displacement.
  LOpcode = TII->getOpcodeForOffset(LOpcode, Disp);
  CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp);
  assert(LOpcode && CSOpcode && "Displacement out of range");

  // Create virtual registers for temporary results.  For full-width fields
  // the Rotated* names alias the unrotated registers (and Src2).
  unsigned OrigVal = MRI.createVirtualRegister(RC);
  unsigned OldVal = MRI.createVirtualRegister(RC);
  unsigned NewVal = MRI.createVirtualRegister(RC);
  unsigned RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal);
  unsigned RotatedAltVal = (IsSubWord ? MRI.createVirtualRegister(RC) : Src2);
  unsigned RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal);

  // Insert 3 basic blocks for the loop.
  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
  MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
  MachineBasicBlock *UseAltMBB = emitBlockAfter(LoopMBB);
  MachineBasicBlock *UpdateMBB = emitBlockAfter(UseAltMBB);

  // StartMBB:
  //   ...
  //   %OrigVal     = L Disp(%Base)
  //   # fall through to LoopMBB
  MBB = StartMBB;
  BuildMI(MBB, DL, TII->get(LOpcode), OrigVal)
    .addOperand(Base).addImm(Disp).addReg(0);
  MBB->addSuccessor(LoopMBB);

  // LoopMBB:
  //   %OldVal        = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ]
  //   %RotatedOldVal = RLL %OldVal, 0(%BitShift)
  //   CompareOpcode %RotatedOldVal, %Src2
  //   BRC KeepOldMask, UpdateMBB
  //   # fall through to UseAltMBB
  MBB = LoopMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
    .addReg(OrigVal).addMBB(StartMBB)
    .addReg(Dest).addMBB(UpdateMBB);
  if (IsSubWord)
    BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
      .addReg(OldVal).addReg(BitShift).addImm(0);
  BuildMI(MBB, DL, TII->get(CompareOpcode))
    .addReg(RotatedOldVal).addReg(Src2);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB);
  MBB->addSuccessor(UpdateMBB);
  MBB->addSuccessor(UseAltMBB);

  // UseAltMBB:
  //   %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0
  //   # fall through to UpdateMBB
  MBB = UseAltMBB;
  if (IsSubWord)
    BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal)
      .addReg(RotatedOldVal).addReg(Src2)
      .addImm(32).addImm(31 + BitSize).addImm(0);
  MBB->addSuccessor(UpdateMBB);

  // UpdateMBB:
  //   %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ],
  //                        [ %RotatedAltVal, UseAltMBB ]
  //   %NewVal        = RLL %RotatedNewVal, 0(%NegBitShift)
  //   %Dest          = CS %OldVal, %NewVal, Disp(%Base)
  //   JNE LoopMBB
  //   # fall through to DoneMBB
  MBB = UpdateMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal)
    .addReg(RotatedOldVal).addMBB(LoopMBB)
    .addReg(RotatedAltVal).addMBB(UseAltMBB);
  if (IsSubWord)
    BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
      .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
  BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
    .addReg(OldVal).addReg(NewVal).addOperand(Base).addImm(Disp);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
  MBB->addSuccessor(LoopMBB);
  MBB->addSuccessor(DoneMBB);

  MI.eraseFromParent();
  return DoneMBB;
}

// Implement EmitInstrWithCustomInserter for pseudo ATOMIC_CMP_SWAPW
// instruction MI.
MachineBasicBlock *
SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
                                          MachineBasicBlock *MBB) const {

  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
  MachineRegisterInfo &MRI = MF.getRegInfo();

  // Extract the operands.  Base can be a register or a frame index.
  unsigned Dest = MI.getOperand(0).getReg();
  MachineOperand Base = earlyUseOperand(MI.getOperand(1));
  int64_t Disp = MI.getOperand(2).getImm();
  unsigned OrigCmpVal = MI.getOperand(3).getReg();
  unsigned OrigSwapVal = MI.getOperand(4).getReg();
  unsigned BitShift = MI.getOperand(5).getReg();
  unsigned NegBitShift = MI.getOperand(6).getReg();
  int64_t BitSize = MI.getOperand(7).getImm();
  DebugLoc DL = MI.getDebugLoc();

  const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass;

  // Get the right opcodes for the displacement.
  unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
  unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
  assert(LOpcode && CSOpcode && "Displacement out of range");

  // Create virtual registers for temporary results.
  unsigned OrigOldVal = MRI.createVirtualRegister(RC);
  unsigned OldVal = MRI.createVirtualRegister(RC);
  unsigned CmpVal = MRI.createVirtualRegister(RC);
  unsigned SwapVal = MRI.createVirtualRegister(RC);
  unsigned StoreVal = MRI.createVirtualRegister(RC);
  unsigned RetryOldVal = MRI.createVirtualRegister(RC);
  unsigned RetryCmpVal = MRI.createVirtualRegister(RC);
  unsigned RetrySwapVal = MRI.createVirtualRegister(RC);

  // Insert 2 basic blocks for the loop.
  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
  MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
  MachineBasicBlock *SetMBB = emitBlockAfter(LoopMBB);

  // StartMBB:
  //   ...
  //   %OrigOldVal     = L Disp(%Base)
  //   # fall through to LoopMBB
  MBB = StartMBB;
  BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal)
    .addOperand(Base).addImm(Disp).addReg(0);
  MBB->addSuccessor(LoopMBB);

  // LoopMBB:
  //   %OldVal        = phi [ %OrigOldVal, StartMBB ], [ %RetryOldVal, SetMBB ]
  //   %CmpVal        = phi [ %OrigCmpVal, StartMBB ], [ %RetryCmpVal, SetMBB ]
  //   %SwapVal       = phi [ %OrigSwapVal, StartMBB ], [ %RetrySwapVal, SetMBB ]
  //   %Dest          = RLL %OldVal, BitSize(%BitShift)
  //                      ^^ The low BitSize bits contain the field
  //                         of interest.
  //   %RetryCmpVal   = RISBG32 %CmpVal, %Dest, 32, 63-BitSize, 0
  //                      ^^ Replace the upper 32-BitSize bits of the
  //                         comparison value with those that we loaded,
  //                         so that we can use a full word comparison.
  //   CR %Dest, %RetryCmpVal
  //   JNE DoneMBB
  //   # Fall through to SetMBB
  MBB = LoopMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
    .addReg(OrigOldVal).addMBB(StartMBB)
    .addReg(RetryOldVal).addMBB(SetMBB);
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), CmpVal)
    .addReg(OrigCmpVal).addMBB(StartMBB)
    .addReg(RetryCmpVal).addMBB(SetMBB);
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal)
    .addReg(OrigSwapVal).addMBB(StartMBB)
    .addReg(RetrySwapVal).addMBB(SetMBB);
  BuildMI(MBB, DL, TII->get(SystemZ::RLL), Dest)
    .addReg(OldVal).addReg(BitShift).addImm(BitSize);
  BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetryCmpVal)
    .addReg(CmpVal).addReg(Dest).addImm(32).addImm(63 - BitSize).addImm(0);
  BuildMI(MBB, DL, TII->get(SystemZ::CR))
    .addReg(Dest).addReg(RetryCmpVal);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_ICMP)
    .addImm(SystemZ::CCMASK_CMP_NE).addMBB(DoneMBB);
  MBB->addSuccessor(DoneMBB);
  MBB->addSuccessor(SetMBB);

  // SetMBB:
  //   %RetrySwapVal = RISBG32 %SwapVal, %Dest, 32, 63-BitSize, 0
  //                      ^^ Replace the upper 32-BitSize bits of the new
  //                         value with those that we loaded.
  //   %StoreVal    = RLL %RetrySwapVal, -BitSize(%NegBitShift)
  //                      ^^ Rotate the new field to its proper position.
  //   %RetryOldVal = CS %OldVal, %StoreVal, Disp(%Base)
  //   JNE LoopMBB
  //   # fall through to DoneMBB
  MBB = SetMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal)
    .addReg(SwapVal).addReg(Dest).addImm(32).addImm(63 - BitSize).addImm(0);
  BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal)
    .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize);
  BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal)
    .addReg(OldVal).addReg(StoreVal).addOperand(Base).addImm(Disp);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
  MBB->addSuccessor(LoopMBB);
  MBB->addSuccessor(DoneMBB);

  MI.eraseFromParent();
  return DoneMBB;
}

// Emit an extension from a GR32 or GR64 to a GR128.  ClearEven is true
// if the high register of the GR128 value must be cleared or false if
// it's "don't care".  SubReg is subreg_l32 when extending a GR32
// and subreg_l64 when extending a GR64.
MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI,
                                                     MachineBasicBlock *MBB,
                                                     bool ClearEven,
                                                     unsigned SubReg) const {
  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
  MachineRegisterInfo &MRI = MF.getRegInfo();
  DebugLoc DL = MI.getDebugLoc();

  unsigned Dest = MI.getOperand(0).getReg();
  unsigned Src = MI.getOperand(1).getReg();
  unsigned In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);

  // Start from an undefined 128-bit value.
  BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128);
  if (ClearEven) {
    unsigned NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
    unsigned Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);

    // Materialize a 64-bit zero and insert it as the subreg_h64 half.
    BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64)
      .addImm(0);
    BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128)
      .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64);
    In128 = NewIn128;
  }
  // Insert the source value at SubReg to form the final result.
  BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
    .addReg(In128).addReg(Src).addImm(SubReg);

  MI.eraseFromParent();
  return MBB;
}

// Expand memory-to-memory pseudo instruction MI into one or more Opcode
// instructions that each handle at most 256 bytes.  The pseudo's operands
// are the destination base and displacement, the source base and
// displacement, and the length; in the loop form an extra operand gives
// the trip count of a loop that handles 256 bytes per iteration.  The code
// below special-cases two opcodes: MVC gets a prefetch in the loop, and a
// CLC of more than 256 bytes branches to a common end block as soon as a
// difference is found.
MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
    MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
  MachineRegisterInfo &MRI = MF.getRegInfo();
  DebugLoc DL = MI.getDebugLoc();

  MachineOperand DestBase = earlyUseOperand(MI.getOperand(0));
  uint64_t DestDisp = MI.getOperand(1).getImm();
  MachineOperand SrcBase = earlyUseOperand(MI.getOperand(2));
  uint64_t SrcDisp = MI.getOperand(3).getImm();
  uint64_t Length = MI.getOperand(4).getImm();

  // When generating more than one CLC, all but the last will need to
  // branch to the end when a difference is found.
  MachineBasicBlock *EndMBB = (Length > 256 && Opcode == SystemZ::CLC ?
                               splitBlockAfter(MI, MBB) : nullptr);

  // Check for the loop form, in which operand 5 is the trip count.
  if (MI.getNumExplicitOperands() > 5) {
    // With identical source and destination bases, one set of address
    // registers serves both operands.
    bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);

    uint64_t StartCountReg = MI.getOperand(5).getReg();
    uint64_t StartSrcReg = forceReg(MI, SrcBase, TII);
    uint64_t StartDestReg = (HaveSingleBase ? StartSrcReg :
                             forceReg(MI, DestBase, TII));

    const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
    uint64_t ThisSrcReg = MRI.createVirtualRegister(RC);
    uint64_t ThisDestReg = (HaveSingleBase ? ThisSrcReg :
                            MRI.createVirtualRegister(RC));
    uint64_t NextSrcReg = MRI.createVirtualRegister(RC);
    uint64_t NextDestReg = (HaveSingleBase ? NextSrcReg :
                            MRI.createVirtualRegister(RC));

    RC = &SystemZ::GR64BitRegClass;
    uint64_t ThisCountReg = MRI.createVirtualRegister(RC);
    uint64_t NextCountReg = MRI.createVirtualRegister(RC);

    // NextMBB is a separate block only when the loop body needs an early
    // exit to EndMBB; otherwise it is LoopMBB itself.
    MachineBasicBlock *StartMBB = MBB;
    MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
    MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
    MachineBasicBlock *NextMBB = (EndMBB ? emitBlockAfter(LoopMBB) : LoopMBB);

    // StartMBB:
    //   # fall through to LoopMBB
    MBB->addSuccessor(LoopMBB);

    // LoopMBB:
    //   %ThisDestReg = phi [ %StartDestReg, StartMBB ],
    //                      [ %NextDestReg, NextMBB ]
    //   %ThisSrcReg = phi [ %StartSrcReg, StartMBB ],
    //                     [ %NextSrcReg, NextMBB ]
    //   %ThisCountReg = phi [ %StartCountReg, StartMBB ],
    //                       [ %NextCountReg, NextMBB ]
    //   ( PFD 2, 768+DestDisp(%ThisDestReg) )
    //   Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg)
    //   ( JLH EndMBB )
    //
    // The prefetch is used only for MVC.  The JLH is used only for CLC.
    MBB = LoopMBB;

    BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg)
      .addReg(StartDestReg).addMBB(StartMBB)
      .addReg(NextDestReg).addMBB(NextMBB);
    if (!HaveSingleBase)
      BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg)
        .addReg(StartSrcReg).addMBB(StartMBB)
        .addReg(NextSrcReg).addMBB(NextMBB);
    BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg)
      .addReg(StartCountReg).addMBB(StartMBB)
      .addReg(NextCountReg).addMBB(NextMBB);
    if (Opcode == SystemZ::MVC)
      BuildMI(MBB, DL, TII->get(SystemZ::PFD))
        .addImm(SystemZ::PFD_WRITE)
        .addReg(ThisDestReg).addImm(DestDisp + 768).addReg(0);
    BuildMI(MBB, DL, TII->get(Opcode))
      .addReg(ThisDestReg).addImm(DestDisp).addImm(256)
      .addReg(ThisSrcReg).addImm(SrcDisp);
    if (EndMBB) {
      BuildMI(MBB, DL, TII->get(SystemZ::BRC))
        .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
        .addMBB(EndMBB);
      MBB->addSuccessor(EndMBB);
      MBB->addSuccessor(NextMBB);
    }

    // NextMBB:
    //   %NextDestReg = LA 256(%ThisDestReg)
    //   %NextSrcReg = LA 256(%ThisSrcReg)
    //   %NextCountReg = AGHI %ThisCountReg, -1
    //   CGHI %NextCountReg, 0
    //   JLH LoopMBB
    //   # fall through to DoneMBB
    //
    // The AGHI, CGHI and JLH should be converted to BRCTG by later passes.
    MBB = NextMBB;

    BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg)
      .addReg(ThisDestReg).addImm(256).addReg(0);
    if (!HaveSingleBase)
      BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg)
        .addReg(ThisSrcReg).addImm(256).addReg(0);
    BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg)
      .addReg(ThisCountReg).addImm(-1);
    BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
      .addReg(NextCountReg).addImm(0);
    BuildMI(MBB, DL, TII->get(SystemZ::BRC))
      .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
      .addMBB(LoopMBB);
    MBB->addSuccessor(LoopMBB);
    MBB->addSuccessor(DoneMBB);

    // Continue after the loop from the advanced addresses; only the part
    // of Length below 256 remains for the straight-line code.
    DestBase = MachineOperand::CreateReg(NextDestReg, false);
    SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
    Length &= 255;
    MBB = DoneMBB;
  }
  // Handle any remaining bytes with straight-line code.
  while (Length > 0) {
    uint64_t ThisLength = std::min(Length, uint64_t(256));
    // The previous iteration might have created out-of-range displacements.
    // Apply them using LAY if so.
    if (!isUInt<12>(DestDisp)) {
      unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
      BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LAY), Reg)
        .addOperand(DestBase)
        .addImm(DestDisp)
        .addReg(0);
      DestBase = MachineOperand::CreateReg(Reg, false);
      DestDisp = 0;
    }
    if (!isUInt<12>(SrcDisp)) {
      unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
      BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LAY), Reg)
        .addOperand(SrcBase)
        .addImm(SrcDisp)
        .addReg(0);
      SrcBase = MachineOperand::CreateReg(Reg, false);
      SrcDisp = 0;
    }
    BuildMI(*MBB, MI, DL, TII->get(Opcode))
      .addOperand(DestBase).addImm(DestDisp).addImm(ThisLength)
      .addOperand(SrcBase).addImm(SrcDisp);
    DestDisp += ThisLength;
    SrcDisp += ThisLength;
    Length -= ThisLength;
    // If there's another CLC to go, branch to the end if a difference
    // was found.
    if (EndMBB && Length > 0) {
      MachineBasicBlock *NextMBB = splitBlockBefore(MI, MBB);
      BuildMI(MBB, DL, TII->get(SystemZ::BRC))
        .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
        .addMBB(EndMBB);
      MBB->addSuccessor(EndMBB);
      MBB->addSuccessor(NextMBB);
      MBB = NextMBB;
    }
  }
  if (EndMBB) {
    // The last block falls through to EndMBB, which is marked as having CC
    // live on entry.
    MBB->addSuccessor(EndMBB);
    MBB = EndMBB;
    MBB->addLiveIn(SystemZ::CC);
  }

  MI.eraseFromParent();
  return MBB;
}

// Decompose string pseudo-instruction MI into a loop that continually performs
// Opcode until CC != 3.
5861 MachineBasicBlock *SystemZTargetLowering::emitStringWrapper( 5862 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const { 5863 MachineFunction &MF = *MBB->getParent(); 5864 const SystemZInstrInfo *TII = 5865 static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); 5866 MachineRegisterInfo &MRI = MF.getRegInfo(); 5867 DebugLoc DL = MI.getDebugLoc(); 5868 5869 uint64_t End1Reg = MI.getOperand(0).getReg(); 5870 uint64_t Start1Reg = MI.getOperand(1).getReg(); 5871 uint64_t Start2Reg = MI.getOperand(2).getReg(); 5872 uint64_t CharReg = MI.getOperand(3).getReg(); 5873 5874 const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass; 5875 uint64_t This1Reg = MRI.createVirtualRegister(RC); 5876 uint64_t This2Reg = MRI.createVirtualRegister(RC); 5877 uint64_t End2Reg = MRI.createVirtualRegister(RC); 5878 5879 MachineBasicBlock *StartMBB = MBB; 5880 MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB); 5881 MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB); 5882 5883 // StartMBB: 5884 // # fall through to LoopMMB 5885 MBB->addSuccessor(LoopMBB); 5886 5887 // LoopMBB: 5888 // %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ] 5889 // %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ] 5890 // R0L = %CharReg 5891 // %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L 5892 // JO LoopMBB 5893 // # fall through to DoneMMB 5894 // 5895 // The load of R0L can be hoisted by post-RA LICM. 
5896 MBB = LoopMBB; 5897 5898 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg) 5899 .addReg(Start1Reg).addMBB(StartMBB) 5900 .addReg(End1Reg).addMBB(LoopMBB); 5901 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg) 5902 .addReg(Start2Reg).addMBB(StartMBB) 5903 .addReg(End2Reg).addMBB(LoopMBB); 5904 BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg); 5905 BuildMI(MBB, DL, TII->get(Opcode)) 5906 .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define) 5907 .addReg(This1Reg).addReg(This2Reg); 5908 BuildMI(MBB, DL, TII->get(SystemZ::BRC)) 5909 .addImm(SystemZ::CCMASK_ANY).addImm(SystemZ::CCMASK_3).addMBB(LoopMBB); 5910 MBB->addSuccessor(LoopMBB); 5911 MBB->addSuccessor(DoneMBB); 5912 5913 DoneMBB->addLiveIn(SystemZ::CC); 5914 5915 MI.eraseFromParent(); 5916 return DoneMBB; 5917 } 5918 5919 // Update TBEGIN instruction with final opcode and register clobbers. 5920 MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin( 5921 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode, 5922 bool NoFloat) const { 5923 MachineFunction &MF = *MBB->getParent(); 5924 const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); 5925 const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); 5926 5927 // Update opcode. 5928 MI.setDesc(TII->get(Opcode)); 5929 5930 // We cannot handle a TBEGIN that clobbers the stack or frame pointer. 5931 // Make sure to add the corresponding GRSM bits if they are missing. 5932 uint64_t Control = MI.getOperand(2).getImm(); 5933 static const unsigned GPRControlBit[16] = { 5934 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000, 5935 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100 5936 }; 5937 Control |= GPRControlBit[15]; 5938 if (TFI->hasFP(MF)) 5939 Control |= GPRControlBit[11]; 5940 MI.getOperand(2).setImm(Control); 5941 5942 // Add GPR clobbers. 
5943 for (int I = 0; I < 16; I++) { 5944 if ((Control & GPRControlBit[I]) == 0) { 5945 unsigned Reg = SystemZMC::GR64Regs[I]; 5946 MI.addOperand(MachineOperand::CreateReg(Reg, true, true)); 5947 } 5948 } 5949 5950 // Add FPR/VR clobbers. 5951 if (!NoFloat && (Control & 4) != 0) { 5952 if (Subtarget.hasVector()) { 5953 for (int I = 0; I < 32; I++) { 5954 unsigned Reg = SystemZMC::VR128Regs[I]; 5955 MI.addOperand(MachineOperand::CreateReg(Reg, true, true)); 5956 } 5957 } else { 5958 for (int I = 0; I < 16; I++) { 5959 unsigned Reg = SystemZMC::FP64Regs[I]; 5960 MI.addOperand(MachineOperand::CreateReg(Reg, true, true)); 5961 } 5962 } 5963 } 5964 5965 return MBB; 5966 } 5967 5968 MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0( 5969 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const { 5970 MachineFunction &MF = *MBB->getParent(); 5971 MachineRegisterInfo *MRI = &MF.getRegInfo(); 5972 const SystemZInstrInfo *TII = 5973 static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); 5974 DebugLoc DL = MI.getDebugLoc(); 5975 5976 unsigned SrcReg = MI.getOperand(0).getReg(); 5977 5978 // Create new virtual register of the same class as source. 5979 const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); 5980 unsigned DstReg = MRI->createVirtualRegister(RC); 5981 5982 // Replace pseudo with a normal load-and-test that models the def as 5983 // well. 
5984 BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg) 5985 .addReg(SrcReg); 5986 MI.eraseFromParent(); 5987 5988 return MBB; 5989 } 5990 5991 MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter( 5992 MachineInstr &MI, MachineBasicBlock *MBB) const { 5993 switch (MI.getOpcode()) { 5994 case SystemZ::Select32Mux: 5995 case SystemZ::Select32: 5996 case SystemZ::SelectF32: 5997 case SystemZ::Select64: 5998 case SystemZ::SelectF64: 5999 case SystemZ::SelectF128: 6000 return emitSelect(MI, MBB); 6001 6002 case SystemZ::CondStore8Mux: 6003 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false); 6004 case SystemZ::CondStore8MuxInv: 6005 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true); 6006 case SystemZ::CondStore16Mux: 6007 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false); 6008 case SystemZ::CondStore16MuxInv: 6009 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true); 6010 case SystemZ::CondStore8: 6011 return emitCondStore(MI, MBB, SystemZ::STC, 0, false); 6012 case SystemZ::CondStore8Inv: 6013 return emitCondStore(MI, MBB, SystemZ::STC, 0, true); 6014 case SystemZ::CondStore16: 6015 return emitCondStore(MI, MBB, SystemZ::STH, 0, false); 6016 case SystemZ::CondStore16Inv: 6017 return emitCondStore(MI, MBB, SystemZ::STH, 0, true); 6018 case SystemZ::CondStore32: 6019 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false); 6020 case SystemZ::CondStore32Inv: 6021 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true); 6022 case SystemZ::CondStore64: 6023 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, false); 6024 case SystemZ::CondStore64Inv: 6025 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, true); 6026 case SystemZ::CondStoreF32: 6027 return emitCondStore(MI, MBB, SystemZ::STE, 0, false); 6028 case SystemZ::CondStoreF32Inv: 6029 return emitCondStore(MI, MBB, SystemZ::STE, 0, true); 6030 case SystemZ::CondStoreF64: 6031 return emitCondStore(MI, MBB, SystemZ::STD, 0, false); 6032 case 
SystemZ::CondStoreF64Inv: 6033 return emitCondStore(MI, MBB, SystemZ::STD, 0, true); 6034 6035 case SystemZ::AEXT128_64: 6036 return emitExt128(MI, MBB, false, SystemZ::subreg_l64); 6037 case SystemZ::ZEXT128_32: 6038 return emitExt128(MI, MBB, true, SystemZ::subreg_l32); 6039 case SystemZ::ZEXT128_64: 6040 return emitExt128(MI, MBB, true, SystemZ::subreg_l64); 6041 6042 case SystemZ::ATOMIC_SWAPW: 6043 return emitAtomicLoadBinary(MI, MBB, 0, 0); 6044 case SystemZ::ATOMIC_SWAP_32: 6045 return emitAtomicLoadBinary(MI, MBB, 0, 32); 6046 case SystemZ::ATOMIC_SWAP_64: 6047 return emitAtomicLoadBinary(MI, MBB, 0, 64); 6048 6049 case SystemZ::ATOMIC_LOADW_AR: 6050 return emitAtomicLoadBinary(MI, MBB, SystemZ::AR, 0); 6051 case SystemZ::ATOMIC_LOADW_AFI: 6052 return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI, 0); 6053 case SystemZ::ATOMIC_LOAD_AR: 6054 return emitAtomicLoadBinary(MI, MBB, SystemZ::AR, 32); 6055 case SystemZ::ATOMIC_LOAD_AHI: 6056 return emitAtomicLoadBinary(MI, MBB, SystemZ::AHI, 32); 6057 case SystemZ::ATOMIC_LOAD_AFI: 6058 return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI, 32); 6059 case SystemZ::ATOMIC_LOAD_AGR: 6060 return emitAtomicLoadBinary(MI, MBB, SystemZ::AGR, 64); 6061 case SystemZ::ATOMIC_LOAD_AGHI: 6062 return emitAtomicLoadBinary(MI, MBB, SystemZ::AGHI, 64); 6063 case SystemZ::ATOMIC_LOAD_AGFI: 6064 return emitAtomicLoadBinary(MI, MBB, SystemZ::AGFI, 64); 6065 6066 case SystemZ::ATOMIC_LOADW_SR: 6067 return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 0); 6068 case SystemZ::ATOMIC_LOAD_SR: 6069 return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 32); 6070 case SystemZ::ATOMIC_LOAD_SGR: 6071 return emitAtomicLoadBinary(MI, MBB, SystemZ::SGR, 64); 6072 6073 case SystemZ::ATOMIC_LOADW_NR: 6074 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0); 6075 case SystemZ::ATOMIC_LOADW_NILH: 6076 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 0); 6077 case SystemZ::ATOMIC_LOAD_NR: 6078 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32); 6079 case 
SystemZ::ATOMIC_LOAD_NILL: 6080 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 32); 6081 case SystemZ::ATOMIC_LOAD_NILH: 6082 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 32); 6083 case SystemZ::ATOMIC_LOAD_NILF: 6084 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 32); 6085 case SystemZ::ATOMIC_LOAD_NGR: 6086 return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64); 6087 case SystemZ::ATOMIC_LOAD_NILL64: 6088 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL64, 64); 6089 case SystemZ::ATOMIC_LOAD_NILH64: 6090 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH64, 64); 6091 case SystemZ::ATOMIC_LOAD_NIHL64: 6092 return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL64, 64); 6093 case SystemZ::ATOMIC_LOAD_NIHH64: 6094 return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH64, 64); 6095 case SystemZ::ATOMIC_LOAD_NILF64: 6096 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF64, 64); 6097 case SystemZ::ATOMIC_LOAD_NIHF64: 6098 return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF64, 64); 6099 6100 case SystemZ::ATOMIC_LOADW_OR: 6101 return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 0); 6102 case SystemZ::ATOMIC_LOADW_OILH: 6103 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 0); 6104 case SystemZ::ATOMIC_LOAD_OR: 6105 return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 32); 6106 case SystemZ::ATOMIC_LOAD_OILL: 6107 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL, 32); 6108 case SystemZ::ATOMIC_LOAD_OILH: 6109 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 32); 6110 case SystemZ::ATOMIC_LOAD_OILF: 6111 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF, 32); 6112 case SystemZ::ATOMIC_LOAD_OGR: 6113 return emitAtomicLoadBinary(MI, MBB, SystemZ::OGR, 64); 6114 case SystemZ::ATOMIC_LOAD_OILL64: 6115 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL64, 64); 6116 case SystemZ::ATOMIC_LOAD_OILH64: 6117 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH64, 64); 6118 case SystemZ::ATOMIC_LOAD_OIHL64: 6119 return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHL64, 64); 
6120 case SystemZ::ATOMIC_LOAD_OIHH64: 6121 return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHH64, 64); 6122 case SystemZ::ATOMIC_LOAD_OILF64: 6123 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF64, 64); 6124 case SystemZ::ATOMIC_LOAD_OIHF64: 6125 return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHF64, 64); 6126 6127 case SystemZ::ATOMIC_LOADW_XR: 6128 return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 0); 6129 case SystemZ::ATOMIC_LOADW_XILF: 6130 return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 0); 6131 case SystemZ::ATOMIC_LOAD_XR: 6132 return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 32); 6133 case SystemZ::ATOMIC_LOAD_XILF: 6134 return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 32); 6135 case SystemZ::ATOMIC_LOAD_XGR: 6136 return emitAtomicLoadBinary(MI, MBB, SystemZ::XGR, 64); 6137 case SystemZ::ATOMIC_LOAD_XILF64: 6138 return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF64, 64); 6139 case SystemZ::ATOMIC_LOAD_XIHF64: 6140 return emitAtomicLoadBinary(MI, MBB, SystemZ::XIHF64, 64); 6141 6142 case SystemZ::ATOMIC_LOADW_NRi: 6143 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0, true); 6144 case SystemZ::ATOMIC_LOADW_NILHi: 6145 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 0, true); 6146 case SystemZ::ATOMIC_LOAD_NRi: 6147 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32, true); 6148 case SystemZ::ATOMIC_LOAD_NILLi: 6149 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 32, true); 6150 case SystemZ::ATOMIC_LOAD_NILHi: 6151 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 32, true); 6152 case SystemZ::ATOMIC_LOAD_NILFi: 6153 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 32, true); 6154 case SystemZ::ATOMIC_LOAD_NGRi: 6155 return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64, true); 6156 case SystemZ::ATOMIC_LOAD_NILL64i: 6157 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL64, 64, true); 6158 case SystemZ::ATOMIC_LOAD_NILH64i: 6159 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH64, 64, true); 6160 case 
SystemZ::ATOMIC_LOAD_NIHL64i: 6161 return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL64, 64, true); 6162 case SystemZ::ATOMIC_LOAD_NIHH64i: 6163 return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH64, 64, true); 6164 case SystemZ::ATOMIC_LOAD_NILF64i: 6165 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF64, 64, true); 6166 case SystemZ::ATOMIC_LOAD_NIHF64i: 6167 return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF64, 64, true); 6168 6169 case SystemZ::ATOMIC_LOADW_MIN: 6170 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, 6171 SystemZ::CCMASK_CMP_LE, 0); 6172 case SystemZ::ATOMIC_LOAD_MIN_32: 6173 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, 6174 SystemZ::CCMASK_CMP_LE, 32); 6175 case SystemZ::ATOMIC_LOAD_MIN_64: 6176 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR, 6177 SystemZ::CCMASK_CMP_LE, 64); 6178 6179 case SystemZ::ATOMIC_LOADW_MAX: 6180 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, 6181 SystemZ::CCMASK_CMP_GE, 0); 6182 case SystemZ::ATOMIC_LOAD_MAX_32: 6183 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, 6184 SystemZ::CCMASK_CMP_GE, 32); 6185 case SystemZ::ATOMIC_LOAD_MAX_64: 6186 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR, 6187 SystemZ::CCMASK_CMP_GE, 64); 6188 6189 case SystemZ::ATOMIC_LOADW_UMIN: 6190 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, 6191 SystemZ::CCMASK_CMP_LE, 0); 6192 case SystemZ::ATOMIC_LOAD_UMIN_32: 6193 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, 6194 SystemZ::CCMASK_CMP_LE, 32); 6195 case SystemZ::ATOMIC_LOAD_UMIN_64: 6196 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR, 6197 SystemZ::CCMASK_CMP_LE, 64); 6198 6199 case SystemZ::ATOMIC_LOADW_UMAX: 6200 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, 6201 SystemZ::CCMASK_CMP_GE, 0); 6202 case SystemZ::ATOMIC_LOAD_UMAX_32: 6203 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, 6204 SystemZ::CCMASK_CMP_GE, 32); 6205 case SystemZ::ATOMIC_LOAD_UMAX_64: 6206 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR, 6207 SystemZ::CCMASK_CMP_GE, 64); 6208 6209 
case SystemZ::ATOMIC_CMP_SWAPW: 6210 return emitAtomicCmpSwapW(MI, MBB); 6211 case SystemZ::MVCSequence: 6212 case SystemZ::MVCLoop: 6213 return emitMemMemWrapper(MI, MBB, SystemZ::MVC); 6214 case SystemZ::NCSequence: 6215 case SystemZ::NCLoop: 6216 return emitMemMemWrapper(MI, MBB, SystemZ::NC); 6217 case SystemZ::OCSequence: 6218 case SystemZ::OCLoop: 6219 return emitMemMemWrapper(MI, MBB, SystemZ::OC); 6220 case SystemZ::XCSequence: 6221 case SystemZ::XCLoop: 6222 return emitMemMemWrapper(MI, MBB, SystemZ::XC); 6223 case SystemZ::CLCSequence: 6224 case SystemZ::CLCLoop: 6225 return emitMemMemWrapper(MI, MBB, SystemZ::CLC); 6226 case SystemZ::CLSTLoop: 6227 return emitStringWrapper(MI, MBB, SystemZ::CLST); 6228 case SystemZ::MVSTLoop: 6229 return emitStringWrapper(MI, MBB, SystemZ::MVST); 6230 case SystemZ::SRSTLoop: 6231 return emitStringWrapper(MI, MBB, SystemZ::SRST); 6232 case SystemZ::TBEGIN: 6233 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false); 6234 case SystemZ::TBEGIN_nofloat: 6235 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true); 6236 case SystemZ::TBEGINC: 6237 return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true); 6238 case SystemZ::LTEBRCompare_VecPseudo: 6239 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR); 6240 case SystemZ::LTDBRCompare_VecPseudo: 6241 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR); 6242 case SystemZ::LTXBRCompare_VecPseudo: 6243 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR); 6244 6245 default: 6246 llvm_unreachable("Unexpected instr type to insert"); 6247 } 6248 } 6249